Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
b6c6f4f9
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b6c6f4f9
编写于
9月 23, 2022
作者:
Y
YuanRisheng
提交者:
GitHub
9月 23, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
move selected_rows_functor (#46373)
上级
4dd5bf79
变更
35
展开全部
隐藏空白更改
内联
并排
Showing
35 changed file
with
331 addition
and
359 deletion
+331
-359
paddle/fluid/distributed/ps/service/communicator/communicator.h
.../fluid/distributed/ps/service/communicator/communicator.h
+3
-4
paddle/fluid/imperative/gradient_accumulator.cc
paddle/fluid/imperative/gradient_accumulator.cc
+25
-28
paddle/fluid/operators/clip_by_norm_op.h
paddle/fluid/operators/clip_by_norm_op.h
+1
-1
paddle/fluid/operators/math/CMakeLists.txt
paddle/fluid/operators/math/CMakeLists.txt
+0
-14
paddle/fluid/operators/math/selected_rows_functor_test.cc
paddle/fluid/operators/math/selected_rows_functor_test.cc
+12
-19
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
+6
-9
paddle/fluid/operators/optimizers/adagrad_op.cc
paddle/fluid/operators/optimizers/adagrad_op.cc
+1
-1
paddle/fluid/operators/optimizers/adam_op_functor.h
paddle/fluid/operators/optimizers/adam_op_functor.h
+2
-2
paddle/fluid/operators/optimizers/ftrl_op.h
paddle/fluid/operators/optimizers/ftrl_op.h
+2
-2
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+0
-1
paddle/phi/kernels/cpu/adagrad_kernel.cc
paddle/phi/kernels/cpu/adagrad_kernel.cc
+3
-4
paddle/phi/kernels/cpu/add_n_kernel.cc
paddle/phi/kernels/cpu/add_n_kernel.cc
+1
-1
paddle/phi/kernels/funcs/CMakeLists.txt
paddle/phi/kernels/funcs/CMakeLists.txt
+14
-0
paddle/phi/kernels/funcs/lamb_functors.h
paddle/phi/kernels/funcs/lamb_functors.h
+2
-2
paddle/phi/kernels/funcs/selected_rows_functor.cc
paddle/phi/kernels/funcs/selected_rows_functor.cc
+130
-132
paddle/phi/kernels/funcs/selected_rows_functor.cu
paddle/phi/kernels/funcs/selected_rows_functor.cu
+92
-94
paddle/phi/kernels/funcs/selected_rows_functor.h
paddle/phi/kernels/funcs/selected_rows_functor.h
+10
-13
paddle/phi/kernels/gpu/adagrad_kernel.cu
paddle/phi/kernels/gpu/adagrad_kernel.cu
+3
-4
paddle/phi/kernels/gpu/adam_kernel.cu
paddle/phi/kernels/gpu/adam_kernel.cu
+0
-1
paddle/phi/kernels/gpu/adamw_kernel.cu
paddle/phi/kernels/gpu/adamw_kernel.cu
+1
-1
paddle/phi/kernels/impl/adagrad_kernel_impl.h
paddle/phi/kernels/impl/adagrad_kernel_impl.h
+0
-1
paddle/phi/kernels/impl/add_n_kernel_impl.h
paddle/phi/kernels/impl/add_n_kernel_impl.h
+1
-1
paddle/phi/kernels/impl/clip_kernel_impl.h
paddle/phi/kernels/impl/clip_kernel_impl.h
+0
-1
paddle/phi/kernels/impl/momentum_kernel_impl.h
paddle/phi/kernels/impl/momentum_kernel_impl.h
+2
-2
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/clip_kernel.h
paddle/phi/kernels/selected_rows/clip_kernel.h
+0
-1
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
+2
-2
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
+2
-2
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
+2
-2
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
...phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
+2
-2
paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
...e/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
+2
-2
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
+2
-2
未找到文件。
paddle/fluid/distributed/ps/service/communicator/communicator.h
浏览文件 @
b6c6f4f9
...
@@ -37,13 +37,13 @@ limitations under the License. */
...
@@ -37,13 +37,13 @@ limitations under the License. */
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
#include "paddle/fluid/string/split.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
paddle
{
namespace
distributed
{
namespace
distributed
{
...
@@ -212,11 +212,10 @@ inline void MergeVars(const std::string &var_name,
...
@@ -212,11 +212,10 @@ inline void MergeVars(const std::string &var_name,
}
}
phi
::
CPUContext
dev_ctx
;
phi
::
CPUContext
dev_ctx
;
if
(
merge_add
)
{
if
(
merge_add
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_add
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_add
;
merge_add
(
dev_ctx
,
inputs
,
out_slr
);
merge_add
(
dev_ctx
,
inputs
,
out_slr
);
}
else
{
}
else
{
paddle
::
operators
::
math
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
T
>
phi
::
funcs
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
T
>
merge_average
;
merge_average
;
merge_average
(
dev_ctx
,
inputs
,
out_slr
);
merge_average
(
dev_ctx
,
inputs
,
out_slr
);
}
}
...
...
paddle/fluid/imperative/gradient_accumulator.cc
浏览文件 @
b6c6f4f9
...
@@ -22,7 +22,6 @@
...
@@ -22,7 +22,6 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
...
@@ -30,6 +29,7 @@
...
@@ -30,6 +29,7 @@
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#ifdef PADDLE_WITH_XPU
#ifdef PADDLE_WITH_XPU
#include "xpu/refactor/math.h"
#include "xpu/refactor/math.h"
#endif
#endif
...
@@ -354,15 +354,14 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
...
@@ -354,15 +354,14 @@ void SelectedRowsAddToTensor(const VarType& src, VarType* dst) {
framework
::
TransToProtoVarType
(
src_selected_rows
.
value
().
dtype
());
framework
::
TransToProtoVarType
(
src_selected_rows
.
value
().
dtype
());
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
#define PADDLE_SELECTED_ROWS_ADD_TO_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
paddle::operators::math::SelectedRowsAddToTensor<dev_ctx_type, cpp_type> \
phi::funcs::SelectedRowsAddToTensor<dev_ctx_type, cpp_type> functor; \
functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
src_selected_rows, \
dst_tensor); \
dst_tensor); \
return; \
return; \
}
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
@@ -406,15 +405,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
...
@@ -406,15 +405,14 @@ void SelectedRowsAddTensor(const VarType& src_selected_rows_var,
dst_tensor
->
Resize
(
src_tensor
.
dims
());
dst_tensor
->
Resize
(
src_tensor
.
dims
());
dst_tensor
->
mutable_data
(
place
,
src_tensor
.
dtype
());
dst_tensor
->
mutable_data
(
place
,
src_tensor
.
dtype
());
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
#define PADDLE_SELECTED_ROWS_ADD_TENSOR(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::operators::math::SelectedRowsAddTensor<dev_ctx_type, cpp_type> \
phi::funcs::SelectedRowsAddTensor<dev_ctx_type, cpp_type> functor; \
functor; \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
functor(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
src_selected_rows, \
src_tensor, \
src_tensor, \
dst_tensor); \
dst_tensor); \
return; \
return; \
}
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
@@ -469,15 +467,14 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
...
@@ -469,15 +467,14 @@ std::shared_ptr<ReturnVarType> SelectedRowsMerge(const VarType& src1,
phi
::
SelectedRows
*
dst_selected_rows
=
phi
::
SelectedRows
*
dst_selected_rows
=
GetEmptyInnerTensor
<
phi
::
SelectedRows
>
(
dst_var
.
get
());
GetEmptyInnerTensor
<
phi
::
SelectedRows
>
(
dst_var
.
get
());
#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
#define PADDLE_SELECTED_ROWS_ADD(dev_ctx_type, cpp_type) \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
if (data_type == framework::DataTypeTrait<cpp_type>::DataType()) { \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
paddle::platform::DeviceContext* dev_ctx = pool.Get(place); \
paddle::operators::math::scatter::MergeAdd<dev_ctx_type, cpp_type> \
phi::funcs::scatter::MergeAdd<dev_ctx_type, cpp_type> merge_add; \
merge_add; \
merge_add(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
merge_add(*(dynamic_cast<dev_ctx_type*>(dev_ctx)), \
src_selected_rows, \
src_selected_rows, \
dst_selected_rows); \
dst_selected_rows); \
return dst_var; \
return dst_var; \
}
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
...
...
paddle/fluid/operators/clip_by_norm_op.h
浏览文件 @
b6c6f4f9
...
@@ -17,8 +17,8 @@ limitations under the License. */
...
@@ -17,8 +17,8 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
...
paddle/fluid/operators/math/CMakeLists.txt
浏览文件 @
b6c6f4f9
...
@@ -31,20 +31,6 @@ math_library(sampler DEPS generator)
...
@@ -31,20 +31,6 @@ math_library(sampler DEPS generator)
# math_library(math_function DEPS blas dense_tensor tensor)
# math_library(math_function DEPS blas dense_tensor tensor)
math_library
(
maxouting
)
math_library
(
maxouting
)
if
(
WITH_MKLDNN
)
math_library
(
selected_rows_functor
DEPS
selected_rows_utils
math_function
blas
mkldnn_axpy_handler
mixed_vector
)
else
()
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function
blas mixed_vector
)
endif
()
math_library
(
sequence_padding
)
math_library
(
sequence_padding
)
math_library
(
sequence_pooling DEPS math_function jit_kernel_helper
)
math_library
(
sequence_pooling DEPS math_function jit_kernel_helper
)
math_library
(
sequence_scale
)
math_library
(
sequence_scale
)
...
...
paddle/fluid/operators/math/selected_rows_functor_test.cc
浏览文件 @
b6c6f4f9
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
@@ -48,7 +48,7 @@ TEST(selected_rows_functor, cpu_add) {
...
@@ -48,7 +48,7 @@ TEST(selected_rows_functor, cpu_add) {
// simplely concat two SelectedRows
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
p
addle
::
operators
::
math
::
SelectedRowsAdd
<
phi
::
CPUContext
,
float
>
add_functor
;
p
hi
::
funcs
::
SelectedRowsAdd
<
phi
::
CPUContext
,
float
>
add_functor
;
add_functor
(
ctx
,
*
selected_rows1
,
*
selected_rows2
,
output
.
get
());
add_functor
(
ctx
,
*
selected_rows1
,
*
selected_rows2
,
output
.
get
());
auto
out_height
=
output
->
height
();
auto
out_height
=
output
->
height
();
...
@@ -88,8 +88,7 @@ TEST(selected_rows_functor, cpu_add) {
...
@@ -88,8 +88,7 @@ TEST(selected_rows_functor, cpu_add) {
new
paddle
::
framework
::
Tensor
()};
new
paddle
::
framework
::
Tensor
()};
tensor2
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
tensor2
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTensor
<
phi
::
CPUContext
,
float
>
phi
::
funcs
::
SelectedRowsAddTensor
<
phi
::
CPUContext
,
float
>
add_tensor_functor
;
add_tensor_functor
;
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
auto
*
tensor2_data
=
tensor2
->
data
<
float
>
();
auto
*
tensor2_data
=
tensor2
->
data
<
float
>
();
...
@@ -141,8 +140,7 @@ TEST(selected_rows_functor, cpu_add_to) {
...
@@ -141,8 +140,7 @@ TEST(selected_rows_functor, cpu_add_to) {
// simplely concat two SelectedRows
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTo
<
phi
::
CPUContext
,
float
>
phi
::
funcs
::
SelectedRowsAddTo
<
phi
::
CPUContext
,
float
>
add_to_functor
;
add_to_functor
;
add_to_functor
(
ctx
,
*
selected_rows1
,
0
,
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows1
,
0
,
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows2
,
in1_value
->
numel
(),
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows2
,
in1_value
->
numel
(),
output
.
get
());
...
@@ -179,7 +177,7 @@ TEST(selected_rows_functor, cpu_add_to) {
...
@@ -179,7 +177,7 @@ TEST(selected_rows_functor, cpu_add_to) {
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
...
@@ -216,7 +214,7 @@ TEST(selected_rows_functor, cpu_merge_average_float) {
...
@@ -216,7 +214,7 @@ TEST(selected_rows_functor, cpu_merge_average_float) {
cpu_place
);
cpu_place
);
functor
(
ctx
,
in_value
,
1.0
);
functor
(
ctx
,
in_value
,
1.0
);
p
addle
::
operators
::
math
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
scatter
::
MergeAverage
<
phi
::
CPUContext
,
float
>
merge_average_functor
;
merge_average_functor
;
phi
::
SelectedRows
output
=
merge_average_functor
(
ctx
,
*
selected_rows
);
phi
::
SelectedRows
output
=
merge_average_functor
(
ctx
,
*
selected_rows
);
...
@@ -253,8 +251,7 @@ TEST(selected_rows_functor, cpu_merge_add_float) {
...
@@ -253,8 +251,7 @@ TEST(selected_rows_functor, cpu_merge_add_float) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
merge_add_functor
;
merge_add_functor
(
ctx
,
*
selected_rows
,
output
.
get
());
merge_add_functor
(
ctx
,
*
selected_rows
,
output
.
get
());
auto
out_height
=
output
->
height
();
auto
out_height
=
output
->
height
();
...
@@ -290,8 +287,7 @@ TEST(selected_rows_functor, cpu_merge_add_int) {
...
@@ -290,8 +287,7 @@ TEST(selected_rows_functor, cpu_merge_add_int) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
int
>
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
int
>
merge_add_functor
;
merge_add_functor
;
merge_add_functor
(
ctx
,
*
selected_rows
,
output
.
get
());
merge_add_functor
(
ctx
,
*
selected_rows
,
output
.
get
());
auto
out_height
=
output
->
height
();
auto
out_height
=
output
->
height
();
...
@@ -337,8 +333,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) {
...
@@ -337,8 +333,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
inputs
.
push_back
(
selected_rows1
.
get
());
...
@@ -387,8 +382,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
...
@@ -387,8 +382,7 @@ TEST(selected_rows_functor, cpu_merge_add_multi_noduplicated) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
float
>
merge_add_functor
;
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
inputs
.
push_back
(
selected_rows1
.
get
());
...
@@ -444,8 +438,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
...
@@ -444,8 +438,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
auto
*
out_value
=
output
->
mutable_value
();
auto
*
out_value
=
output
->
mutable_value
();
// simplely concat two SelectedRows
// simplely concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
cpu_place
);
paddle
::
operators
::
math
::
SelectedRowsSumTo
<
phi
::
CPUContext
,
float
>
phi
::
funcs
::
SelectedRowsSumTo
<
phi
::
CPUContext
,
float
>
sum_to_functor
;
sum_to_functor
;
sum_to_functor
(
ctx
,
sum_to_functor
(
ctx
,
std
::
vector
<
phi
::
SelectedRows
*>
(
std
::
vector
<
phi
::
SelectedRows
*>
(
{
selected_rows1
.
get
(),
selected_rows2
.
get
()}),
{
selected_rows1
.
get
(),
selected_rows2
.
get
()}),
...
@@ -479,7 +472,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
...
@@ -479,7 +472,7 @@ TEST(selected_rows_functor, cpu_sum_to) {
new
paddle
::
framework
::
Tensor
()};
new
paddle
::
framework
::
Tensor
()};
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
cpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
auto
*
tensor1_data
=
tensor1
->
data
<
float
>
();
auto
*
tensor1_data
=
tensor1
->
data
<
float
>
();
...
...
paddle/fluid/operators/math/selected_rows_functor_test.cu.cc
浏览文件 @
b6c6f4f9
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
@@ -61,7 +61,7 @@ TEST(selected_rows_functor, gpu_add) {
...
@@ -61,7 +61,7 @@ TEST(selected_rows_functor, gpu_add) {
// simply concat two SelectedRows
// simply concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
gpu_place
);
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
gpu_place
);
p
addle
::
operators
::
math
::
SelectedRowsAdd
<
phi
::
GPUContext
,
float
>
add_functor
;
p
hi
::
funcs
::
SelectedRowsAdd
<
phi
::
GPUContext
,
float
>
add_functor
;
add_functor
(
ctx
,
*
selected_rows1
,
*
selected_rows2
,
output
.
get
());
add_functor
(
ctx
,
*
selected_rows1
,
*
selected_rows2
,
output
.
get
());
auto
out_height
=
output
->
height
();
auto
out_height
=
output
->
height
();
...
@@ -105,8 +105,7 @@ TEST(selected_rows_functor, gpu_add) {
...
@@ -105,8 +105,7 @@ TEST(selected_rows_functor, gpu_add) {
new
paddle
::
framework
::
Tensor
()};
new
paddle
::
framework
::
Tensor
()};
tensor2
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
gpu_place
);
tensor2
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
gpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>
phi
::
funcs
::
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>
add_tensor_functor
;
add_tensor_functor
;
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
add_tensor_functor
(
ctx
,
*
output
,
*
tensor1
,
tensor2
.
get
());
paddle
::
framework
::
Tensor
tensor2_cpu
;
paddle
::
framework
::
Tensor
tensor2_cpu
;
...
@@ -164,8 +163,7 @@ TEST(selected_rows_functor, gpu_add_to) {
...
@@ -164,8 +163,7 @@ TEST(selected_rows_functor, gpu_add_to) {
// simply concat two SelectedRows
// simply concat two SelectedRows
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
gpu_place
);
out_value
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
7
,
10
}),
gpu_place
);
paddle
::
operators
::
math
::
SelectedRowsAddTo
<
phi
::
GPUContext
,
float
>
phi
::
funcs
::
SelectedRowsAddTo
<
phi
::
GPUContext
,
float
>
add_to_functor
;
add_to_functor
;
add_to_functor
(
ctx
,
*
selected_rows1
,
0
,
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows1
,
0
,
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows2
,
in1_value
->
numel
(),
output
.
get
());
add_to_functor
(
ctx
,
*
selected_rows2
,
in1_value
->
numel
(),
output
.
get
());
...
@@ -206,7 +204,7 @@ TEST(selected_rows_functor, gpu_add_to) {
...
@@ -206,7 +204,7 @@ TEST(selected_rows_functor, gpu_add_to) {
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
gpu_place
);
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
height
,
row_numel
}),
gpu_place
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
functor
(
ctx
,
tensor1
.
get
(),
3.0
);
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
float
>
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
float
>
add_to_tensor_functor
;
add_to_tensor_functor
;
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
add_to_tensor_functor
(
ctx
,
*
output
,
tensor1
.
get
());
...
@@ -261,8 +259,7 @@ TEST(selected_rows_functor, gpu_merge_add) {
...
@@ -261,8 +259,7 @@ TEST(selected_rows_functor, gpu_merge_add) {
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
std
::
unique_ptr
<
phi
::
SelectedRows
>
output
{
new
phi
::
SelectedRows
()};
output
->
set_height
(
height
);
output
->
set_height
(
height
);
paddle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
float
>
phi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
float
>
merge_add_functor
;
merge_add_functor
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
std
::
vector
<
const
phi
::
SelectedRows
*>
inputs
;
inputs
.
push_back
(
selected_rows1
.
get
());
inputs
.
push_back
(
selected_rows1
.
get
());
...
...
paddle/fluid/operators/optimizers/adagrad_op.cc
浏览文件 @
b6c6f4f9
...
@@ -17,10 +17,10 @@ limitations under the License. */
...
@@ -17,10 +17,10 @@ limitations under the License. */
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/core/infermeta_utils.h"
#include "paddle/phi/infermeta/multiary.h"
#include "paddle/phi/infermeta/multiary.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
...
paddle/fluid/operators/optimizers/adam_op_functor.h
浏览文件 @
b6c6f4f9
...
@@ -16,12 +16,12 @@
...
@@ -16,12 +16,12 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
namespace
scatter
=
p
addle
::
operators
::
math
::
scatter
;
namespace
scatter
=
p
hi
::
funcs
::
scatter
;
static
inline
float
GetAttrFromTensor
(
const
framework
::
Tensor
*
tensor
)
{
static
inline
float
GetAttrFromTensor
(
const
framework
::
Tensor
*
tensor
)
{
const
float
*
tensor_data
=
tensor
->
data
<
float
>
();
const
float
*
tensor_data
=
tensor
->
data
<
float
>
();
...
...
paddle/fluid/operators/optimizers/ftrl_op.h
浏览文件 @
b6c6f4f9
...
@@ -15,8 +15,8 @@ limitations under the License. */
...
@@ -15,8 +15,8 @@ limitations under the License. */
#pragma once
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -193,7 +193,7 @@ class FTRLOpKernel : public framework::OpKernel<T> {
...
@@ -193,7 +193,7 @@ class FTRLOpKernel : public framework::OpKernel<T> {
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
math
::
scatter
::
MergeAdd
<
DeviceContext
,
T
>
merge_func
;
phi
::
funcs
::
scatter
::
MergeAdd
<
DeviceContext
,
T
>
merge_func
;
merge_func
(
merge_func
(
ctx
.
template
device_context
<
DeviceContext
>(),
*
grad
,
merged_grad
);
ctx
.
template
device_context
<
DeviceContext
>(),
*
grad
,
merged_grad
);
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
b6c6f4f9
...
@@ -19,7 +19,6 @@ limitations under the License. */
...
@@ -19,7 +19,6 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/amp/fp16_type_traits.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
...
...
paddle/phi/kernels/cpu/adagrad_kernel.cc
浏览文件 @
b6c6f4f9
...
@@ -14,10 +14,10 @@
...
@@ -14,10 +14,10 @@
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
namespace
phi
{
namespace
phi
{
...
@@ -38,7 +38,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
...
@@ -38,7 +38,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
DenseTensor
*
param
)
{
DenseTensor
*
param
)
{
// 1. g_m.rows = set(g.rows)
// 1. g_m.rows = set(g.rows)
auto
grad_width
=
grad
.
value
().
dims
()[
1
];
auto
grad_width
=
grad
.
value
().
dims
()[
1
];
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
CPUContext
,
T
>
merge_func
;
auto
grad_merge
=
merge_func
(
context
,
grad
);
auto
grad_merge
=
merge_func
(
context
,
grad
);
auto
&
merge_rows
=
grad_merge
.
rows
();
auto
&
merge_rows
=
grad_merge
.
rows
();
auto
*
grad_merge_data
=
grad_merge
.
mutable_value
()
->
template
data
<
T
>();
auto
*
grad_merge_data
=
grad_merge
.
mutable_value
()
->
template
data
<
T
>();
...
@@ -47,8 +47,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
...
@@ -47,8 +47,7 @@ struct SparseAdagradFunctor<phi::CPUContext, T> {
auto
grad_square
=
auto
grad_square
=
SquareSelectedRows
<
phi
::
CPUContext
,
T
>
(
context
,
grad_merge
);
SquareSelectedRows
<
phi
::
CPUContext
,
T
>
(
context
,
grad_merge
);
paddle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
phi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
CPUContext
,
T
>
functor
;
functor
;
functor
(
context
,
grad_square
,
moment
);
functor
(
context
,
grad_square
,
moment
);
// 3. update parameter
// 3. update parameter
...
...
paddle/phi/kernels/cpu/add_n_kernel.cc
浏览文件 @
b6c6f4f9
...
@@ -53,7 +53,7 @@ void AddNKernel(const Context& dev_ctx,
...
@@ -53,7 +53,7 @@ void AddNKernel(const Context& dev_ctx,
}
}
}
}
p
addle
::
operators
::
math
::
SelectedRowsAddToTensor
<
Context
,
T
>
functor
;
p
hi
::
funcs
::
SelectedRowsAddToTensor
<
Context
,
T
>
functor
;
// If in_place, just skip the first tensor
// If in_place, just skip the first tensor
for
(
size_t
i
=
start
;
i
<
in_num
;
i
++
)
{
for
(
size_t
i
=
start
;
i
<
in_num
;
i
++
)
{
if
(
DenseTensor
::
classof
(
x
[
i
]))
{
if
(
DenseTensor
::
classof
(
x
[
i
]))
{
...
...
paddle/phi/kernels/funcs/CMakeLists.txt
浏览文件 @
b6c6f4f9
...
@@ -38,3 +38,17 @@ else()
...
@@ -38,3 +38,17 @@ else()
math_library
(
fft DEPS dense_tensor pocketfft
)
math_library
(
fft DEPS dense_tensor pocketfft
)
endif
()
endif
()
endif
()
endif
()
if
(
WITH_MKLDNN
)
math_library
(
selected_rows_functor
DEPS
selected_rows_utils
math_function
blas
mkldnn_axpy_handler
mixed_vector
)
else
()
math_library
(
selected_rows_functor DEPS selected_rows_utils math_function
blas mixed_vector
)
endif
()
paddle/phi/kernels/funcs/lamb_functors.h
浏览文件 @
b6c6f4f9
...
@@ -19,19 +19,19 @@ limitations under the License. */
...
@@ -19,19 +19,19 @@ limitations under the License. */
#include <vector>
#include <vector>
#include "paddle/fluid/memory/buffer.h"
#include "paddle/fluid/memory/buffer.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
#include "paddle/phi/kernels/funcs/eigen/extensions.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/squared_l2_norm.h"
#include "paddle/phi/kernels/funcs/squared_l2_norm.h"
#include "paddle/phi/kernels/funcs/tensor_to_string.h"
#include "paddle/phi/kernels/funcs/tensor_to_string.h"
namespace
phi
{
namespace
phi
{
namespace
scatter
=
p
addle
::
operators
::
math
::
scatter
;
namespace
scatter
=
p
hi
::
funcs
::
scatter
;
template
<
typename
T
,
bool
IsMultiPrecision
>
template
<
typename
T
,
bool
IsMultiPrecision
>
struct
LambMomentREGUpdateFunctor
{
struct
LambMomentREGUpdateFunctor
{
...
...
paddle/
fluid/operators/math
/selected_rows_functor.cc
→
paddle/
phi/kernels/funcs
/selected_rows_functor.cc
浏览文件 @
b6c6f4f9
此差异已折叠。
点击以展开。
paddle/
fluid/operators/math
/selected_rows_functor.cu
→
paddle/
phi/kernels/funcs
/selected_rows_functor.cu
浏览文件 @
b6c6f4f9
...
@@ -15,15 +15,14 @@ limitations under the License. */
...
@@ -15,15 +15,14 @@ limitations under the License. */
#include <set>
#include <set>
#include <vector>
#include <vector>
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/bfloat16.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/float16.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
paddle
{
namespace
phi
{
namespace
operators
{
namespace
funcs
{
namespace
math
{
template
<
typename
T
>
template
<
typename
T
>
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
T
>
{
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
void
operator
()(
const
phi
::
GPUContext
&
context
,
...
@@ -34,14 +33,14 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
...
@@ -34,14 +33,14 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_height
,
in1_height
,
input2
.
height
(),
input2
.
height
(),
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"But received first input height = "
"[%d], second input height = [%d]"
,
"[%d], second input height = [%d]"
,
in1_height
,
in1_height
,
input2
.
height
()));
input2
.
height
()));
output
->
set_height
(
in1_height
);
output
->
set_height
(
in1_height
);
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
());
paddle
::
framework
::
Vector
<
int64_t
>
in1_rows
(
input1
.
rows
());
auto
&
in2_rows
=
input2
.
rows
();
auto
&
in2_rows
=
input2
.
rows
();
std
::
vector
<
int64_t
>
out_rows
;
std
::
vector
<
int64_t
>
out_rows
;
out_rows
.
reserve
(
in1_rows
.
size
()
+
in2_rows
.
size
());
out_rows
.
reserve
(
in1_rows
.
size
()
+
in2_rows
.
size
());
...
@@ -59,7 +58,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
...
@@ -59,7 +58,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in1_row_numel
,
in2_value
.
numel
()
/
in2_rows
.
size
(),
in2_value
.
numel
()
/
in2_rows
.
size
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
in1_row_numel
,
...
@@ -67,7 +66,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
...
@@ -67,7 +66,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in1_row_numel
,
out_value
->
numel
()
/
out_rows
.
size
(),
out_value
->
numel
()
/
out_rows
.
size
(),
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and oupput width must be equal."
"The input and oupput width must be equal."
"But received input width = [%d], output width = [%d]"
,
"But received input width = [%d], output width = [%d]"
,
in1_row_numel
,
in1_row_numel
,
...
@@ -77,35 +76,35 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
...
@@ -77,35 +76,35 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
in1_place
=
input1
.
place
();
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
"The running environment is not on the GPU place."
));
auto
in2_place
=
input2
.
place
();
auto
in2_place
=
input2
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in2_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in2_place
),
true
,
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
"The running environment is not on the GPU place."
));
auto
out_place
=
context
.
GetPlace
();
auto
out_place
=
context
.
GetPlace
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
out_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
out_place
),
true
,
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
"The running environment is not on the GPU place."
));
memory
::
Copy
(
out_place
,
paddle
::
memory
::
Copy
(
out_place
,
out_data
,
out_data
,
in1_place
,
in1_place
,
in1_data
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
context
.
stream
());
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
auto
*
in2_data
=
in2_value
.
data
<
T
>
();
memory
::
Copy
(
out_place
,
paddle
::
memory
::
Copy
(
out_place
,
out_data
+
in1_value
.
numel
(),
out_data
+
in1_value
.
numel
(),
in2_place
,
in2_place
,
in2_data
,
in2_data
,
in2_value
.
numel
()
*
sizeof
(
T
),
in2_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
context
.
stream
());
}
}
};
};
...
@@ -137,15 +136,15 @@ template <typename T>
...
@@ -137,15 +136,15 @@ template <typename T>
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
T
>
{
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
phi
::
SelectedRows
&
input1
,
const
framework
::
Tensor
&
input2
,
const
phi
::
Dense
Tensor
&
input2
,
framework
::
Tensor
*
output
)
{
phi
::
Dense
Tensor
*
output
)
{
auto
in1_height
=
input1
.
height
();
auto
in1_height
=
input1
.
height
();
auto
in2_dims
=
input2
.
dims
();
auto
in2_dims
=
input2
.
dims
();
auto
out_dims
=
output
->
dims
();
auto
out_dims
=
output
->
dims
();
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_height
,
in1_height
,
in2_dims
[
0
],
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"The two inputs height must be equal."
"But received first input height = [%d], first input height = [%d]"
,
"But received first input height = [%d], first input height = [%d]"
,
in1_height
,
in1_height
,
...
@@ -153,7 +152,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
...
@@ -153,7 +152,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_height
,
in1_height
,
out_dims
[
0
],
out_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and output height must be equal."
"The input and output height must be equal."
"But received input height = [%d], output height = [%d]"
,
"But received input height = [%d], output height = [%d]"
,
in1_height
,
in1_height
,
...
@@ -166,7 +165,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
...
@@ -166,7 +165,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in1_row_numel
,
input2
.
numel
()
/
in1_height
,
input2
.
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
in1_row_numel
,
...
@@ -174,7 +173,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
...
@@ -174,7 +173,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in1_row_numel
,
output
->
numel
()
/
in1_height
,
output
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The input and output width must be equal."
"The input and output width must be equal."
"But received input width = [%d], output width = [%d]"
,
"But received input width = [%d], output width = [%d]"
,
in1_row_numel
,
in1_row_numel
,
...
@@ -198,16 +197,16 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
...
@@ -198,16 +197,16 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
out_data
,
out_data
,
in1_row_numel
);
in1_row_numel
);
auto
out_eigen
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
out_eigen
=
EigenVector
<
T
>::
Flatten
(
*
output
);
auto
in2_eigen
=
framework
::
EigenVector
<
T
>::
Flatten
(
input2
);
auto
in2_eigen
=
EigenVector
<
T
>::
Flatten
(
input2
);
out_eigen
.
device
(
*
context
.
eigen_device
())
=
out_eigen
+
in2_eigen
;
out_eigen
.
device
(
*
context
.
eigen_device
())
=
out_eigen
+
in2_eigen
;
}
}
};
};
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
float
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAdd
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddTensor
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
template
<
typename
T
>
template
<
typename
T
>
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
T
>
{
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
T
>
{
...
@@ -219,11 +218,11 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
...
@@ -219,11 +218,11 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_height
,
in1_height
,
input2
->
height
(),
input2
->
height
(),
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"But received first input height = "
"[%d], second input height = [%d]"
,
"[%d], second input height = [%d]"
,
in1_height
,
in1_height
,
input2
->
height
()));
input2
->
height
()));
auto
&
in1_rows
=
input1
.
rows
();
auto
&
in1_rows
=
input1
.
rows
();
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
auto
&
in2_rows
=
*
(
input2
->
mutable_rows
());
...
@@ -238,24 +237,24 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
...
@@ -238,24 +237,24 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
}
}
auto
in1_place
=
input1
.
place
();
auto
in1_place
=
input1
.
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
"The running environment is not on the GPU place."
));
auto
in2_place
=
input2
->
place
();
auto
in2_place
=
input2
->
place
();
PADDLE_ENFORCE_EQ
(
platform
::
is_gpu_place
(
in1_place
),
PADDLE_ENFORCE_EQ
(
p
addle
::
p
latform
::
is_gpu_place
(
in1_place
),
true
,
true
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The running environment is not on the GPU place."
));
"The running environment is not on the GPU place."
));
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in1_data
=
in1_value
.
data
<
T
>
();
auto
*
in2_data
=
in2_value
->
data
<
T
>
();
auto
*
in2_data
=
in2_value
->
data
<
T
>
();
memory
::
Copy
(
in2_place
,
paddle
::
memory
::
Copy
(
in2_place
,
in2_data
+
input2_offset
,
in2_data
+
input2_offset
,
in1_place
,
in1_place
,
in1_data
,
in1_data
,
in1_value
.
numel
()
*
sizeof
(
T
),
in1_value
.
numel
()
*
sizeof
(
T
),
context
.
stream
());
context
.
stream
());
}
}
};
};
...
@@ -263,7 +262,7 @@ template struct SelectedRowsAddTo<phi::GPUContext, float>;
...
@@ -263,7 +262,7 @@ template struct SelectedRowsAddTo<phi::GPUContext, float>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
int
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
int
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
int64_t
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
int64_t
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddTo
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
namespace
{
namespace
{
template
<
typename
T
,
int
block_size
>
template
<
typename
T
,
int
block_size
>
...
@@ -289,17 +288,17 @@ template <typename T>
...
@@ -289,17 +288,17 @@ template <typename T>
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
{
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
{
void
operator
()(
const
phi
::
GPUContext
&
context
,
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
phi
::
Dense
Tensor
*
input2
)
{
auto
in1_height
=
input1
.
height
();
auto
in1_height
=
input1
.
height
();
auto
in2_dims
=
input2
->
dims
();
auto
in2_dims
=
input2
->
dims
();
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_height
,
in1_height
,
in2_dims
[
0
],
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"But received first input height = "
"[%d], second input height = [%d]"
,
"[%d], second input height = [%d]"
,
in1_height
,
in1_height
,
in2_dims
[
0
]));
in2_dims
[
0
]));
auto
&
in1_value
=
input1
.
value
();
auto
&
in1_value
=
input1
.
value
();
auto
&
in1_rows
=
input1
.
rows
();
auto
&
in1_rows
=
input1
.
rows
();
...
@@ -308,7 +307,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
...
@@ -308,7 +307,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in1_row_numel
,
input2
->
numel
()
/
in1_height
,
input2
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
in1_row_numel
,
...
@@ -333,7 +332,7 @@ template struct SelectedRowsAddToTensor<phi::GPUContext, float>;
...
@@ -333,7 +332,7 @@ template struct SelectedRowsAddToTensor<phi::GPUContext, float>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
double
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
int
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
int
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
int64_t
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
int64_t
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
p
latform
::
float16
>;
template
struct
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
p
hi
::
dtype
::
float16
>;
namespace
scatter
{
namespace
scatter
{
...
@@ -379,7 +378,7 @@ struct MergeAddImpl {
...
@@ -379,7 +378,7 @@ struct MergeAddImpl {
const
phi
::
SelectedRows
&
input
,
const
phi
::
SelectedRows
&
input
,
phi
::
SelectedRows
*
output
,
phi
::
SelectedRows
*
output
,
const
bool
sorted_result
=
false
)
{
const
bool
sorted_result
=
false
)
{
framework
::
Vector
<
int64_t
>
input_rows
(
input
.
rows
());
paddle
::
framework
::
Vector
<
int64_t
>
input_rows
(
input
.
rows
());
if
(
input_rows
.
size
()
==
0
)
{
if
(
input_rows
.
size
()
==
0
)
{
return
;
return
;
}
}
...
@@ -387,7 +386,7 @@ struct MergeAddImpl {
...
@@ -387,7 +386,7 @@ struct MergeAddImpl {
phi
::
SelectedRows
&
out
=
*
output
;
phi
::
SelectedRows
&
out
=
*
output
;
std
::
set
<
int64_t
>
row_set
(
input_rows
.
begin
(),
input_rows
.
end
());
std
::
set
<
int64_t
>
row_set
(
input_rows
.
begin
(),
input_rows
.
end
());
std
::
vector
<
int64_t
>
merge_rows_cpu
(
row_set
.
begin
(),
row_set
.
end
());
std
::
vector
<
int64_t
>
merge_rows_cpu
(
row_set
.
begin
(),
row_set
.
end
());
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
auto
input_width
=
input
.
value
().
dims
()[
1
];
auto
input_width
=
input
.
value
().
dims
()[
1
];
...
@@ -446,20 +445,20 @@ struct MergeAddImpl {
...
@@ -446,20 +445,20 @@ struct MergeAddImpl {
if
(
input
->
rows
().
size
()
==
0
)
{
if
(
input
->
rows
().
size
()
==
0
)
{
continue
;
continue
;
}
}
PADDLE_ENFORCE_EQ
(
input_width
,
PADDLE_ENFORCE_EQ
(
input
->
value
().
dims
()[
1
]
,
input_width
,
platform
::
errors
::
InvalidArgument
(
input
->
value
().
dims
()[
1
],
"All input should have same "
phi
::
errors
::
InvalidArgument
(
"All input should have same "
"dimension except for the first one."
));
"dimension except for the first one."
));
PADDLE_ENFORCE_EQ
(
input_height
,
PADDLE_ENFORCE_EQ
(
input
->
height
()
,
input_height
,
platform
::
errors
::
InvalidArgument
(
input
->
height
(),
"All input should have same height."
));
phi
::
errors
::
InvalidArgument
(
"All input should have same height."
));
merged_row_set
.
insert
(
input
->
rows
().
begin
(),
input
->
rows
().
end
());
merged_row_set
.
insert
(
input
->
rows
().
begin
(),
input
->
rows
().
end
());
}
}
std
::
vector
<
int64_t
>
merge_rows_cpu
(
merged_row_set
.
begin
(),
std
::
vector
<
int64_t
>
merge_rows_cpu
(
merged_row_set
.
begin
(),
merged_row_set
.
end
());
merged_row_set
.
end
());
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
merge_rows_cpu
);
out
.
set_rows
(
merge_rows
);
out
.
set_rows
(
merge_rows
);
out
.
set_height
(
input_height
);
out
.
set_height
(
input_height
);
...
@@ -530,10 +529,10 @@ TEMPLATE_SPECIALIZED_FOR_MERGEADD(float)
...
@@ -530,10 +529,10 @@ TEMPLATE_SPECIALIZED_FOR_MERGEADD(float)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
double
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
double
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
int
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
int
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
int64_t
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
int64_t
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
float16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
float16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
bfloat16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
bfloat16
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
complex
<
float
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
complex
<
float
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
latform
::
complex
<
double
>
)
TEMPLATE_SPECIALIZED_FOR_MERGEADD
(
p
hi
::
dtype
::
complex
<
double
>
)
template
<
typename
T
,
int
block_size
>
template
<
typename
T
,
int
block_size
>
__global__
void
UpdateToTensorKernel
(
const
T
*
selected_rows
,
__global__
void
UpdateToTensorKernel
(
const
T
*
selected_rows
,
...
@@ -591,7 +590,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
...
@@ -591,7 +590,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
void
operator
()(
const
phi
::
GPUContext
&
context
,
void
operator
()(
const
phi
::
GPUContext
&
context
,
const
ScatterOps
&
op
,
const
ScatterOps
&
op
,
const
phi
::
SelectedRows
&
input1
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
)
{
Dense
Tensor
*
input2
)
{
// NOTE: Use SelectedRowsAddToTensor for better performance
// NOTE: Use SelectedRowsAddToTensor for better performance
// no additional MergeAdd called.
// no additional MergeAdd called.
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
...
@@ -602,11 +601,11 @@ struct UpdateToTensor<phi::GPUContext, T> {
...
@@ -602,11 +601,11 @@ struct UpdateToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_height
,
in1_height
,
in2_dims
[
0
],
in2_dims
[
0
],
p
latform
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
p
hi
::
errors
::
InvalidArgument
(
"The two inputs height must be equal."
"But received first input height = "
"But received first input height = "
"[%d], second input height = [%d]"
,
"[%d], second input height = [%d]"
,
in1_height
,
in1_height
,
in2_dims
[
0
]));
in2_dims
[
0
]));
auto
&
in1_value
=
merged_in1
.
value
();
auto
&
in1_value
=
merged_in1
.
value
();
auto
&
in1_rows
=
merged_in1
.
rows
();
auto
&
in1_rows
=
merged_in1
.
rows
();
...
@@ -615,7 +614,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
...
@@ -615,7 +614,7 @@ struct UpdateToTensor<phi::GPUContext, T> {
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
in1_row_numel
,
in1_row_numel
,
input2
->
numel
()
/
in1_height
,
input2
->
numel
()
/
in1_height
,
p
latform
::
errors
::
InvalidArgument
(
p
hi
::
errors
::
InvalidArgument
(
"The two inputs width must be equal."
"The two inputs width must be equal."
"But received first input width = [%d], second input width = [%d]"
,
"But received first input width = [%d], second input width = [%d]"
,
in1_row_numel
,
in1_row_numel
,
...
@@ -624,14 +623,13 @@ struct UpdateToTensor<phi::GPUContext, T> {
...
@@ -624,14 +623,13 @@ struct UpdateToTensor<phi::GPUContext, T> {
auto
*
in1_data
=
in1_value
.
template
data
<
T
>();
auto
*
in1_data
=
in1_value
.
template
data
<
T
>();
auto
*
in2_data
=
input2
->
data
<
T
>
();
auto
*
in2_data
=
input2
->
data
<
T
>
();
dim3
threads
(
platform
::
PADDLE_CUDA_NUM_THREADS
,
1
);
dim3
threads
(
p
addle
::
p
latform
::
PADDLE_CUDA_NUM_THREADS
,
1
);
dim3
grid
(
in1_rows
.
size
(),
1
);
dim3
grid
(
in1_rows
.
size
(),
1
);
UpdateToTensorKernel
<
T
,
platform
::
PADDLE_CUDA_NUM_THREADS
>
UpdateToTensorKernel
<
T
,
p
addle
::
p
latform
::
PADDLE_CUDA_NUM_THREADS
>
<<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
<<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
in1_data
,
in1_rows
.
cuda_data
(),
op
,
in2_data
,
in1_row_numel
);
in1_data
,
in1_rows
.
cuda_data
(),
op
,
in2_data
,
in1_row_numel
);
}
}
};
};
}
// namespace scatter
}
// namespace scatter
}
// namespace math
}
// namespace funcs
}
// namespace operators
}
// namespace phi
}
// namespace paddle
paddle/
fluid/operators/math
/selected_rows_functor.h
→
paddle/
phi/kernels/funcs
/selected_rows_functor.h
浏览文件 @
b6c6f4f9
...
@@ -16,19 +16,17 @@ limitations under the License. */
...
@@ -16,19 +16,17 @@ limitations under the License. */
#include <map>
#include <map>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/fluid/framework/selected_rows_utils.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#define INLINE_FOR2(sizei, sizej) \
#define INLINE_FOR2(sizei, sizej) \
for (int64_t i = 0; i < sizei; i++) \
for (int64_t i = 0; i < sizei; i++) \
for (int64_t j = 0; j < sizej; j++)
for (int64_t j = 0; j < sizej; j++)
namespace
paddle
{
namespace
phi
{
namespace
operators
{
namespace
funcs
{
namespace
math
{
// SelectedRows + SelectedRows will simplely concat value and rows.
// SelectedRows + SelectedRows will simplely concat value and rows.
// The real computation happens in dealing with LoDTensor.
// The real computation happens in dealing with LoDTensor.
...
@@ -44,8 +42,8 @@ template <typename DeviceContext, typename T>
...
@@ -44,8 +42,8 @@ template <typename DeviceContext, typename T>
struct
SelectedRowsAddTensor
{
struct
SelectedRowsAddTensor
{
void
operator
()(
const
DeviceContext
&
context
,
void
operator
()(
const
DeviceContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
phi
::
SelectedRows
&
input1
,
const
framework
::
Tensor
&
input2
,
const
phi
::
Dense
Tensor
&
input2
,
framework
::
Tensor
*
output
);
phi
::
Dense
Tensor
*
output
);
};
};
// input2 = input1 + input2
// input2 = input1 + input2
...
@@ -73,7 +71,7 @@ template <typename DeviceContext, typename T>
...
@@ -73,7 +71,7 @@ template <typename DeviceContext, typename T>
struct
SelectedRowsAddToTensor
{
struct
SelectedRowsAddToTensor
{
void
operator
()(
const
DeviceContext
&
context
,
void
operator
()(
const
DeviceContext
&
context
,
const
phi
::
SelectedRows
&
input1
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
);
phi
::
Dense
Tensor
*
input2
);
};
};
namespace
scatter
{
namespace
scatter
{
...
@@ -115,10 +113,9 @@ struct UpdateToTensor {
...
@@ -115,10 +113,9 @@ struct UpdateToTensor {
void
operator
()(
const
DeviceContext
&
context
,
void
operator
()(
const
DeviceContext
&
context
,
const
ScatterOps
&
op
,
const
ScatterOps
&
op
,
const
phi
::
SelectedRows
&
input1
,
const
phi
::
SelectedRows
&
input1
,
framework
::
Tensor
*
input2
);
phi
::
Dense
Tensor
*
input2
);
};
};
}
// namespace scatter
}
// namespace scatter
}
// namespace math
}
// namespace funcs
}
// namespace operators
}
// namespace phi
}
// namespace paddle
paddle/phi/kernels/gpu/adagrad_kernel.cu
浏览文件 @
b6c6f4f9
...
@@ -14,11 +14,11 @@
...
@@ -14,11 +14,11 @@
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
#include "paddle/phi/kernels/impl/adagrad_kernel_impl.h"
namespace
phi
{
namespace
phi
{
...
@@ -85,7 +85,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
...
@@ -85,7 +85,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
DenseTensor
*
param
)
{
DenseTensor
*
param
)
{
// 1. g_m.rows = set(g.rows)
// 1. g_m.rows = set(g.rows)
auto
grad_width
=
grad
.
value
().
dims
()[
1
];
auto
grad_width
=
grad
.
value
().
dims
()[
1
];
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
phi
::
GPUContext
,
T
>
merge_func
;
auto
grad_merge
=
merge_func
(
context
,
grad
);
auto
grad_merge
=
merge_func
(
context
,
grad
);
auto
*
grad_merge_data
=
grad_merge
.
mutable_value
()
->
template
data
<
T
>();
auto
*
grad_merge_data
=
grad_merge
.
mutable_value
()
->
template
data
<
T
>();
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
grad_merge
.
rows
());
paddle
::
framework
::
Vector
<
int64_t
>
merge_rows
(
grad_merge
.
rows
());
...
@@ -93,8 +93,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
...
@@ -93,8 +93,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
auto
grad_square
=
auto
grad_square
=
SquareSelectedRows
<
phi
::
GPUContext
,
T
>
(
context
,
grad_merge
);
SquareSelectedRows
<
phi
::
GPUContext
,
T
>
(
context
,
grad_merge
);
paddle
::
operators
::
math
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
phi
::
funcs
::
SelectedRowsAddToTensor
<
phi
::
GPUContext
,
T
>
functor
;
functor
;
functor
(
context
,
grad_square
,
moment
);
functor
(
context
,
grad_square
,
moment
);
// 3. update parameter
// 3. update parameter
...
...
paddle/phi/kernels/gpu/adam_kernel.cu
浏览文件 @
b6c6f4f9
...
@@ -19,7 +19,6 @@
...
@@ -19,7 +19,6 @@
#include <vector>
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/float16.h"
...
...
paddle/phi/kernels/gpu/adamw_kernel.cu
浏览文件 @
b6c6f4f9
...
@@ -19,7 +19,6 @@
...
@@ -19,7 +19,6 @@
#include <vector>
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/float16.h"
...
@@ -27,6 +26,7 @@
...
@@ -27,6 +26,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
template
<
typename
T
,
typename
MT
>
template
<
typename
T
,
typename
MT
>
...
...
paddle/phi/kernels/impl/adagrad_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
#pragma once
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/phi/kernels/adagrad_kernel.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
...
...
paddle/phi/kernels/impl/add_n_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -21,7 +21,7 @@
...
@@ -21,7 +21,7 @@
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
...
...
paddle/phi/kernels/impl/clip_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
#pragma once
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/fluid/platform/transform.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
...
...
paddle/phi/kernels/impl/momentum_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -14,12 +14,12 @@
...
@@ -14,12 +14,12 @@
#pragma once
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/momentum_kernel.h"
#include "paddle/phi/kernels/momentum_kernel.h"
namespace
phi
{
namespace
phi
{
...
@@ -547,7 +547,7 @@ void MomentumSparseImpl(const Context& ctx,
...
@@ -547,7 +547,7 @@ void MomentumSparseImpl(const Context& ctx,
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
ctx
,
grad
,
merged_grad
);
merge_func
(
ctx
,
grad
,
merged_grad
);
auto
*
grad_merge_rows
=
merged_grad
->
mutable_rows
();
auto
*
grad_merge_rows
=
merged_grad
->
mutable_rows
();
...
...
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -16,10 +16,10 @@
...
@@ -16,10 +16,10 @@
#include <math.h>
#include <math.h>
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/algorithm.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/rmsprop_kernel.h"
#include "paddle/phi/kernels/rmsprop_kernel.h"
namespace
phi
{
namespace
phi
{
...
@@ -304,7 +304,7 @@ void RmspropSparseKernel(const Context &ctx,
...
@@ -304,7 +304,7 @@ void RmspropSparseKernel(const Context &ctx,
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
phi
::
SelectedRows
*
merged_grad
=
&
tmp_merged_grad
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
ctx
,
grad
,
merged_grad
);
merge_func
(
ctx
,
grad
,
merged_grad
);
funcs
::
ForRange
<
Context
>
for_range
(
ctx
,
limit
);
funcs
::
ForRange
<
Context
>
for_range
(
ctx
,
limit
);
...
...
paddle/phi/kernels/selected_rows/clip_kernel.h
浏览文件 @
b6c6f4f9
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
#pragma once
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/device_context.h"
...
...
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
浏览文件 @
b6c6f4f9
...
@@ -16,11 +16,11 @@
...
@@ -16,11 +16,11 @@
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
namespace
sr
{
namespace
sr
{
...
@@ -118,7 +118,7 @@ void AdamDenseParamSparseGradKernel(
...
@@ -118,7 +118,7 @@ void AdamDenseParamSparseGradKernel(
}
else
{
}
else
{
// merge duplicated rows if any.
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
grad_merge_ptr
=
&
tmp_grad_merge
;
}
}
...
...
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
浏览文件 @
b6c6f4f9
...
@@ -15,7 +15,6 @@
...
@@ -15,7 +15,6 @@
#include "paddle/phi/kernels/selected_rows/adam_kernel.h"
#include "paddle/phi/kernels/selected_rows/adam_kernel.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/float16.h"
...
@@ -23,6 +22,7 @@
...
@@ -23,6 +22,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
namespace
sr
{
namespace
sr
{
...
@@ -191,7 +191,7 @@ void AdamDenseParamSparseGradKernel(
...
@@ -191,7 +191,7 @@ void AdamDenseParamSparseGradKernel(
}
else
{
}
else
{
// merge duplicated rows if any.
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
grad_merge_ptr
=
&
tmp_grad_merge
;
}
}
...
...
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
浏览文件 @
b6c6f4f9
...
@@ -19,7 +19,6 @@
...
@@ -19,7 +19,6 @@
#include <vector>
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/common/float16.h"
...
@@ -27,6 +26,7 @@
...
@@ -27,6 +26,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/for_range.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
namespace
sr
{
namespace
sr
{
...
@@ -214,7 +214,7 @@ void AdamwDenseParamSparseGradKernel(
...
@@ -214,7 +214,7 @@ void AdamwDenseParamSparseGradKernel(
}
else
{
}
else
{
// merge duplicated rows if any.
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
grad_merge_ptr
=
&
tmp_grad_merge
;
}
}
...
...
paddle/phi/kernels/selected_rows/impl/add_n_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -16,10 +16,10 @@
...
@@ -16,10 +16,10 @@
#include "paddle/phi/kernels/selected_rows/add_n_kernel.h"
#include "paddle/phi/kernels/selected_rows/add_n_kernel.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/math_function.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
namespace
sr
{
namespace
sr
{
...
@@ -73,7 +73,7 @@ void AddNKernel(const Context &dev_ctx,
...
@@ -73,7 +73,7 @@ void AddNKernel(const Context &dev_ctx,
}
}
}
}
if
(
has_data
)
{
if
(
has_data
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_add
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_add
;
merge_add
(
dev_ctx
,
inputs
,
out
);
merge_add
(
dev_ctx
,
inputs
,
out
);
out
->
SyncIndex
();
out
->
SyncIndex
();
...
...
paddle/phi/kernels/selected_rows/impl/clip_by_norm_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -14,11 +14,11 @@
...
@@ -14,11 +14,11 @@
#pragma once
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/clip_by_norm_kernel.h"
#include "paddle/phi/kernels/clip_by_norm_kernel.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
#include "paddle/phi/kernels/selected_rows/clip_by_norm_kernel.h"
namespace
phi
{
namespace
phi
{
...
@@ -30,7 +30,7 @@ void ClipByNormKernel(const Context& dev_ctx,
...
@@ -30,7 +30,7 @@ void ClipByNormKernel(const Context& dev_ctx,
float
max_norm
,
float
max_norm
,
SelectedRows
*
out
)
{
SelectedRows
*
out
)
{
phi
::
SelectedRows
merged_input
;
phi
::
SelectedRows
merged_input
;
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
&
merged_input
);
merge_func
(
dev_ctx
,
x
,
&
merged_input
);
auto
input
=
&
(
merged_input
.
value
());
auto
input
=
&
(
merged_input
.
value
());
out
->
set_rows
(
merged_input
.
rows
());
out
->
set_rows
(
merged_input
.
rows
());
...
...
paddle/phi/kernels/selected_rows/impl/clip_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -14,11 +14,11 @@
...
@@ -14,11 +14,11 @@
#pragma once
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
#include "paddle/phi/kernels/selected_rows/clip_kernel.h"
#include "paddle/phi/kernels/selected_rows/clip_kernel.h"
namespace
phi
{
namespace
phi
{
...
@@ -45,7 +45,7 @@ void ClipSparseKernel(const Context& dev_ctx,
...
@@ -45,7 +45,7 @@ void ClipSparseKernel(const Context& dev_ctx,
out
,
out
,
errors
::
InvalidArgument
(
"Inplace clip is not allowed "
errors
::
InvalidArgument
(
"Inplace clip is not allowed "
"when x is SelectedRows"
));
"when x is SelectedRows"
));
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
out
);
merge_func
(
dev_ctx
,
x
,
out
);
auto
*
out_tensor
=
out
->
mutable_value
();
auto
*
out_tensor
=
out
->
mutable_value
();
auto
*
out_data
=
out_tensor
->
data
<
T
>
();
auto
*
out_data
=
out_tensor
->
data
<
T
>
();
...
...
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
浏览文件 @
b6c6f4f9
...
@@ -12,10 +12,10 @@
...
@@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#pragma once
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/core/selected_rows.h"
#include "paddle/phi/kernels/funcs/lamb_functors.h"
#include "paddle/phi/kernels/funcs/lamb_functors.h"
#include "paddle/phi/kernels/funcs/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
namespace
sr
{
namespace
sr
{
...
@@ -212,7 +212,7 @@ void ComputeRowImpl(const Context& dev_ctx,
...
@@ -212,7 +212,7 @@ void ComputeRowImpl(const Context& dev_ctx,
}
else
{
}
else
{
// merge duplicated rows if any.
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
// The rows of grad_merge have been sorted inside MergeAdd functor
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
grad_merge_ptr
=
&
tmp_grad_merge
;
grad_merge_ptr
=
&
tmp_grad_merge
;
}
}
...
...
paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
浏览文件 @
b6c6f4f9
...
@@ -18,7 +18,7 @@
...
@@ -18,7 +18,7 @@
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
namespace
sr
{
namespace
sr
{
...
@@ -27,7 +27,7 @@ template <typename T, typename Context>
...
@@ -27,7 +27,7 @@ template <typename T, typename Context>
void
MergeSelectedRowsKernel
(
const
Context
&
dev_ctx
,
void
MergeSelectedRowsKernel
(
const
Context
&
dev_ctx
,
const
SelectedRows
&
x
,
const
SelectedRows
&
x
,
SelectedRows
*
out
)
{
SelectedRows
*
out
)
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
T
>
merge_func
;
merge_func
(
dev_ctx
,
x
,
out
);
merge_func
(
dev_ctx
,
x
,
out
);
}
}
...
...
paddle/phi/kernels/selected_rows/xpu/adam_kernel.cc
浏览文件 @
b6c6f4f9
...
@@ -19,7 +19,7 @@
...
@@ -19,7 +19,7 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
#include "paddle/phi/kernels/funcs/adam_functors.h"
// See Note [ Why still include the fluid headers? ]
// See Note [ Why still include the fluid headers? ]
#include "paddle/
fluid/operators/math
/selected_rows_functor.h"
#include "paddle/
phi/kernels/funcs
/selected_rows_functor.h"
namespace
phi
{
namespace
phi
{
namespace
sr
{
namespace
sr
{
...
@@ -181,7 +181,7 @@ void AdamDenseParamSparseGradKernel(
...
@@ -181,7 +181,7 @@ void AdamDenseParamSparseGradKernel(
if
(
is_strict_sorted
)
{
if
(
is_strict_sorted
)
{
grad_merge_ptr
=
&
grad
;
grad_merge_ptr
=
&
grad
;
}
else
{
}
else
{
p
addle
::
operators
::
math
::
scatter
::
MergeAdd
<
Context
,
float
>
merge_func
;
p
hi
::
funcs
::
scatter
::
MergeAdd
<
Context
,
float
>
merge_func
;
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
merge_func
(
dev_ctx
,
grad
,
&
tmp_grad_merge
,
true
);
xpu_wait
(
dev_ctx
.
x_context
()
->
xpu_stream
);
xpu_wait
(
dev_ctx
.
x_context
()
->
xpu_stream
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录