PaddlePaddle / Paddle, commit 35d7d1f0 (unverified)
Authored by Huang Jiyi on Feb 08, 2023; committed via GitHub on Feb 08, 2023

move mixed_vector (#50282)

Parent: e92e3aab

Showing 83 changed files with 299 additions and 305 deletions (+299 -305)
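The change repeated across the files listed below is a namespace move: Vector and MixVector are now taken from the phi namespace, declared in paddle/phi/core/mixed_vector.h rather than paddle/fluid/framework/mixed_vector.h. A minimal before/after sketch of a typical call site (the variable names are illustrative, not taken from the diff):

    // before: the containers came from the fluid framework layer
    #include "paddle/fluid/framework/mixed_vector.h"
    paddle::framework::Vector<size_t> offsets({0, 4, 9});
    paddle::framework::MixVector<size_t> mix_offsets(&offsets);

    // after: the same types are provided by the phi core library
    #include "paddle/phi/core/mixed_vector.h"
    phi::Vector<size_t> offsets({0, 4, 9});
    phi::MixVector<size_t> mix_offsets(&offsets);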
Changed files:

paddle/fluid/distributed/collective/reducer.cc  +3 -3
paddle/fluid/distributed/ps/service/brpc_utils.cc  +1 -1
paddle/fluid/distributed/test/brpc_utils_test.cc  +4 -4
paddle/fluid/framework/CMakeLists.txt  +0 -20
paddle/fluid/framework/data_feed.cc  +1 -1
paddle/fluid/framework/data_type_test.cc  +1 -0
paddle/fluid/framework/dlpack_tensor.cc  +1 -0
paddle/fluid/framework/dlpack_tensor_test.cc  +1 -0
paddle/fluid/framework/eigen_test.cc  +3 -2
paddle/fluid/framework/fleet/heter_wrapper.cc  +2 -2
paddle/fluid/framework/lod_tensor.h  +2 -2
paddle/fluid/framework/lod_tensor_test.cu  +2 -2
paddle/fluid/framework/tensor.h  +2 -2
paddle/fluid/imperative/all_reduce.cc  +4 -4
paddle/fluid/imperative/gloo_context.cc  +2 -2
paddle/fluid/operators/assign_op_test.cc  +1 -1
paddle/fluid/operators/ctc_align_op.cu  +1 -1
paddle/fluid/operators/cvm_op.cu  +1 -1
paddle/fluid/operators/detection/box_clip_op.cu  +1 -1
paddle/fluid/operators/detection/collect_fpn_proposals_op.cu  +1 -1
paddle/fluid/operators/detection/generate_proposals_op.cu  +1 -1
paddle/fluid/operators/detection/target_assign_op.h  +2 -2
paddle/fluid/operators/filter_by_instag_op.cu  +8 -8
paddle/fluid/operators/filter_by_instag_op.h  +2 -2
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h  +1 -1
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu  +6 -6
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc  +2 -2
paddle/fluid/operators/gru_op.cc  +1 -1
paddle/fluid/operators/gru_op.cu.cc  +1 -1
paddle/fluid/operators/gru_op.h  +2 -2
paddle/fluid/operators/lookup_table_op.cu  +2 -2
paddle/fluid/operators/lookup_table_v2_op.cu  +2 -2
paddle/fluid/operators/lstm_op.h  +3 -3
paddle/fluid/operators/lstmp_op.h  +3 -3
paddle/fluid/operators/math/beam_search.cu  +2 -2
paddle/fluid/operators/math/sequence_padding.cc  +1 -1
paddle/fluid/operators/math/sequence_padding.cu  +2 -2
paddle/fluid/operators/math/sequence_padding.h  +3 -3
paddle/fluid/operators/math/sequence_pooling.cu  +2 -2
paddle/fluid/operators/optimizers/ftrl_op.h  +1 -1
paddle/fluid/operators/optimizers/sgd_op.cu  +1 -1
paddle/fluid/operators/row_conv_op.cc  +2 -2
paddle/fluid/operators/row_conv_op.cu  +4 -4
paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu  +1 -1
paddle/fluid/operators/sequence_ops/sequence_erase_op.cu  +1 -1
paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu  +7 -8
paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h  +16 -20
paddle/fluid/operators/sequence_ops/sequence_expand_op.cu  +18 -19
paddle/fluid/operators/sequence_ops/sequence_expand_op.h  +24 -28
paddle/fluid/operators/sequence_ops/sequence_reverse_op.h  +1 -1
paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu  +4 -4
paddle/fluid/operators/sequence_ops/sequence_softmax_op.h  +7 -8
paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h  +1 -1
paddle/fluid/operators/shuffle_batch_op.h  +2 -2
paddle/fluid/operators/tdm_child_op.h  +1 -1
paddle/fluid/operators/tdm_sampler_op.h  +1 -1
paddle/fluid/pybind/tensor.cc  +1 -1
paddle/phi/core/CMakeLists.txt  +5 -0
paddle/phi/core/mixed_vector.cc  +13 -16
paddle/phi/core/mixed_vector.h  +27 -26
paddle/phi/kernels/cpu/edit_distance_kernel.cc  +3 -3
paddle/phi/kernels/funcs/selected_rows_functor.cc  +4 -4
paddle/phi/kernels/funcs/selected_rows_functor.cu  +11 -11
paddle/phi/kernels/funcs/sequence2batch.cc  +1 -1
paddle/phi/kernels/funcs/sequence2batch.cu  +2 -2
paddle/phi/kernels/funcs/sequence2batch.h  +1 -1
paddle/phi/kernels/funcs/sequence_scale.cu  +1 -1
paddle/phi/kernels/gpu/adagrad_kernel.cu  +2 -2
paddle/phi/kernels/gpu/edit_distance_kernel.cu  +2 -2
paddle/phi/kernels/gpu/embedding_grad_kernel.cu  +3 -3
paddle/phi/kernels/gpu/sgd_kernel.cu  +2 -2
paddle/phi/kernels/impl/momentum_kernel_impl.h  +1 -1
paddle/phi/kernels/impl/rmsprop_kernel_impl.h  +1 -1
paddle/phi/kernels/impl/warpctc_kernel_impl.h  +3 -3
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc  +1 -1
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu  +1 -1
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu  +1 -1
paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc  +2 -2
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h  +1 -1
paddle/phi/tests/core/CMakeLists.txt  +17 -0
paddle/phi/tests/core/test_mixed_vector.cc  +5 -5
paddle/phi/tests/core/test_mixed_vector.cu  +14 -13
tools/parallel_UT_rule.py  +2 -2
paddle/fluid/distributed/collective/reducer.cc
@@ -1113,7 +1113,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
   const auto &rank_ = process_group_->GetRank();
   const auto &size_ = process_group_->GetSize();
-  framework::Vector<int64_t> rows_num_vector(size_);
+  phi::Vector<int64_t> rows_num_vector(size_);
   rows_num_vector[rank_] = static_cast<int64_t>(src_rows.size());
   Tensor rows_num_tensor = paddle::experimental::empty(
@@ -1183,7 +1183,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
   }
   process_group_->AllGather(in, out)->Synchronize();
-  framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
+  phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
   auto *dst_rows_dense_tensor =
       std::dynamic_pointer_cast<phi::DenseTensor>(dst_rows_tensor.impl())
           .get();
@@ -1262,7 +1262,7 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
   Tensor dst_rows_tensor =
       paddle::experimental::concat(rows_tensors, phi::Scalar(0));
-  framework::Vector<int64_t> dst_rows_vector(rows_num, 0);
+  phi::Vector<int64_t> dst_rows_vector(rows_num, 0);
   auto *dst_rows_dense_tensor =
       std::dynamic_pointer_cast<phi::DenseTensor>(dst_rows_tensor.impl())
           .get();
paddle/fluid/distributed/ps/service/brpc_utils.cc
@@ -236,7 +236,7 @@ void DeserializeLodTensor(framework::Variable* var,
     framework::LoD lod;
     for (int i = 0; i < msg.lod_level(); ++i) {
-      framework::Vector<size_t> v;
+      phi::Vector<size_t> v;
       for (int j = 0; j < msg.lod(i).lod_data_size(); ++j) {
         v.push_back(msg.lod(i).lod_data(j));
       }
paddle/fluid/distributed/test/brpc_utils_test.cc
@@ -39,7 +39,7 @@ void CreateVarsOnScope(framework::Scope* scope,
   auto* tensor1 = var1->GetMutable<phi::DenseTensor>();
   tensor1->Resize(phi::make_ddim({512, 8, 4, 2}));
   framework::LoD lod1;
-  lod1.push_back(framework::Vector<size_t>({1, 3, 8}));
+  lod1.push_back(phi::Vector<size_t>({1, 3, 8}));
   tensor1->set_lod(lod1);
   tensor1->mutable_data<float>(*place);
   phi::funcs::set_constant(ctx, tensor1, 31.9);
@@ -49,7 +49,7 @@ void CreateVarsOnScope(framework::Scope* scope,
   auto* tensor2 = var2->GetMutable<phi::DenseTensor>();
   tensor2->Resize(phi::make_ddim({1000, 64}));
   framework::LoD lod2;
-  lod2.push_back(framework::Vector<size_t>({1, 1}));
+  lod2.push_back(phi::Vector<size_t>({1, 1}));
   tensor2->set_lod(lod2);
   tensor2->mutable_data<int>(*place);
   phi::funcs::set_constant(ctx, tensor2, 100);
@@ -98,7 +98,7 @@ void RunMultiVarMsg(platform::Place place) {
   framework::Variable* var1 = scope_recv.FindVar("x1");
   auto* tensor1 = var1->GetMutable<phi::DenseTensor>();
   EXPECT_EQ(tensor1->dims(), phi::make_ddim({512, 8, 4, 2}));
-  // EXPECT_EQ(tensor1->lod(), framework::Vector<size_t>({1, 3, 8}));
+  // EXPECT_EQ(tensor1->lod(), phi::Vector<size_t>({1, 3, 8}));
   auto* tensor_data1 = const_cast<float*>(tensor1->data<float>());
   int tensor_numel1 = 512 * 8 * 4 * 2;
   for (int i = 0; i < tensor_numel1; ++i)
@@ -108,7 +108,7 @@ void RunMultiVarMsg(platform::Place place) {
   framework::Variable* var2 = scope_recv.FindVar("x2");
   auto* tensor2 = var2->GetMutable<phi::DenseTensor>();
   EXPECT_EQ(tensor2->dims(), phi::make_ddim({1000, 64}));
-  // EXPECT_EQ(tensor2->lod(), framework::Vector<size_t>({1, 1}));
+  // EXPECT_EQ(tensor2->lod(), phi::Vector<size_t>({1, 1}));
   auto* tensor_data2 = const_cast<int*>(tensor2->data<int>());
   int tensor_numel2 = 1000 * 64;
   for (int i = 0; i < tensor_numel2; ++i) EXPECT_EQ(tensor_data2[i], 100);
paddle/fluid/framework/CMakeLists.txt
@@ -162,27 +162,7 @@ cc_test(
   eigen_test
   SRCS eigen_test.cc
   DEPS tensor)
-cc_library(
-  mixed_vector
-  SRCS mixed_vector.cc
-  DEPS device_context place memory)
-if(WITH_GPU)
-  nv_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc mixed_vector_test.cu
-    DEPS mixed_vector place memory device_context tensor)
-elseif(WITH_ROCM)
-  hip_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc mixed_vector_test.cu
-    DEPS mixed_vector place memory device_context tensor)
-else()
-  cc_test(
-    mixed_vector_test
-    SRCS mixed_vector_test.cc
-    DEPS mixed_vector place memory device_context tensor)
-endif()
 cc_library(
   lod_tensor
   SRCS lod_tensor.cc
paddle/fluid/framework/data_feed.cc
@@ -2815,7 +2815,7 @@ void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) {
     LoD& lod = (*feed->mutable_lod());
     lod.resize(1);
     lod[0].resize(offset_cols_size);
-    paddle::framework::MixVector<size_t> mixv_lod(&lod[0]);
+    phi::MixVector<size_t> mixv_lod(&lod[0]);
     memcpy(mixv_lod.MutableData(platform::CPUPlace()),
            off_start_ptr,
            offset_cols_size * sizeof(size_t));
paddle/fluid/framework/data_type_test.cc
@@ -18,6 +18,7 @@
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/platform/place.h"
 
 TEST(DataType, float16) {
   using paddle::platform::CPUPlace;
paddle/fluid/framework/dlpack_tensor.cc
@@ -15,6 +15,7 @@
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/data_type.h"
+#include "paddle/fluid/platform/place.h"
 
 namespace paddle {
 namespace framework {
paddle/fluid/framework/dlpack_tensor_test.cc
@@ -18,6 +18,7 @@
 #include <gtest/gtest.h>
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
+#include "paddle/fluid/platform/place.h"
 
 namespace paddle {
 namespace framework {
paddle/fluid/framework/eigen_test.cc
@@ -12,10 +12,11 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/framework/eigen.h"
 #include <gtest/gtest.h>
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/ddim.h"
 
 namespace paddle {
paddle/fluid/framework/fleet/heter_wrapper.cc
@@ -158,7 +158,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
   LoD lod;
   for (int i = 0; i < req_var.lod_level(); ++i) {
-    framework::Vector<size_t> v;
+    phi::Vector<size_t> v;
     for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) {
       v.push_back(req_var.lod(i).lod_data(j));
     }
@@ -203,7 +203,7 @@ void HeterWrapper::DeSerializeToTensor(Scope* scope,
   LoD lod;
   for (int i = 0; i < req_var.lod_level(); ++i) {
-    framework::Vector<size_t> v;
+    phi::Vector<size_t> v;
     for (int j = 0; j < req_var.lod(i).lod_data_size(); ++j) {
       v.push_back(req_var.lod(i).lod_data(j));
     }
paddle/fluid/framework/lod_tensor.h
@@ -21,12 +21,12 @@ limitations under the License. */
 #include <utility>
 #include <vector>
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/core/ddim.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace framework {
@@ -54,7 +54,7 @@ void MergeLoDTensor(phi::DenseTensor* target,
  * 0 2 4 7
  * 0 2 5 7 10 12 15 20
  */
-using LoD = std::vector<Vector<size_t>>;
+using LoD = std::vector<phi::Vector<size_t>>;
 
 std::string LoDToString(const LoD& lod);
paddle/fluid/framework/lod_tensor_test.cu
@@ -31,7 +31,7 @@ TEST(LoD, data) {
   lod.push_back(std::vector<size_t>({0, 1, 6, 8, 10, 11}));
 
   auto& v = lod[0];
-  paddle::framework::MixVector<size_t> mix_vector_v(&v);
+  phi::MixVector<size_t> mix_vector_v(&v);
   paddle::platform::CUDAPlace gpu(0);
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(test,
@@ -69,7 +69,7 @@ TEST(DenseTensor, LoDInGPU) {
   EXPECT_EQ(lod_tensor.lod_element(0, 4).first, 8UL);
 
   auto lod = lod_tensor.lod();
-  paddle::framework::MixVector<size_t> mix_vector(&(lod[0]));
+  phi::MixVector<size_t> mix_vector(&(lod[0]));
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(test,
paddle/fluid/framework/tensor.h
@@ -15,15 +15,15 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/core/sparse_csr_tensor.h"
 
 namespace paddle {
 namespace framework {
 
-using LoD = std::vector<paddle::framework::Vector<size_t>>;
+using LoD = std::vector<phi::Vector<size_t>>;
 
 }  // namespace framework
 }  // namespace paddle
paddle/fluid/imperative/all_reduce.cc
@@ -104,10 +104,10 @@ static void AllReduce(const phi::SelectedRows &src,
   // 1. Gather rows number from all workers. Here use ncclAllGather to do this,
   // but we can use other ways to implement is in the future
   const auto &src_rows = src.rows();
-  framework::Vector<int64_t> rows_num_vector(strategy.nranks_);
+  phi::Vector<int64_t> rows_num_vector(strategy.nranks_);
   rows_num_vector[strategy.local_rank_] = static_cast<int64_t>(src_rows.size());
   // CUDAMutableData use CalStream
-  paddle::framework::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
+  phi::MixVector<int64_t> mixv_rows_num_vector(&rows_num_vector);
   auto *gpu_rows_num_ptr = mixv_rows_num_vector.CUDAMutableData(place);
   VLOG(4) << "start dev_ctx->wait";
   if (!use_calc_stream) {
@@ -138,9 +138,9 @@ static void AllReduce(const phi::SelectedRows &src,
   auto *dst_rows = dst->mutable_rows();
   dst_rows->resize(rows_num);
-  paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+  phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
   auto *dst_rows_ptr = mixv_dst_rows.CUDAMutableData(place);
-  paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+  phi::MixVector<int64_t> mixv_src_rows(&src_rows);
   const auto *src_rows_ptr = mixv_src_rows.CUDAData(place);
   auto *dst_tensor = dst->mutable_value();
paddle/fluid/imperative/gloo_context.cc
@@ -158,9 +158,9 @@ void GLOOParallelContext::AllReduce(const phi::SelectedRows &src,
           << ", height: " << src.height();
   auto *dst_rows = dst->mutable_rows();
   dst_rows->resize(rows_num);
-  paddle::framework::MixVector<int64_t> mixv_dst_rows(dst_rows);
+  phi::MixVector<int64_t> mixv_dst_rows(dst_rows);
   auto *dst_rows_ptr = mixv_dst_rows.MutableData(place);
-  paddle::framework::MixVector<int64_t> mixv_src_rows(&src_rows);
+  phi::MixVector<int64_t> mixv_src_rows(&src_rows);
   const int64_t *src_rows_ptr = mixv_src_rows.Data(place);
   auto *dst_tensor = dst->mutable_value();
paddle/fluid/operators/assign_op_test.cc
@@ -98,7 +98,7 @@ TEST(AssignOp, AssignSelectedRows) {
   assign_functor(input);
 
   auto& out_selected_row = output.Get<phi::SelectedRows>();
-  const paddle::framework::Vector<int64_t>& out_rows = out_selected_row.rows();
+  const phi::Vector<int64_t>& out_rows = out_selected_row.rows();
   EXPECT_EQ(rows.size(), out_rows.size());
   for (size_t i = 0; i < rows.size(); ++i) {
     EXPECT_EQ(rows[i], out_rows[i]);
paddle/fluid/operators/ctc_align_op.cu
@@ -129,7 +129,7 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
       // merge elements and delete blank
       T* output_data = output->mutable_data<T>({num_tokens, 1}, ctx.GetPlace());
-      paddle::framework::MixVector<size_t> mixv_input_lod(&input_lod[level]);
+      phi::MixVector<size_t> mixv_input_lod(&input_lod[level]);
       MergeAndDelCudaKernel<T>
           <<<1, 1, 0, stream>>>(num_tokens,
                                 tokens,
paddle/fluid/operators/cvm_op.cu
@@ -166,7 +166,7 @@ class CVMGradCUDAKernel : public framework::OpKernel<T> {
           lod[lod.size() - 1],
           platform::errors::PreconditionNotMet(
               "Output(X@GRAD)'s dim[0] must be equal to last element of lod"));
-      paddle::framework::MixVector<size_t> mixv_lod(&lod);
+      phi::MixVector<size_t> mixv_lod(&lod);
       CvmGradComputeKernel<<<(dx_numel + PADDLE_CUDA_NUM_THREADS - 1) /
                                  PADDLE_CUDA_NUM_THREADS,
                              PADDLE_CUDA_NUM_THREADS,
paddle/fluid/operators/detection/box_clip_op.cu
@@ -59,7 +59,7 @@ class GPUBoxClipKernel : public framework::OpKernel<T> {
     auto stream = dev_ctx.stream();
     const size_t batch_size = lod.back().size() - 1;
     T *output_data = output->mutable_data<T>(dev_ctx.GetPlace());
-    paddle::framework::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
+    phi::MixVector<size_t> mix_vector(&abs_offset_lod[0]);
     GPUBoxClip<T, 512><<<batch_size, 512, 0, stream>>>(
         input->data<T>(),
         mix_vector.CUDAMutableData(dev_ctx.GetPlace()),
paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
@@ -19,7 +19,6 @@ namespace cub = hipcub;
 #include <paddle/fluid/memory/allocation/allocator.h>
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/operators/detection/bbox_util.h"
@@ -28,6 +27,7 @@ namespace cub = hipcub;
 #include "paddle/fluid/operators/strided_memcpy.h"
 #include "paddle/fluid/platform/for_range.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/gather.cu.h"
 
 namespace paddle {
paddle/fluid/operators/detection/generate_proposals_op.cu
@@ -18,10 +18,10 @@ limitations under the License. */
 #include <string>
 #include <vector>
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/operators/detection/bbox_util.cu.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/gather.cu.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
paddle/fluid/operators/detection/target_assign_op.h
@@ -121,7 +121,7 @@ class TargetAssignKernel : public framework::OpKernel<T> {
     auto x_lod = x->lod().back();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-    paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
+    phi::MixVector<size_t> mixv_x_lod(&x_lod);
     size_t* x_lod_data = mixv_x_lod.MutableData(ctx.GetPlace());
 #else
     size_t* x_lod_data = x_lod.data();
@@ -155,7 +155,7 @@ class TargetAssignKernel : public framework::OpKernel<T> {
       const int* neg_idx_data = neg_indices->data<int>();
       auto neg_lod = neg_indices->lod().back();
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-      paddle::framework::MixVector<size_t> mixv_neg_lod(&neg_lod);
+      phi::MixVector<size_t> mixv_neg_lod(&neg_lod);
       size_t* neg_lod_data = mixv_neg_lod.MutableData(ctx.GetPlace());
 #else
       size_t* neg_lod_data = neg_lod.data();
paddle/fluid/operators/filter_by_instag_op.cu
@@ -30,11 +30,11 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #if defined(PADDLE_WITH_CUDA)
 namespace cg = cooperative_groups;
@@ -46,7 +46,7 @@ namespace operators {
 using SelectedRows = phi::SelectedRows;
 
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 #define WARP_SIZE 32
 #define MAX_WARP_NUM 32
@@ -376,7 +376,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
     }
     const size_t x2_lods_size = x2_lods.size() - 1;
-    paddle::framework::MixVector<size_t> mixv_x2_lods(&x2_lods);
+    phi::MixVector<size_t> mixv_x2_lods(&x2_lods);
     size_t* x2_lods_data = mixv_x2_lods.CUDAMutableData(gpu_place);
@@ -401,7 +401,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
       }
     }
-    paddle::framework::MixVector<size_t> mixv_x1_lods(&x1_lods);
+    phi::MixVector<size_t> mixv_x1_lods(&x1_lods);
     size_t* x1_lods_data = mixv_x1_lods.CUDAMutableData(gpu_place);
     auto* x1_data = x1->data<T>();
@@ -433,12 +433,12 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
     Vector<size_t> out_lods(x2_lods_size + 1, 0);
     Vector<size_t> map_lods(x2_lods_size + 1, 0);
-    paddle::framework::MixVector<size_t> mixv_out_lods(&out_lods);
-    paddle::framework::MixVector<size_t> mixv_map_lods(&map_lods);
+    phi::MixVector<size_t> mixv_out_lods(&out_lods);
+    phi::MixVector<size_t> mixv_map_lods(&map_lods);
     // thrust::device_vector<size_t> out_idx(1);
     Vector<size_t> out_idx(1, 0);
-    paddle::framework::MixVector<size_t> mixv_out_idx(&out_idx);
+    phi::MixVector<size_t> mixv_out_idx(&out_idx);
     size_t* out_idx_data = mixv_out_idx.CUDAMutableData(gpu_place);
     size_t* out_lods_data = mixv_out_lods.CUDAMutableData(gpu_place);
@@ -500,7 +500,7 @@ class FilterByInstagGPUKernel : public framework::OpKernel<T> {
     } else {
       Vector<size_t> map_lods(2, 0);
-      paddle::framework::MixVector<size_t> mixv_map_lods(&map_lods);
+      phi::MixVector<size_t> mixv_map_lods(&map_lods);
       thrust::device_ptr<int64_t> map_data_ptr(map_data);
       map_data_ptr[0] = 0;
paddle/fluid/operators/filter_by_instag_op.h
@@ -23,16 +23,16 @@
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
 
 using SelectedRows = phi::SelectedRows;
 
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 template <typename T>
 class FilterByInstagKernel : public framework::OpKernel<T> {
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
@@ -256,7 +256,7 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
       auto lod = ids->lod()[0];
       int64_t out_width = d_output->dims()[1];
 
-      framework::Vector<int64_t>* new_rows = d_table->mutable_rows();
+      phi::Vector<int64_t>* new_rows = d_table->mutable_rows();
       new_rows->resize(ids_num);
       std::memcpy(&(*new_rows)[0], ids_data, ids_num * sizeof(int64_t));
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
@@ -14,16 +14,16 @@
 #include <string>
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/operators/fused/fused_seqpool_cvm_op.h"
 #include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
 
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 #define CUDA_KERNEL_LOOP(i, n)                                  \
   for (auto i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
@@ -441,7 +441,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
     int embedding_size = inputs[0]->numel() / inputs[0]->dims()[0];
     int batch_size = -1;
-    std::vector<paddle::framework::MixVector<size_t>*> mix_lods_v(slot_size);
+    std::vector<phi::MixVector<size_t>*> mix_lods_v(slot_size);
     for (size_t i = 0; i < slot_size; ++i) {
       const auto* input = inputs[i];
@@ -480,7 +480,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
      }
      output_data[i] = reinterpret_cast<T*>(
          dev_ctx.Alloc<T>(output, output->numel() * sizeof(T)));
-     mix_lods_v[i] = new paddle::framework::MixVector<size_t>(&lods);
+     mix_lods_v[i] = new phi::MixVector<size_t>(&lods);
      lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace());
      seqpool_outputs[i].Resize({batch_size, embedding_size});
      seqpool_output_data[i] = reinterpret_cast<T*>(dev_ctx.Alloc<T>(
@@ -527,7 +527,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
     int embedding_size = in_grads[0]->numel() / in_grads[0]->dims()[0];
     int batch_size = -1;
-    std::vector<paddle::framework::MixVector<size_t>*> mix_lods_v(slot_size);
+    std::vector<phi::MixVector<size_t>*> mix_lods_v(slot_size);
     for (size_t i = 0; i < slot_size; ++i) {
       auto* in_grad = in_grads[i];
@@ -563,7 +563,7 @@ class FusedSeqpoolCVMGradCUDAKernel : public framework::OpKernel<T> {
      in_grads_data[i] = reinterpret_cast<T*>(
          dev_ctx.Alloc<T>(in_grad, in_grad->numel() * sizeof(T)));
-     mix_lods_v[i] = new paddle::framework::MixVector<size_t>(&lods);
+     mix_lods_v[i] = new phi::MixVector<size_t>(&lods);
      lods_data[i] = mix_lods_v[i]->CUDAData(ctx.GetPlace());
      cvm_data[i] = reinterpret_cast<const T*>(cvm->data<T>());
     }
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
@@ -17,11 +17,11 @@ limitations under the License. */
 #include <memory>
 
 #include "dnnl.hpp"  // NOLINT
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/fused/multi_gru_op.h"
 #include "paddle/phi/backends/onednn/onednn_reuse.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
@@ -678,7 +678,7 @@ class MultiGRUHandler {
   const std::vector<const phi::DenseTensor*> biases_;
   phi::DenseTensor* hidden_;
   std::vector<dnnl::primitive_attr> attrs_;
-  const paddle::framework::Vector<size_t>& x_lod_;
+  const phi::Vector<size_t>& x_lod_;
 };
 
 template <typename T>
paddle/fluid/operators/gru_op.cc
@@ -372,7 +372,7 @@ class GRUCPUKernel : public framework::OpKernel<T> {
         const_cast<T*>(weight_data + 2 * frame_size * frame_size);
     phi::DenseTensor ordered_h0;
 
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (h0) {
       // Since the batch computing for GRU reorders the input sequences
paddle/fluid/operators/gru_op.cu.cc
@@ -75,7 +75,7 @@ class GRUKernel : public framework::OpKernel<T> {
         const_cast<T*>(weight_data + 2 * frame_size * frame_size);
     phi::DenseTensor ordered_h0;
 
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (h0) {
       // Since the batch computing for GRU reorders the input sequences
paddle/fluid/operators/gru_op.h
@@ -28,7 +28,7 @@ namespace operators {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
                              const phi::DenseTensor& src,
-                             framework::Vector<size_t> index_lod,
+                             phi::Vector<size_t> index_lod,
                              phi::DenseTensor* dst,
                              bool indexed_src) {
   phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
@@ -79,7 +79,7 @@ class GRUGradKernel : public framework::OpKernel<T> {
     phi::DenseTensor ordered_h0, ordered_h0_grad;
 
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (h0) {
       ReorderInitState<DeviceContext, T>(
paddle/fluid/operators/lookup_table_op.cu
@@ -169,12 +169,12 @@ class LookupTableGradCUDAKernel : public framework::OpKernel<T> {
       auto stream = dev_ctx.stream();
       // copy GPU memory to CPU pinned memory
-      framework::Vector<int64_t> new_rows;
+      phi::Vector<int64_t> new_rows;
       new_rows.resize(ids_num);
       auto gpu_place = context.GetPlace();
 
       // TODO(yuyang18): Strange code here.
-      paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+      phi::MixVector<int64_t> mixv_new_rows(&new_rows);
       memory::Copy(gpu_place,
                    mixv_new_rows.CUDAMutableData(context.GetPlace()),
                    gpu_place,
paddle/fluid/operators/lookup_table_v2_op.cu
@@ -159,11 +159,11 @@ struct LookupTableV2GradCUDAFunctor {
       dim3 threads(128, 8);
       dim3 grids(8, 1);
       auto stream = dev_ctx.stream();
-      framework::Vector<int64_t> new_rows;
+      phi::Vector<int64_t> new_rows;
       new_rows.resize(ids_num);
       auto gpu_place = context_.GetPlace();
 
-      paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+      phi::MixVector<int64_t> mixv_new_rows(&new_rows);
       if (!std::is_same<IdT, int64_t>::value) {
         InputTypeConvert<<<grids, threads, 0, stream>>>(
             ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
paddle/fluid/operators/lstm_op.h
@@ -27,7 +27,7 @@ namespace operators {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
                              const phi::DenseTensor& src,
-                             framework::Vector<size_t> index_lod,
+                             phi::Vector<size_t> index_lod,
                              phi::DenseTensor* dst,
                              bool indexed_src) {
   phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
@@ -95,7 +95,7 @@ class LSTMKernel : public framework::OpKernel<T> {
     lstm_value.prev_state_value = nullptr;
     phi::DenseTensor ordered_c0;
 
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
 
     if (cell_t0) {
       // Since the batch computing for LSTM reorders the input sequence
@@ -236,7 +236,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
    // ordered_h0_g/c0_g is the reordered gradient of hidden/cell
    // initialization.
    phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
-   framework::Vector<size_t> order(batch_gate->lod()[2]);
+   phi::Vector<size_t> order(batch_gate->lod()[2]);
 
    if (c0) {
      ReorderInitState<DeviceContext, T>(
paddle/fluid/operators/lstmp_op.h
@@ -70,7 +70,7 @@ class _ClipGradFunctor {
 template <typename DeviceContext, typename T>
 inline void ReorderInitState(const DeviceContext& ctx,
                              const phi::DenseTensor& src,
-                             framework::Vector<size_t> index,
+                             phi::Vector<size_t> index,
                              phi::DenseTensor* dst,
                              bool indexed_src) {
   phi::funcs::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
@@ -158,7 +158,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
     phi::DenseTensor ordered_c0;
     phi::DenseTensor ordered_h0;
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
     if (cell_t0) {
       // Since the batch computing for LSTMP reorders the input sequence
@@ -350,7 +350,7 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
     // initialization.
     phi::DenseTensor ordered_h0, ordered_c0, ordered_h0_g, ordered_c0_g;
-    framework::Vector<size_t> order(batch_gate->lod()[2]);
+    phi::Vector<size_t> order(batch_gate->lod()[2]);
     if (c0) {
       ReorderInitState<DeviceContext, T>(
paddle/fluid/operators/math/beam_search.cu
@@ -446,8 +446,8 @@ class BeamSearchFunctor<phi::GPUContext, T> {
     framework::LoD selected_lod(2);
     selected_lod[0].assign(abs_lod[level].begin(), abs_lod[level].end());
     selected_lod[1].resize(scores->dims()[0] + 1);
-    paddle::framework::MixVector<size_t> mix_vector(&selected_lod[1]);
-    paddle::framework::MixVector<size_t> mixv_abs(&abs_lod[level]);
+    phi::MixVector<size_t> mix_vector(&selected_lod[1]);
+    phi::MixVector<size_t> mixv_abs(&abs_lod[level]);
     size_t* selected_offsets = mix_vector.CUDAMutableData(context.GetPlace());
 
     if (num_seqs == 1) {
paddle/fluid/operators/math/sequence_padding.cc
@@ -28,7 +28,7 @@ namespace math {
 template <typename T>
 void CopyValidData(phi::DenseTensor* dst_tensor,
                    const phi::DenseTensor* src_tensor,
-                   const framework::Vector<size_t>& seq_offsets,
+                   const phi::Vector<size_t>& seq_offsets,
                    int pad_seq_len,
                    int step_width,
                    bool norm_by_len,
paddle/fluid/operators/math/sequence_padding.cu
浏览文件 @
35d7d1f0
...
@@ -124,7 +124,7 @@ class PaddingLoDTensorFunctor<phi::GPUContext, T> {
...
@@ -124,7 +124,7 @@ class PaddingLoDTensorFunctor<phi::GPUContext, T> {
T
*
pad_data
=
pad_tensor
->
data
<
T
>
();
T
*
pad_data
=
pad_tensor
->
data
<
T
>
();
const
T
*
pad_value_data
=
pad_value
.
data
<
T
>
();
const
T
*
pad_value_data
=
pad_value
.
data
<
T
>
();
p
addle
::
framework
::
MixVector
<
size_t
>
mix_vector_seq_offsets
(
&
seq_offsets
);
p
hi
::
MixVector
<
size_t
>
mix_vector_seq_offsets
(
&
seq_offsets
);
SequencePaddingKernel
<
T
,
kSeqToPad
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
SequencePaddingKernel
<
T
,
kSeqToPad
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
pad_data
,
pad_data
,
seq_data
,
seq_data
,
...
@@ -191,7 +191,7 @@ class UnpaddingLoDTensorFunctor<phi::GPUContext, T> {
...
@@ -191,7 +191,7 @@ class UnpaddingLoDTensorFunctor<phi::GPUContext, T> {
const
T
*
pad_data
=
pad_tensor
.
data
<
T
>
();
const
T
*
pad_data
=
pad_tensor
.
data
<
T
>
();
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
T
*
seq_data
=
seq_tensor
->
data
<
T
>
();
p
addle
::
framework
::
MixVector
<
size_t
>
mixv_seq_offsets
(
&
seq_offsets
);
p
hi
::
MixVector
<
size_t
>
mixv_seq_offsets
(
&
seq_offsets
);
SequencePaddingKernel
<
T
,
kPadToSeq
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
SequencePaddingKernel
<
T
,
kPadToSeq
><<<
grid
,
threads
,
0
,
context
.
stream
()
>>>
(
seq_data
,
seq_data
,
pad_data
,
pad_data
,
...

paddle/fluid/operators/math/sequence_padding.h
@@ -29,7 +29,7 @@ enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };
 enum CopyType { kSeqToPad, kPadToSeq };
 
 inline static size_t MaximumSequenceLength(
-    const framework::Vector<size_t>& seq_offset) {
+    const phi::Vector<size_t>& seq_offset) {
   size_t seq_num = seq_offset.size() - 1;
   size_t max_seq_len = 0;
   for (size_t i = 0; i < seq_num; ++i) {
@@ -39,7 +39,7 @@ inline static size_t MaximumSequenceLength(
 }
 
 inline static size_t TotalSequenceLength(
-    const framework::Vector<size_t>& seq_offset) {
+    const phi::Vector<size_t>& seq_offset) {
   size_t seq_num = seq_offset.size() - 1;
   size_t total_seq_len = 0;
   for (size_t i = 0; i < seq_num; ++i) {
@@ -50,7 +50,7 @@ inline static size_t TotalSequenceLength(
 inline static void CheckDims(const framework::DDim& seq_tensor_dims,
                              const framework::DDim& pad_tensor_dims,
-                             const framework::Vector<size_t>& seq_offset,
+                             const phi::Vector<size_t>& seq_offset,
                              int64_t padded_seq_len,
                              int64_t step_width,
                              const PadLayout& layout) {
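The three helpers above only change the type of `seq_offset`; what they compute is unchanged. A standalone sketch (not Paddle code) of how an LoD offset vector encodes sequence lengths and how the maximum and total lengths fall out of adjacent differences:

// Standalone sketch: offsets[i]..offsets[i+1] delimit sequence i, so lengths
// are adjacent differences; MaximumSequenceLength / TotalSequenceLength are
// just reductions over those differences.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  // 3 sequences of lengths 2, 3 and 1 packed back to back.
  std::vector<std::size_t> seq_offset = {0, 2, 5, 6};

  std::size_t seq_num = seq_offset.size() - 1;
  std::size_t max_len = 0, total_len = 0;
  for (std::size_t i = 0; i < seq_num; ++i) {
    std::size_t len = seq_offset[i + 1] - seq_offset[i];
    max_len = std::max(max_len, len);
    total_len += len;
  }
  std::cout << "max=" << max_len << " total=" << total_len << '\n';  // max=3 total=6
}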

paddle/fluid/operators/math/sequence_pooling.cu
@@ -203,7 +203,7 @@ class SequencePoolFunctor<phi::GPUContext, T> {
     const size_t item_dim = output->numel() / output->dims()[0];
     dim3 threads(1024, 1);
     dim3 grid(std::max(static_cast<int>(lod.size()) - 1, 1), 1);
-    paddle::framework::MixVector<size_t> mix_vector(&lod);
+    phi::MixVector<size_t> mix_vector(&lod);
     if (pooltype == "MAX") {
       sequence_pool_kernel<T, MaxPoolFunctor<T>>
           <<<grid, threads, 0, context.stream()>>>(
@@ -421,7 +421,7 @@ class SequencePoolGradFunctor<phi::GPUContext, T> {
     const size_t item_dim = in_grad->numel() / in_grad->dims()[0];
     dim3 threads(1024, 1);
     dim3 grid(std::max(static_cast<int>(lod.size()) - 1, 1), 1);
-    paddle::framework::MixVector<size_t> mix_vector(&lod);
+    phi::MixVector<size_t> mix_vector(&lod);
     if (pooltype == "MAX") {
       sequence_pool_grad_kernel<T, MaxPoolGradFunctor<T>>
           <<<grid, threads, 0, context.stream()>>>(
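For context on what the retyped `lod` feeds: these pooling kernels produce one output row per LoD segment, with the CUDA grid sized to `max(lod.size() - 1, 1)` blocks. A standalone sketch (not Paddle code) of SUM and MAX pooling over such segments:

// Standalone sketch of per-segment pooling, item_dim == 1 for brevity.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<float> data = {1.f, 2.f, 3.f, 4.f, 5.f, 6.f};
  std::vector<std::size_t> lod = {0, 2, 5, 6};  // 3 sequences

  for (std::size_t i = 0; i + 1 < lod.size(); ++i) {
    float sum = 0.f, mx = data[lod[i]];
    for (std::size_t j = lod[i]; j < lod[i + 1]; ++j) {
      sum += data[j];
      mx = std::max(mx, data[j]);
    }
    std::cout << "seq " << i << ": SUM=" << sum << " MAX=" << mx << '\n';
  }
}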

paddle/fluid/operators/optimizers/ftrl_op.h
@@ -197,7 +197,7 @@ class FTRLOpKernel : public framework::OpKernel<T> {
           ctx.template device_context<DeviceContext>(), *grad, merged_grad);
       auto* merged_rows = merged_grad->mutable_rows();
-      paddle::framework::MixVector<int64_t> mixv_merged_rows(merged_rows);
+      phi::MixVector<int64_t> mixv_merged_rows(merged_rows);
       const int64_t* rows = mixv_merged_rows.Data(ctx.GetPlace());
       auto row_numel = static_cast<int64_t>(merged_grad->value().dims()[1]);
       auto row_height = static_cast<int64_t>(merged_grad->rows().size());

paddle/fluid/operators/optimizers/sgd_op.cu
@@ -164,7 +164,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
       int thread_x = kThreadsPerBlock;
       int max_threads = ctx.cuda_device_context().GetMaxPhysicalThreadCount();
       int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
-      paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
+      phi::MixVector<int64_t> mixv_in_rows(&in_rows);
       SparseSGDFunctorKernel<<<max_blocks,
                                thread_x,
                                0,

paddle/fluid/operators/row_conv_op.cc
@@ -153,7 +153,7 @@ class RowConvKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
     } else {
       batch_size = x->lod()[0].size() - 1;
     }
-    framework::Vector<size_t> batch_indices(batch_size + 1);
+    phi::Vector<size_t> batch_indices(batch_size + 1);
     int input_dim = 0;
     int timesteps = 0;
     if (is_tensor) {
@@ -231,7 +231,7 @@ class RowConvGradKernel<phi::CPUContext, T> : public framework::OpKernel<T> {
     } else {
       batch_size = x->lod()[0].size() - 1;
     }
-    framework::Vector<size_t> batch_indices(batch_size + 1);
+    phi::Vector<size_t> batch_indices(batch_size + 1);
     int timesteps = 0;
     int input_dim = 0;
     if (is_tensor) {
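The row_conv kernels build a `batch_indices` vector of `batch_size + 1` offsets from either a dense tensor or an LoDTensor. A standalone sketch of that construction; the uniform `i * timesteps` fill for the dense-tensor case is an assumption here, since that branch is collapsed in the hunk:

// Standalone sketch (not Paddle code); MakeBatchIndices is a made-up helper.
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<std::size_t> MakeBatchIndices(bool is_tensor,
                                          std::size_t batch_size,
                                          std::size_t timesteps,
                                          const std::vector<std::size_t>& lod0) {
  std::vector<std::size_t> batch_indices(batch_size + 1);
  if (is_tensor) {
    // Assumed: every sequence of a dense input has the same number of timesteps.
    for (std::size_t i = 0; i <= batch_size; ++i) batch_indices[i] = i * timesteps;
  } else {
    batch_indices = lod0;  // lod()[0] already holds batch_size + 1 offsets
  }
  return batch_indices;
}

int main() {
  for (std::size_t v : MakeBatchIndices(true, 3, 4, {})) std::cout << v << ' ';
  std::cout << '\n';  // 0 4 8 12
}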

paddle/fluid/operators/row_conv_op.cu
@@ -338,7 +338,7 @@ class RowConvKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
       batch_size = X->lod()[0].size() - 1;
     }
     int input_dim = 0;
-    framework::Vector<size_t> batch_indices(batch_size + 1);
+    phi::Vector<size_t> batch_indices(batch_size + 1);
     int timesteps = X->dims()[1];
     if (is_tensor) {
       for (int i = 0; i < batch_size + 1; i++) {
@@ -352,7 +352,7 @@ class RowConvKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
     int num_sequence = batch_indices.size() - 1;
     int future_context = Filter->dims()[0];
-    paddle::framework::MixVector<size_t> mix_vector(&batch_indices);
+    phi::MixVector<size_t> mix_vector(&batch_indices);
     size_t* idx = mix_vector.CUDAMutableData(context.GetPlace());
     auto stream = context.cuda_device_context().stream();
@@ -397,7 +397,7 @@ class RowConvGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
     }
     int input_dim = 0;
-    framework::Vector<size_t> batch_indices(batch_size + 1);
+    phi::Vector<size_t> batch_indices(batch_size + 1);
     int timesteps = X->dims()[1];
     if (is_tensor) {
       for (int i = 0; i < batch_size + 1; i++) {
@@ -411,7 +411,7 @@ class RowConvGradKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
     // int input_dim = X->dims()[1];
     int num_sequence = batch_indices.size() - 1;
     int future_context = Filter->dims()[0];
-    paddle::framework::MixVector<size_t> mixv_batch_indices(&batch_indices);
+    phi::MixVector<size_t> mixv_batch_indices(&batch_indices);
     size_t* idx = mixv_batch_indices.CUDAMutableData(context.GetPlace());
     auto& device_ctx = context.cuda_device_context();

paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu
@@ -76,7 +76,7 @@ class SequenceEnumerateOpCUDAKernel : public framework::OpKernel<T> {
     out->Resize({in_dims[0], win_size});
     auto out_data = out->mutable_data<T>(context.GetPlace());
     // Copy LoD to GPU
-    paddle::framework::MixVector<size_t> mixv_lod0(&lod0);
+    phi::MixVector<size_t> mixv_lod0(&lod0);
     const size_t* dev_in_lod_ptr = mixv_lod0.CUDAData(context.GetPlace());
     // Calc output tensor
     CalcOutPut<<<(in_len - 1) / PADDLE_CUDA_NUM_THREADS + 1,

paddle/fluid/operators/sequence_ops/sequence_erase_op.cu
@@ -97,7 +97,7 @@ class SequenceEraseOpCUDAKernel : public framework::OpKernel<T> {
     // Copy LoD to GPU
     auto last_lod = lod[lod.size() - 1];
     auto lod_len = last_lod.size();
-    paddle::framework::MixVector<size_t> mixv_last_lod(&last_lod);
+    phi::MixVector<size_t> mixv_last_lod(&last_lod);
     const size_t* dev_in_lod_ptr = mixv_last_lod.CUDAData(ctx.GetPlace());
     // Calc output LoD
     thrust::device_vector<size_t> dev_out_lod(lod_len);

paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu
@@ -65,10 +65,9 @@ static __global__ void sequence_expand_as_grad_kernel(
 
 template <typename T>
 struct SequenceExpandAsFunctor<phi::GPUContext, T> {
-  void operator()(
-      const phi::GPUContext& context,
-      const phi::DenseTensor& x,
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* out) {
+  void operator()(const phi::GPUContext& context,
+                  const phi::DenseTensor& x,
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* out) {
     int height = x.dims()[0];
     int width = phi::product(x.dims()) / height;
@@ -84,7 +83,7 @@ struct SequenceExpandAsFunctor<phi::GPUContext, T> {
     dim3 block_size(thread_x);
     dim3 grid_size(block_x);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
     sequence_expand_as_kernel<<<grid_size, block_size, 0, context.stream()>>>(
         x.data<T>(),
         mixv_ref_lod.CUDAData(context.GetPlace()),
@@ -98,7 +97,7 @@ template <typename T>
 struct SequenceExpandAsGradFunctor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
                   const phi::DenseTensor& dout,
-                  const framework::Vector<size_t>& ref_lod, /*expand based lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand based lod*/
                   phi::DenseTensor* dx) {
     int height = dx->dims()[0];
     int width = phi::product(dx->dims()) / height;
@@ -114,7 +113,7 @@ struct SequenceExpandAsGradFunctor<phi::GPUContext, T> {
     dim3 block_size(thread_x);
     dim3 grid_size(block_x);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
     sequence_expand_as_grad_kernel<<<grid_size,
                                      block_size,
                                      0,

paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h
@@ -26,28 +26,25 @@ namespace operators {
 
 template <typename DeviceContext, typename T>
 struct SequenceExpandAsFunctor {
-  void operator()(
-      const DeviceContext& ctx,
-      const phi::DenseTensor& x,
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* out);
+  void operator()(const DeviceContext& ctx,
+                  const phi::DenseTensor& x,
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* out);
 };
 
 template <typename DeviceContext, typename T>
 struct SequenceExpandAsGradFunctor {
-  void operator()(
-      const DeviceContext& ctx,
-      const phi::DenseTensor& dout,
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* dx);
+  void operator()(const DeviceContext& ctx,
+                  const phi::DenseTensor& dout,
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* dx);
 };
 
 template <typename T>
 struct SequenceExpandAsFunctor<phi::CPUContext, T> {
-  void operator()(
-      const phi::CPUContext& context,
-      const phi::DenseTensor& x,
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* out) {
+  void operator()(const phi::CPUContext& context,
+                  const phi::DenseTensor& x,
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* out) {
     int64_t height = x.dims()[0];
     int64_t width = phi::product(x.dims()) / height;
@@ -122,10 +119,9 @@ class SequenceExpandAsKernel : public framework::OpKernel<T> {
  * */
 template <typename T>
 struct SequenceExpandAsGradFunctor<phi::CPUContext, T> {
-  void operator()(
-      const phi::CPUContext& context,
-      const phi::DenseTensor& dout,
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* dx) {
+  void operator()(const phi::CPUContext& context,
+                  const phi::DenseTensor& dout,
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* dx) {
     int64_t height = dx->dims()[0];
     int64_t width = phi::product(dx->dims()) / height;
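As a reminder of what the retyped `ref_lod` means for these functors: "expand as" roughly repeats row i of `x` as many times as reference segment i is long. A standalone sketch (not Paddle code) of that expansion:

// Standalone sketch: row i of x is repeated (ref_lod[i+1] - ref_lod[i]) times,
// so the output has ref_lod.back() rows of the same width as x.
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  const std::size_t width = 2;
  std::vector<float> x = {1.f, 1.f, 2.f, 2.f, 3.f, 3.f};  // 3 rows
  std::vector<std::size_t> ref_lod = {0, 2, 5, 6};        // repeat 2, 3, 1 times

  std::vector<float> out;
  out.reserve(ref_lod.back() * width);
  for (std::size_t i = 0; i + 1 < ref_lod.size(); ++i) {
    for (std::size_t r = ref_lod[i]; r < ref_lod[i + 1]; ++r) {
      for (std::size_t j = 0; j < width; ++j) out.push_back(x[i * width + j]);
    }
  }
  for (float v : out) std::cout << v << ' ';
  std::cout << '\n';  // 1 1 1 1 2 2 2 2 2 2 3 3
}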

paddle/fluid/operators/sequence_ops/sequence_expand_op.cu
@@ -82,9 +82,9 @@ __global__ void sequence_expand_grad_kernel(const T* dout_data,
   }
 }
 
-void GetOutputOffset(const framework::Vector<size_t>& x_lod,
-                     const framework::Vector<size_t>& ref_lod,
-                     framework::Vector<size_t>* out_offset) {
+void GetOutputOffset(const phi::Vector<size_t>& x_lod,
+                     const phi::Vector<size_t>& ref_lod,
+                     phi::Vector<size_t>* out_offset) {
   size_t offset = 0;
   int lod_size = static_cast<int>(x_lod.size());
   for (int i = 0; i < static_cast<int>(x_lod.size()); ++i) {
@@ -99,8 +99,8 @@ template <typename T>
 static int ExpandByMemoryCopy(const phi::GPUContext& context,
                               const LoDTensor& x,
                               LoDTensor* out,
-                              const framework::Vector<size_t>& x_lod,
-                              const framework::Vector<size_t>& ref_lod,
+                              const phi::Vector<size_t>& x_lod,
+                              const phi::Vector<size_t>& ref_lod,
                               bool do_copy) {
   auto out_data = out->data<T>();
   auto x_data = x.data<T>();
@@ -143,11 +143,10 @@ static int ExpandByMemoryCopy(const phi::GPUContext& context,
 
 template <typename T>
 struct SequenceExpandFunctor<phi::GPUContext, T> {
-  void operator()(
-      const phi::GPUContext& context,
-      const LoDTensor& x,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      LoDTensor* out) {
+  void operator()(const phi::GPUContext& context,
+                  const LoDTensor& x,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  LoDTensor* out) {
     int num_copys =
         ExpandByMemoryCopy<T>(context, x, out, x_lod, ref_lod, false);
@@ -157,7 +156,7 @@ struct SequenceExpandFunctor<phi::GPUContext, T> {
     } else {
       int x_item_length = x.numel() / x.dims()[0];
       size_t x_lod_size = x_lod.size();
-      framework::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
+      phi::Vector<size_t> out_offset(x_lod_size * 2 + ref_lod.size());
       GetOutputOffset(x_lod, ref_lod, &out_offset);
 
       for (size_t i = 0; i < x_lod_size; ++i) {
@@ -167,7 +166,7 @@ struct SequenceExpandFunctor<phi::GPUContext, T> {
         out_offset[2 * x_lod_size + i] = ref_lod[i];
       }
 
-      paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
+      phi::MixVector<size_t> mixv_out_offset(&out_offset);
       const size_t* out_offset_data =
           mixv_out_offset.CUDAData(context.GetPlace());
       const size_t* x_lod_data = out_offset_data + x_lod_size;
@@ -197,11 +196,11 @@ template <typename T>
 struct SequenceExpandGradFunctor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
                   const LoDTensor& dout,
-                  const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-                  const framework::Vector<size_t>& ref_lod, /*expand based lod*/
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand based lod*/
                   LoDTensor* dx) {
     int x_item_length = phi::product(dx->dims()) / dx->dims()[0];
-    framework::Vector<size_t> out_offset(x_lod.size());
+    phi::Vector<size_t> out_offset(x_lod.size());
     GetOutputOffset(x_lod, ref_lod, &out_offset);
 
     int thread_x = std::min(32, std::max(static_cast<int>(ref_lod.size()), 16));
@@ -210,9 +209,9 @@ struct SequenceExpandGradFunctor<phi::GPUContext, T> {
     int block_x = static_cast<int>(ref_lod.size());
     dim3 block_size(thread_x, thread_y, thread_z);
     dim3 grid_size(block_x, 1);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
-    paddle::framework::MixVector<size_t> mixv_x_lod(&x_lod);
-    paddle::framework::MixVector<size_t> mixv_out_offset(&out_offset);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_x_lod(&x_lod);
+    phi::MixVector<size_t> mixv_out_offset(&out_offset);
     sequence_expand_grad_kernel<<<grid_size, block_size, 0, context.stream()>>>(
         dout.data<T>(),
         mixv_ref_lod.CUDAData(context.GetPlace()),

paddle/fluid/operators/sequence_ops/sequence_expand_op.h
@@ -29,31 +29,28 @@ using EigenMatrix = phi::EigenMatrix<T, MajorType, IndexType>;
 
 template <typename DeviceContext, typename T>
 struct SequenceExpandFunctor {
-  void operator()(
-      const DeviceContext& ctx,
-      const phi::DenseTensor& x,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* out);
+  void operator()(const DeviceContext& ctx,
+                  const phi::DenseTensor& x,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* out);
 };
 
 template <typename DeviceContext, typename T>
 struct SequenceExpandGradFunctor {
-  void operator()(
-      const DeviceContext& ctx,
-      const phi::DenseTensor& dout,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* dx);
+  void operator()(const DeviceContext& ctx,
+                  const phi::DenseTensor& dout,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* dx);
 };
 
 template <typename T>
 struct SequenceExpandFunctor<phi::CPUContext, T> {
-  void operator()(
-      const phi::CPUContext& context,
-      const phi::DenseTensor& x,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* out) {
+  void operator()(const phi::CPUContext& context,
+                  const phi::DenseTensor& x,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* out) {
     int out_offset = 0;
     int x_item_length = x.numel() / x.dims()[0];
@@ -112,7 +109,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
     }
     // x lod level is at most 1.
-    framework::Vector<size_t> out_lod;
+    phi::Vector<size_t> out_lod;
     if (x_lod.size() == 1) {
       out_lod.push_back(0);
       int out_offset = 0;
@@ -130,7 +127,7 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
       auto& ref_lod = *out->mutable_lod();
       ref_lod[0] = out_lod;
     }
-    framework::Vector<size_t> ref_x_lod;
+    phi::Vector<size_t> ref_x_lod;
     if (x->lod().size() == 1) {
       ref_x_lod = x->lod()[0];
     } else {
@@ -161,11 +158,10 @@ class SequenceExpandKernel : public framework::OpKernel<T> {
  * */
 template <typename T>
 struct SequenceExpandGradFunctor<phi::CPUContext, T> {
-  void operator()(
-      const phi::CPUContext& context,
-      const phi::DenseTensor& dout,
-      const framework::Vector<size_t>& x_lod,   /*expand source lod*/
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* dx) {
+  void operator()(const phi::CPUContext& context,
+                  const phi::DenseTensor& dout,
+                  const phi::Vector<size_t>& x_lod,   /*expand source lod*/
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* dx) {
     int dout_offset = 0;
     for (size_t i = 1; i < ref_lod.size(); ++i) {
@@ -214,8 +210,8 @@ class SequenceExpandGradKernel : public framework::OpKernel<T> {
       return;
     }
-    framework::Vector<size_t> ref_x_lod;
-    framework::Vector<size_t> ref_lod = y_lod[ref_level];
+    phi::Vector<size_t> ref_x_lod;
+    phi::Vector<size_t> ref_lod = y_lod[ref_level];
     if (x->lod().size() == 1) {
       ref_x_lod = x->lod()[0];
     } else {

paddle/fluid/operators/sequence_ops/sequence_reverse_op.h
@@ -139,7 +139,7 @@ class SequenceReverseOpKernel : public framework::OpKernel<T> {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (platform::is_gpu_place(ctx.GetPlace())) {
       auto xlod = x.lod()[0];
-      paddle::framework::MixVector<size_t> mixv_xlod(&xlod);
+      phi::MixVector<size_t> mixv_xlod(&xlod);
       lod = mixv_xlod.CUDAData(ctx.GetPlace());
     } else {
 #endif

paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu
@@ -124,7 +124,7 @@ template <typename T>
 struct SequenceSoftmaxFunctor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
                   const LoDTensor& x,
-                  const framework::Vector<size_t>& ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t>& ref_lod, /*referenced lod*/
                   LoDTensor* out) {
     int height = ref_lod.size() - 1;
@@ -135,7 +135,7 @@ struct SequenceSoftmaxFunctor<phi::GPUContext, T> {
     dim3 block_size(thread_x);
     dim3 grid_size(max_blocks);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
     sequence_softmax_kernel<T, kThreadsPerBlock>
         <<<grid_size, block_size, 0, context.stream()>>>(
             x.data<T>(),
@@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> {
   void operator()(const phi::GPUContext& context,
                   const LoDTensor& dout,
                   const LoDTensor& out,
-                  const framework::Vector<size_t>& ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t>& ref_lod, /*referenced lod*/
                   LoDTensor* dx) {
     size_t height = ref_lod.size() - 1;
@@ -162,7 +162,7 @@ struct SequenceSoftmaxGradFunctor<phi::GPUContext, T> {
     dim3 block_size(thread_x);
     dim3 grid_size(max_blocks);
-    paddle::framework::MixVector<size_t> mixv_ref_lod(&ref_lod);
+    phi::MixVector<size_t> mixv_ref_lod(&ref_lod);
     sequence_softmax_grad_kernel<T, kThreadsPerBlock>
         <<<grid_size, block_size, 0, context.stream()>>>(
             dout.data<T>(),

paddle/fluid/operators/sequence_ops/sequence_softmax_op.h
@@ -21,10 +21,9 @@ namespace operators {
 
 template <typename DeviceContext, typename T>
 struct SequenceSoftmaxFunctor {
-  void operator()(
-      const DeviceContext& ctx,
-      const phi::DenseTensor& x,
-      const framework::Vector<size_t>& ref_lod, /*expand referenced lod*/
-      phi::DenseTensor* out);
+  void operator()(const DeviceContext& ctx,
+                  const phi::DenseTensor& x,
+                  const phi::Vector<size_t>& ref_lod, /*expand referenced lod*/
+                  phi::DenseTensor* out);
 };
@@ -33,7 +32,7 @@ struct SequenceSoftmaxGradFunctor {
   void operator()(const DeviceContext& ctx,
                   const phi::DenseTensor& dout,
                   const phi::DenseTensor& out,
-                  const framework::Vector<size_t>& ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t>& ref_lod, /*referenced lod*/
                   phi::DenseTensor* dx);
 };
@@ -41,7 +40,7 @@ template <typename T>
 struct SequenceSoftmaxFunctor<phi::CPUContext, T> {
   void operator()(const phi::CPUContext& ctx,
                   const phi::DenseTensor& x,
-                  const framework::Vector<size_t>& ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t>& ref_lod, /*referenced lod*/
                   phi::DenseTensor* out) {
     size_t height = ref_lod.size() - 1;
     const T* in_data = x.data<T>();
@@ -64,7 +63,7 @@ struct SequenceSoftmaxGradFunctor<phi::CPUContext, T> {
   void operator()(const phi::CPUContext& ctx,
                   const phi::DenseTensor& dout,
                   const phi::DenseTensor& out,
-                  const framework::Vector<size_t>& ref_lod, /*referenced lod*/
+                  const phi::Vector<size_t>& ref_lod, /*referenced lod*/
                   phi::DenseTensor* dx) {
     size_t height = ref_lod.size() - 1;
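These softmax functors apply a numerically stable softmax independently to each `[ref_lod[i], ref_lod[i+1])` slice. A standalone sketch (not Paddle code) of that segment-wise softmax:

// Standalone sketch of per-segment softmax over an LoD offsets vector.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<float> x = {1.f, 2.f, 0.5f, 0.5f, 0.5f};
  std::vector<std::size_t> ref_lod = {0, 2, 5};  // two sequences

  std::vector<float> out(x.size());
  for (std::size_t i = 0; i + 1 < ref_lod.size(); ++i) {
    float mx = x[ref_lod[i]];
    for (std::size_t j = ref_lod[i]; j < ref_lod[i + 1]; ++j) mx = std::max(mx, x[j]);
    float denom = 0.f;
    for (std::size_t j = ref_lod[i]; j < ref_lod[i + 1]; ++j) denom += std::exp(x[j] - mx);
    for (std::size_t j = ref_lod[i]; j < ref_lod[i + 1]; ++j) out[j] = std::exp(x[j] - mx) / denom;
  }
  for (float v : out) std::cout << v << ' ';
  std::cout << '\n';
}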

paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h
@@ -116,7 +116,7 @@ class SequenceTopkAvgPoolingKernel : public framework::OpKernel<T> {
     auto pos_data = pos->mutable_data<int>(context.GetPlace());
 
     int offset = 0;
-    framework::Vector<size_t> vec_out_lod;
+    phi::Vector<size_t> vec_out_lod;
     vec_out_lod.reserve(batch_size + 1);
     for (int i = 0; i <= batch_size; ++i) {
       offset = row_lod[i];

paddle/fluid/operators/shuffle_batch_op.h
@@ -25,16 +25,16 @@
 #include "glog/logging.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/fluid/platform/timer.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {
 
 template <typename T>
-using Vector = framework::Vector<T>;
+using Vector = phi::Vector<T>;
 
 template <typename T>
 class ShuffleBatchKernel : public framework::OpKernel<T> {
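shuffle_batch_op.h (and the two tdm_*_op.h hunks below) show the mechanical part of the migration for operator headers: swap the fluid include for the phi one and point the local alias at `phi::Vector`. A hedged sketch of that pattern; the include and alias lines are the ones from the diff, while `NumSequences` is a made-up illustration of a call site that keeps compiling unchanged:

// Hedged sketch of the header migration pattern (assumes the Paddle tree).
#include "paddle/phi/core/mixed_vector.h"  // was paddle/fluid/framework/mixed_vector.h

namespace paddle {
namespace operators {

template <typename T>
using Vector = phi::Vector<T>;  // phi::Vector<T> is std::vector<T>

// Hypothetical call site: unchanged, because only the alias target moved.
inline size_t NumSequences(const Vector<size_t>& lod) { return lod.size() - 1; }

}  // namespace operators
}  // namespace paddle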

paddle/fluid/operators/tdm_child_op.h
@@ -22,8 +22,8 @@
 #include <vector>
 
 #include "gflags/gflags.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {

paddle/fluid/operators/tdm_sampler_op.h
@@ -22,9 +22,9 @@
 #include <vector>
 
 #include "gflags/gflags.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/sampler.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 namespace paddle {
 namespace operators {

paddle/fluid/pybind/tensor.cc
@@ -1095,7 +1095,7 @@ void BindTensor(pybind11::module &m) { // NOLINT
 #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
              self.set_rows(rows);
 #else
-             Vector<int64_t> new_rows(rows);
+             std::vector<int64_t> new_rows(rows);
              self.set_rows(new_rows);
 #endif
            })

paddle/phi/core/CMakeLists.txt
@@ -114,6 +114,11 @@ cc_library(
   SRCS custom_kernel.cc
   DEPS kernel_factory)
 
+cc_library(
+  mixed_vector
+  SRCS mixed_vector.cc
+  DEPS device_context place memory)
+
 # Will remove once we implemented MKLDNN_Tensor
 if(WITH_MKLDNN)
   add_dependencies(dense_tensor mkldnn)

paddle/fluid/framework/mixed_vector.cc → paddle/phi/core/mixed_vector.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/framework/mixed_vector.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #include <algorithm>
 #include <initializer_list>
@@ -22,28 +22,26 @@ limitations under the License. */
 #include <vector>
 
 #include "glog/logging.h"
-#include "paddle/fluid/framework/details/cow_ptr.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/memory/memcpy.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/all_context.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
 
-namespace paddle {
-namespace framework {
+namespace phi {
 
 template <typename T>
 void CopyToCPUHelper(std::vector<T> *cpu_,
-                     paddle::memory::AllocationPtr *gpu_,
+                     phi::Allocator::AllocationPtr *gpu_,
                      size_t *gpu_memory_size_) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   // COPY GPU Data To CPU
   auto *dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get((*gpu_)->place()));
+      phi::DeviceContextPool::Instance().Get((*gpu_)->place()));
   auto stream = dev_ctx->stream();
   void *src = (*gpu_)->ptr();
   void *dst = cpu_->data();
-  paddle::memory::Copy(platform::CPUPlace(),
+  paddle::memory::Copy(phi::CPUPlace(),
                        dst,
                        OptionalCUDAPlace(*gpu_).get(),
                        src,
@@ -55,20 +53,20 @@ void CopyToCPUHelper(std::vector<T> *cpu_,
 
 template <typename T>
 void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
-                             paddle::memory::AllocationPtr *gpu_,
+                             phi::Allocator::AllocationPtr *gpu_,
                              size_t *gpu_memory_size_,
-                             const platform::Place &place) {
+                             const phi::Place &place) {
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   void *src = cpu_->data();
   *gpu_memory_size_ = cpu_->size() * sizeof(T);  // sizeof(T)
-  (*gpu_) = memory::Alloc(place, *gpu_memory_size_);
+  (*gpu_) = paddle::memory::Alloc(place, *gpu_memory_size_);
   void *dst = (*gpu_)->ptr();
   auto *dev_ctx = static_cast<phi::GPUContext *>(
-      platform::DeviceContextPool::Instance().Get(place));
+      phi::DeviceContextPool::Instance().Get(place));
   auto stream = dev_ctx->stream();
   paddle::memory::Copy(OptionalCUDAPlace(*gpu_).get(),
                        dst,
-                       platform::CPUPlace(),
+                       phi::CPUPlace(),
                        src,
                        *gpu_memory_size_,
                        stream);
@@ -84,7 +82,7 @@ void CopyCPUDataToCUDAHelper(std::vector<T> *cpu_,
                                                                              \
   template <>                                                                \
   void MixVector<__TYPE__>::VectorData::CopyCPUDataToCUDA(                   \
-      const platform::Place &place) const {                                  \
+      const phi::Place &place) const {                                       \
     CopyCPUDataToCUDAHelper<__TYPE__>(cpu_, &gpu_, &gpu_memory_size_, place); \
   }
@@ -92,5 +90,4 @@ INSTANTIATE_VECTOR_FOR_TYPE(size_t)
 INSTANTIATE_VECTOR_FOR_TYPE(int)
 INSTANTIATE_VECTOR_FOR_TYPE(int64_t)
 
-};  // namespace framework
-}  // namespace paddle
+};  // namespace phi

paddle/fluid/framework/mixed_vector.h → paddle/phi/core/mixed_vector.h
@@ -22,20 +22,22 @@ limitations under the License. */
 #include <vector>
 
 #include "glog/logging.h"
-#include "paddle/fluid/memory/allocation/allocator.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/allocator.h"
+#include "paddle/phi/core/enforce.h"
+#include "paddle/phi/core/errors.h"
 #include "paddle/utils/none.h"
 #include "paddle/utils/optional.h"
 
-namespace paddle {
-namespace framework {
+namespace phi {
 
 template <class T>
 using Vector = std::vector<T>;
 
-inline paddle::optional<platform::CUDAPlace> OptionalCUDAPlace(
-    const paddle::memory::allocation::AllocationPtr &gpu_) {
+inline paddle::optional<phi::GPUPlace> OptionalCUDAPlace(
+    const phi::Allocator::AllocationPtr &gpu_) {
   return gpu_ == nullptr ? paddle::none
-                         : paddle::optional<platform::CUDAPlace>(gpu_->place());
+                         : paddle::optional<phi::GPUPlace>(gpu_->place());
 }
 
 // Vector<T> implements the std::vector interface, and can get Data or
@@ -146,18 +148,18 @@ class MixVector {
   }
 
   // get cuda ptr. immutable
-  const T *CUDAData(platform::Place place) const {
+  const T *CUDAData(phi::Place place) const {
     PADDLE_ENFORCE_EQ(
-        platform::is_gpu_place(place),
+        place.GetType() == phi::AllocationType::GPU,
         true,
-        platform::errors::Unavailable(
+        phi::errors::Unavailable(
            "Place mismatch, CUDA Data must be on CUDA place."));
     ImmutableCUDA(place);
     return reinterpret_cast<T *>(gpu_->ptr());
   }
 
   // get cuda ptr. mutable
-  T *CUDAMutableData(platform::Place place) {
+  T *CUDAMutableData(phi::Place place) {
     const T *ptr = CUDAData(place);
     flag_ = kDirty | kDataInCUDA;
     return const_cast<T *>(ptr);
@@ -178,7 +180,7 @@ class MixVector {
   std::mutex &Mutex() const { return mtx_; }
 
-  paddle::optional<platform::CUDAPlace> CUDAPlace() const {
+  paddle::optional<phi::GPUPlace> CUDAPlace() const {
     return OptionalCUDAPlace(gpu_);
   }
@@ -199,7 +201,7 @@ class MixVector {
   void CopyToCPU() const;
 
-  void ImmutableCUDA(platform::Place place) const {
+  void ImmutableCUDA(phi::Place place) const {
     if (IsDirty()) {
       if (IsInCPU()) {
         CopyCPUDataToCUDA(place);
@@ -207,7 +209,7 @@ class MixVector {
         SetFlag(kDataInCUDA);
       } else if (IsInCUDA() && !(place == gpu_->place())) {
         PADDLE_THROW(
-            platform::errors::Unavailable("Unexpected data place mismatch."));
+            phi::errors::Unavailable("Unexpected data place mismatch."));
         // Still dirty
       } else {
         // Dirty && DataInCUDA && Device is same
@@ -220,7 +222,7 @@ class MixVector {
         SetFlag(kDataInCUDA);
       } else if (!(place == gpu_->place())) {
         PADDLE_THROW(
-            platform::errors::Unavailable("Unexpected data place mismatch."));
+            phi::errors::Unavailable("Unexpected data place mismatch."));
       } else {
         // Not Dirty && DataInCUDA && Device is same
         // Do nothing.
@@ -228,7 +230,7 @@ class MixVector {
     }
   }
 
-  void CopyCPUDataToCUDA(const platform::Place &place) const;
+  void CopyCPUDataToCUDA(const phi::Place &place) const;
 
   void ImmutableCPU() const {
     if (IsDirty() && !IsInCPU()) {  // If data has been changed in CUDA, or
@@ -249,7 +251,7 @@ class MixVector {
   bool IsInCPU() const { return flag_ & kDataInCPU; }
 
   std::vector<T> *cpu_;
-  mutable paddle::memory::allocation::AllocationPtr gpu_;
+  mutable phi::Allocator::AllocationPtr gpu_;
   mutable size_t gpu_memory_size_{0};
   mutable int flag_;
@@ -332,9 +334,9 @@ class MixVector {
   }
 
   // get cuda ptr. immutable
-  const T *CUDAData(platform::Place place) const {
+  const T *CUDAData(phi::Place place) const {
     {
-      platform::CUDAPlace p(place.GetDeviceId());
+      phi::GPUPlace p(place.GetDeviceId());
       auto &mtx = m_->Mutex();
       std::lock_guard<std::mutex> guard(mtx);
       auto cuda_place = m_->CUDAPlace();
@@ -348,9 +350,9 @@ class MixVector {
   }
 
   // get cuda ptr. mutable
-  T *CUDAMutableData(platform::Place place) {
+  T *CUDAMutableData(phi::Place place) {
     {
-      platform::CUDAPlace p(place.GetDeviceId());
+      phi::GPUPlace p(place.GetDeviceId());
       auto &mtx = m_->Mutex();
       std::lock_guard<std::mutex> guard(mtx);
       auto cuda_place = m_->CUDAPlace();
@@ -372,8 +374,8 @@ class MixVector {
   void reserve(size_t size) { m_->reserve(size); }
 
   // the unify method to access CPU or CUDA data. immutable.
-  const T *Data(platform::Place place) const {
-    if (platform::is_gpu_place(place)) {
+  const T *Data(phi::Place place) const {
+    if (place.GetType() == phi::AllocationType::GPU) {
       return CUDAData(place);
     } else {
       return data();
@@ -381,8 +383,8 @@ class MixVector {
   }
 
   // the unify method to access CPU or CUDA data. mutable.
-  T *MutableData(platform::Place place) {
-    if (platform::is_gpu_place(place)) {
+  T *MutableData(phi::Place place) {
+    if (place.GetType() == phi::AllocationType::GPU) {
       return CUDAMutableData(place);
     } else {
       return data();
@@ -397,5 +399,4 @@ class MixVector {
   mutable std::unique_ptr<VectorData> m_;
 };
 
-};  // namespace framework
-}  // namespace paddle
+};  // namespace phi
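Most of the mixed_vector.h hunks only rename places and error helpers; the synchronization logic around `kDataInCPU` / `kDataInCUDA` / `kDirty` is untouched. A standalone sketch (not the real `MixVector`) of that flag machine, with illustrative enum values and copy stubs:

// Standalone sketch of the host/device dirty-flag state machine; the enum
// values, member names and copy stubs here are illustrative only.
#include <cstdio>

class SyncedBuffer {
 public:
  enum Flag { kDataInCPU = 0x01, kDataInCUDA = 0x02, kDirty = 0x04 };

  const int* DeviceData() {            // analogue of CUDAData + ImmutableCUDA
    if (flag_ & kDirty) {
      if (flag_ & kDataInCPU) CopyHostToDevice();
      flag_ &= ~kDirty;
    }
    flag_ |= kDataInCUDA;
    return device_ptr_;
  }

  int* MutableDeviceData() {           // analogue of CUDAMutableData
    const int* p = DeviceData();
    flag_ = kDirty | kDataInCUDA;      // the device copy is now the fresh one
    return const_cast<int*>(p);
  }

  void HostData() {                    // analogue of ImmutableCPU
    if ((flag_ & kDirty) && !(flag_ & kDataInCPU)) CopyDeviceToHost();
    flag_ |= kDataInCPU;
    flag_ &= ~kDirty;
  }

 private:
  void CopyHostToDevice() { std::puts("H2D copy"); }
  void CopyDeviceToHost() { std::puts("D2H copy"); }
  int* device_ptr_ = nullptr;
  int flag_ = kDataInCPU | kDirty;     // CPU holds the only, not-yet-mirrored copy
};

int main() {
  SyncedBuffer buf;
  buf.MutableDeviceData();  // first touch: H2D copy, then marked dirty on GPU
  buf.HostData();           // dirty and not in CPU: D2H copy
}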

paddle/phi/kernels/cpu/edit_distance_kernel.cc
@@ -14,10 +14,10 @@
 
 #include "paddle/phi/kernels/edit_distance_kernel.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/common/complex.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 
 namespace phi {
@@ -34,8 +34,8 @@ void EditDistanceKernel(const Context& ctx,
   int64_t* seq_num_data = ctx.template Alloc<int64_t>(sequencenum);
   auto batch_size = hyps.dims()[0];
-  paddle::framework::Vector<size_t> hyp_lod(batch_size + 1);
-  paddle::framework::Vector<size_t> ref_lod(batch_size + 1);
+  phi::Vector<size_t> hyp_lod(batch_size + 1);
+  phi::Vector<size_t> ref_lod(batch_size + 1);
   bool use_length = hypslength.get_ptr() != nullptr;

paddle/phi/kernels/funcs/selected_rows_functor.cc
@@ -14,8 +14,8 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/selected_rows_functor.h"
 
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/platform/device/device_wrapper.h"
+#include "paddle/phi/core/mixed_vector.h"
 
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/phi/backends/onednn/axpy_handler.h"
@@ -200,7 +200,7 @@ struct SelectedRowsAddTo<phi::CPUContext, T> {
     auto* in2_value = input2->mutable_value();
 
     // concat rows
-    paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
+    phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
     mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
 
     auto in1_place = input1.place();
@@ -254,7 +254,7 @@ struct SelectedRowsSumTo<phi::CPUContext, T> {
     std::vector<int64_t> in2_rows;
     in2_rows.reserve(in2_rows.size() + size);
     for (auto iter = input1.begin(); iter != input1.end(); ++iter) {
-      const paddle::framework::Vector<int64_t>& in_rows = (*iter)->rows();
+      const phi::Vector<int64_t>& in_rows = (*iter)->rows();
       in2_rows.insert(in2_rows.end(), in_rows.begin(), in_rows.end());
     }
     input2->set_rows(in2_rows);
@@ -646,7 +646,7 @@ struct MergeAdd<phi::XPUContext, T> {
                   const phi::SelectedRows& input,
                   phi::SelectedRows* output,
                   const bool sorted_result = false) {
-    paddle::framework::Vector<int64_t> input_rows(input.rows());
+    phi::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
      return;
    }
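For the `rows()` vectors being retyped here and below: `MergeAdd` roughly collapses duplicate row ids of a `SelectedRows` gradient and sums their value rows, which is why the hunks build a `std::set` of row ids first. A standalone sketch (not Paddle code) of that merge:

// Standalone sketch of merging duplicate sparse-gradient rows by summation.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

int main() {
  const std::size_t width = 2;
  std::vector<int64_t> rows = {3, 1, 3};                       // row 3 appears twice
  std::vector<float> values = {1.f, 1.f, 2.f, 2.f, 4.f, 4.f};  // one value row per entry

  std::map<int64_t, std::vector<float>> merged;
  for (std::size_t i = 0; i < rows.size(); ++i) {
    auto& acc = merged[rows[i]];
    acc.resize(width, 0.f);
    for (std::size_t j = 0; j < width; ++j) acc[j] += values[i * width + j];
  }
  for (const auto& kv : merged) {
    std::cout << "row " << kv.first << ":";
    for (float v : kv.second) std::cout << ' ' << v;
    std::cout << '\n';  // row 1: 2 2 / row 3: 5 5
  }
}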
paddle/phi/kernels/funcs/selected_rows_functor.cu
@@ -40,7 +40,7 @@ struct SelectedRowsAdd<phi::GPUContext, T> {
                           input2.height()));
     output->set_height(in1_height);
-    paddle::framework::Vector<int64_t> in1_rows(input1.rows());
+    phi::Vector<int64_t> in1_rows(input1.rows());
     auto& in2_rows = input2.rows();
     std::vector<int64_t> out_rows;
     out_rows.reserve(in1_rows.size() + in2_rows.size());
...
@@ -189,7 +189,7 @@ struct SelectedRowsAddTensor<phi::GPUContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid(in1_rows.size(), 1);
-    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
+    phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
     SelectedRowsAddTensorKernel<T, block_size>
         <<<grid, threads, 0, context.stream()>>>(
             in1_data,
...
@@ -231,7 +231,7 @@ struct SelectedRowsAddTo<phi::GPUContext, T> {
     auto* in2_value = input2->mutable_value();
     // concat rows
-    paddle::framework::MixVector<int64_t> mixv_in2_rows(&in2_rows);
+    phi::MixVector<int64_t> mixv_in2_rows(&in2_rows);
     if (in1_rows.size()) {
       mixv_in2_rows.Extend(in1_rows.begin(), in1_rows.end());
     }
...
@@ -318,7 +318,7 @@ struct SelectedRowsAddToTensor<phi::GPUContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid(in1_rows.size(), 1);
-    paddle::framework::MixVector<int64_t> mixv_in1_rows(&in1_rows);
+    phi::MixVector<int64_t> mixv_in1_rows(&in1_rows);
     SelectedRowsAddToTensorKernel<T, block_size>
         <<<grid, threads, 0, context.stream()>>>(
             in1_data,
...
@@ -378,7 +378,7 @@ struct MergeAddImpl {
                   const phi::SelectedRows& input,
                   phi::SelectedRows* output,
                   const bool sorted_result = false) {
-    paddle::framework::Vector<int64_t> input_rows(input.rows());
+    phi::Vector<int64_t> input_rows(input.rows());
     if (input_rows.size() == 0) {
       return;
     }
...
@@ -386,7 +386,7 @@ struct MergeAddImpl {
     phi::SelectedRows& out = *output;
     std::set<int64_t> row_set(input_rows.begin(), input_rows.end());
     std::vector<int64_t> merge_rows_cpu(row_set.begin(), row_set.end());
-    paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
+    phi::Vector<int64_t> merge_rows(merge_rows_cpu);
     auto input_width = input.value().dims()[1];
...
@@ -407,8 +407,8 @@ struct MergeAddImpl {
     dim3 threads(block_size, 1);
     dim3 grid1(input_rows.size(), 1);
-    paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
-    paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
+    phi::MixVector<int64_t> mix_vector_input(&input_rows);
+    phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
     MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
         input_data,
         mix_vector_input.CUDAData(context.GetPlace()),
...
@@ -459,7 +459,7 @@ struct MergeAddImpl {
     }
     std::vector<int64_t> merge_rows_cpu(merged_row_set.begin(),
                                         merged_row_set.end());
-    paddle::framework::Vector<int64_t> merge_rows(merge_rows_cpu);
+    phi::Vector<int64_t> merge_rows(merge_rows_cpu);
     out.set_rows(merge_rows);
     out.set_height(input_height);
...
@@ -485,8 +485,8 @@ struct MergeAddImpl {
     auto& input_rows = input->rows();
     dim3 grid1(input_rows.size(), 1);
-    paddle::framework::MixVector<int64_t> mix_vector_input(&input_rows);
-    paddle::framework::MixVector<int64_t> mix_vector_out(out.mutable_rows());
+    phi::MixVector<int64_t> mix_vector_input(&input_rows);
+    phi::MixVector<int64_t> mix_vector_out(out.mutable_rows());
     MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(
         input_data,
         mix_vector_input.CUDAData(context.GetPlace()),
...
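On the GPU paths the wrapper is mainly used to hand the row indices to a kernel as a raw device pointer via CUDAData(place). A hedged sketch of just that step, using only calls that appear above (the consuming kernel is omitted and the helper name is illustrative):

#include <cstdint>

#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/mixed_vector.h"

// Illustrative helper (not part of the patch).
void PrepareRowsForKernel(const phi::GPUContext& context,
                          phi::Vector<int64_t>* rows) {
  // The wrapper must stay alive while the pointer is in use.
  phi::MixVector<int64_t> mix_rows(rows);
  const int64_t* rows_dev = mix_rows.CUDAData(context.GetPlace());
  // rows_dev would be passed to a kernel launched on context.stream(), e.g.
  // MergeAddKernel<T, 256><<<grid1, threads, 0, context.stream()>>>(...).
  (void)rows_dev;
}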
paddle/phi/kernels/funcs/sequence2batch.cc
@@ -22,7 +22,7 @@ class CopyMatrixRowsFunctor<phi::CPUContext, T> {
 public:
   void operator()(const phi::CPUContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index) {
     size_t* index = index_lod.data();
...
paddle/phi/kernels/funcs/sequence2batch.cu
@@ -43,7 +43,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
 public:
   void operator()(const phi::GPUContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index) {
     auto src_dims = src.dims();
...
@@ -79,7 +79,7 @@ class CopyMatrixRowsFunctor<phi::GPUContext, T> {
     dim3 threads(128, 8);
     dim3 grid(8, 1);
     auto stream = context.stream();
-    paddle::framework::MixVector<size_t> mix_index_lod(&index_lod);
+    phi::MixVector<size_t> mix_index_lod(&index_lod);
     CopyMatrixRowsKernel<T, 128, 8, 8><<<grid, threads, 0, stream>>>(
         src_data,
         dst_data,
...
paddle/phi/kernels/funcs/sequence2batch.h
@@ -38,7 +38,7 @@ class CopyMatrixRowsFunctor {
   // The indexed rows are based on the input index.
   void operator()(const DeviceContext& context,
                   const phi::DenseTensor& src,
-                  paddle::framework::Vector<size_t> index_lod,
+                  phi::Vector<size_t> index_lod,
                   phi::DenseTensor* dst,
                   bool is_src_index);
 };
...
paddle/phi/kernels/funcs/sequence_scale.cu
@@ -46,7 +46,7 @@ class ScaleLoDTensorFunctor<phi::GPUContext, T> {
     const size_t seq_width = seq->numel() / seq->dims()[0];
     auto abs_offset_lod = paddle::framework::ToAbsOffset(lod);
     T* seq_data = context.template Alloc<T>(seq);
-    paddle::framework::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
+    phi::MixVector<size_t> mix_vector(&(abs_offset_lod[level]));
 #ifdef PADDLE_WITH_HIP
     hipLaunchKernelGGL(
...
paddle/phi/kernels/gpu/adagrad_kernel.cu
@@ -88,7 +88,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
     phi::funcs::scatter::MergeAdd<phi::GPUContext, T> merge_func;
     auto grad_merge = merge_func(context, grad);
     auto* grad_merge_data = grad_merge.mutable_value()->template data<T>();
-    paddle::framework::Vector<int64_t> merge_rows(grad_merge.rows());
+    phi::Vector<int64_t> merge_rows(grad_merge.rows());
     // 2. m += g_m * g_m
     auto grad_square =
         SquareSelectedRows<phi::GPUContext, T>(context, grad_merge);
...
@@ -104,7 +104,7 @@ struct SparseAdagradFunctor<phi::GPUContext, T> {
     const int block_size = 256;
     dim3 threads(block_size, 1);
     dim3 grid2(1, merge_rows.size());
-    paddle::framework::MixVector<int64_t> mixv_merge_rows(&merge_rows);
+    phi::MixVector<int64_t> mixv_merge_rows(&merge_rows);
     SparseAdagradFunctorKernel<T, 256>
         <<<grid2,
            threads,
...
paddle/phi/kernels/gpu/edit_distance_kernel.cu
@@ -87,8 +87,8 @@ void EditDistanceKernel(const Context& ctx,
   auto stream = reinterpret_cast<const phi::GPUContext&>(ctx).stream();
-  paddle::framework::Vector<size_t> hyp_lod(batch_size + 1);
-  paddle::framework::Vector<size_t> ref_lod(batch_size + 1);
+  phi::Vector<size_t> hyp_lod(batch_size + 1);
+  phi::Vector<size_t> ref_lod(batch_size + 1);
   bool use_length = hypslength.get_ptr() != nullptr;
...
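Here phi::Vector<size_t> serves as a LoD offset table: entry i holds the cumulative sequence length of the first i samples, so the table has batch_size + 1 entries. A small illustration of building such a table with the push_back API shown in the tests further down (the helper and its inputs are made up):

#include <cstddef>
#include <vector>

#include "paddle/phi/core/mixed_vector.h"

// Illustrative helper (not part of the patch): turn per-sample lengths into
// the (batch_size + 1)-entry offset table that EditDistanceKernel expects.
phi::Vector<size_t> BuildOffsets(const std::vector<size_t>& lengths) {
  phi::Vector<size_t> lod;
  lod.push_back(0);
  size_t running = 0;
  for (size_t len : lengths) {
    running += len;
    lod.push_back(running);
  }
  return lod;  // lengths.size() + 1 entries
}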
paddle/phi/kernels/gpu/embedding_grad_kernel.cu
@@ -14,12 +14,12 @@
 #include "paddle/phi/kernels/embedding_grad_kernel.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/fluid/memory/memcpy.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/embedding_util.h"
...
@@ -173,11 +173,11 @@ struct EmbeddingSparseGradCUDAFunctor {
     dim3 threads(128, 8);
     dim3 grids(8, 1);
     auto stream = dev_ctx_.stream();
-    paddle::framework::Vector<int64_t> new_rows;
+    phi::Vector<int64_t> new_rows;
     new_rows.resize(ids_num);
     auto gpu_place = dev_ctx_.GetPlace();
-    paddle::framework::MixVector<int64_t> mixv_new_rows(&new_rows);
+    phi::MixVector<int64_t> mixv_new_rows(&new_rows);
     if (!std::is_same<IdT, int64_t>::value) {
       InputTypeConvert<<<grids, threads, 0, stream>>>(
           ids_data, ids_num, mixv_new_rows.MutableData(gpu_place));
...
paddle/phi/kernels/gpu/sgd_kernel.cu
@@ -14,12 +14,12 @@
 #include "paddle/phi/kernels/sgd_kernel.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_helper.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
 #include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 namespace phi {
...
@@ -156,7 +156,7 @@ void SGDDenseParamSparseGradKernel(
   int thread_x = kThreadsPerBlock;
   int max_threads = dev_ctx.GetMaxPhysicalThreadCount();
   int max_blocks = std::max(max_threads / kThreadsPerBlock, 1);
-  paddle::framework::MixVector<int64_t> mixv_in_rows(&in_rows);
+  phi::MixVector<int64_t> mixv_in_rows(&in_rows);
   SparseSGDFunctorKernel<<<max_blocks, thread_x, 0, dev_ctx.stream()>>>(
       in_data,
       mixv_in_rows.CUDAData(dev_ctx.GetPlace()),
...
paddle/phi/kernels/impl/momentum_kernel_impl.h
@@ -551,7 +551,7 @@ void MomentumSparseImpl(const Context& ctx,
     merge_func(ctx, grad, merged_grad);
     auto* grad_merge_rows = merged_grad->mutable_rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
     const int64_t* rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
     int64_t row_numel =
         merged_grad->value().numel() / merged_grad->rows().size();
     funcs::ForRange<Context> for_range(ctx, param.numel());
...
paddle/phi/kernels/impl/rmsprop_kernel_impl.h
@@ -309,7 +309,7 @@ void RmspropSparseKernel(const Context &ctx,
     funcs::ForRange<Context> for_range(ctx, limit);
     auto &grad_merge_rows = merged_grad->rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(&grad_merge_rows);
     const int64_t *rows = mixv_grad_merge_rows.Data(ctx.GetPlace());
     auto &merged_tensor = merged_grad->value();
...
paddle/phi/kernels/impl/warpctc_kernel_impl.h
@@ -236,8 +236,8 @@ void WarpctcKernel(const Context& dev_ctx,
                    DenseTensor* loss,
                    DenseTensor* warpctcgrad) {
   size_t num_sequences, sequence_width, max_sequence_length;
-  paddle::framework::Vector<size_t> logits_lod;
-  paddle::framework::Vector<size_t> label_lod;
+  phi::Vector<size_t> logits_lod;
+  phi::Vector<size_t> label_lod;
   if (logits_length.is_initialized() && labels_length.is_initialized()) {
     num_sequences = logits.dims()[1];
     sequence_width = logits.dims()[2];
...
@@ -397,7 +397,7 @@ void WarpctcKernel(const Context& dev_ctx,
           paddle::operators::math::TotalSequenceLength(label_lod)),
       1});
   dev_ctx.template HostAlloc<int>(&warpctc_label);
-  std::vector<paddle::framework::Vector<size_t>> lod;
+  std::vector<phi::Vector<size_t>> lod;
   lod.push_back(label_lod);
   warpctc_label.set_lod(lod);
...
paddle/phi/kernels/selected_rows/cpu/adam_kernel.cc
@@ -126,7 +126,7 @@ void AdamDenseParamSparseGradKernel(
     auto& grad_tensor = grad_merge.value();
     const T* grad_data = grad_tensor.template data<T>();
     auto* grad_merge_rows = &grad_merge.rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
     const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
     auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
...
paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
@@ -198,7 +198,7 @@ void AdamDenseParamSparseGradKernel(
     auto& grad_tensor = grad_merge.value();
     const T* grad_data = grad_tensor.template data<T>();
     auto* grad_merge_rows = &grad_merge.rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
     const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
     auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
...
paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
@@ -222,7 +222,7 @@ void AdamwDenseParamSparseGradKernel(
     auto& grad_tensor = grad_merge.value();
     const T* grad_data = grad_tensor.template data<T>();
     auto* grad_merge_rows = &grad_merge.rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
     const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
     auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
...
paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.cc
@@ -14,9 +14,9 @@
 #include "paddle/phi/kernels/selected_rows/hsigmoid_loss_grad_kernel.h"
-#include "paddle/fluid/framework/mixed_vector.h"
 #include "paddle/phi/backends/cpu/cpu_context.h"
 #include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "paddle/phi/kernels/cpu/hsigmoid_loss_grad.h"
 namespace phi {
...
@@ -54,7 +54,7 @@ void HSigmoidLossGradKernel(const Context& ctx,
   PADDLE_ENFORCE_NOT_NULL(
       path.get_ptr(),
       errors::NotFound("Custom tree must be set for sparse mode!"));
-  paddle::framework::Vector<int64_t> real_rows = PathToRows(*path);
+  phi::Vector<int64_t> real_rows = PathToRows(*path);
   w_grad->set_rows(real_rows);
   // Build a map of id -> row_index to speed up finding the index of one id
   w_grad->set_height(w.dims()[0]);
...
paddle/phi/kernels/selected_rows/impl/lamb_kernel_impl.h
@@ -221,7 +221,7 @@ void ComputeRowImpl(const Context& dev_ctx,
     auto& grad_tensor = grad_merge.value();
     const T* grad_data = grad_tensor.template data<T>();
     auto* grad_merge_rows = &grad_merge.rows();
-    paddle::framework::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
+    phi::MixVector<int64_t> mixv_grad_merge_rows(grad_merge_rows);
     const int64_t* rows = mixv_grad_merge_rows.Data(dev_ctx.GetPlace());
     auto row_numel = grad_tensor.numel() / grad_merge.rows().size();
     if (paddle::platform::is_gpu_place(dev_ctx.GetPlace()) &&
...
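All of the optimizer kernels above (momentum, rmsprop, adam, adamw, lamb) read the merged sparse gradient's rows the same way: wrap them in a phi::MixVector and call Data(place) to get a raw pointer valid on that place. A hedged sketch of that shared step (the functor that consumes the pointer is omitted; the helper name and the SelectedRows include path are assumptions):

#include <cstdint>

#include "paddle/phi/core/mixed_vector.h"
#include "paddle/phi/core/selected_rows.h"

// Illustrative helper (not part of the patch).
template <typename Context>
void ReadMergedRows(const Context& dev_ctx, phi::SelectedRows* merged_grad) {
  auto* grad_merge_rows = merged_grad->mutable_rows();
  phi::MixVector<int64_t> mixv_rows(grad_merge_rows);
  // Data(place) exposes the row indices as a pointer usable on dev_ctx's place.
  const int64_t* rows = mixv_rows.Data(dev_ctx.GetPlace());
  (void)rows;
}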
paddle/phi/tests/core/CMakeLists.txt
@@ -70,3 +70,20 @@ cc_test(
   test_tensor_array
   SRCS test_tensor_array.cc
   DEPS tensor_array)
+if(WITH_GPU)
+  nv_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc test_mixed_vector.cu
+    DEPS mixed_vector place memory device_context tensor)
+elseif(WITH_ROCM)
+  hip_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc test_mixed_vector.cu
+    DEPS mixed_vector place memory device_context tensor)
+else()
+  cc_test(
+    test_mixed_vector
+    SRCS test_mixed_vector.cc
+    DEPS mixed_vector place memory device_context tensor)
+endif()
paddle/fluid/framework/mixed_vector_test.cc → paddle/phi/tests/core/test_mixed_vector.cc
@@ -12,7 +12,7 @@
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/framework/mixed_vector.h"
+#include "paddle/phi/core/mixed_vector.h"
 #include "glog/logging.h"
 #include "gtest/gtest-message.h"
...
@@ -21,7 +21,7 @@
 #include "gtest/gtest_pred_impl.h"
 template <typename T>
-using vec = paddle::framework::Vector<T>;
+using vec = phi::Vector<T>;
 TEST(mixed_vector, CPU_VECTOR) {
   vec<int> tmp;
...
@@ -44,7 +44,7 @@ TEST(mixed_vector, CPU_VECTOR) {
 }
 TEST(mixed_vector, InitWithCount) {
-  paddle::framework::Vector<int> vec(10, 10);
+  phi::Vector<int> vec(10, 10);
   for (int i = 0; i < 10; ++i) {
     ASSERT_EQ(vec[i], 10);
   }
...
@@ -58,7 +58,7 @@ TEST(mixed_vector, ForEach) {
 }
 TEST(mixed_vector, Reserve) {
-  paddle::framework::Vector<int> vec;
+  phi::Vector<int> vec;
   vec.reserve(1);
   vec.push_back(0);
   vec.push_back(0);
...
@@ -66,7 +66,7 @@ TEST(mixed_vector, Reserve) {
 }
 TEST(mixed_vector, Resize) {
-  paddle::framework::Vector<int> vec;
+  phi::Vector<int> vec;
   vec.resize(1);
   vec.push_back(0);
   vec.push_back(0);
...
paddle/fluid/framework/mixed_vector_test.cu → paddle/phi/tests/core/test_mixed_vector.cu
@@ -23,13 +23,14 @@
 #include "glog/logging.h"
 #include "gtest/gtest.h"
-#include "paddle/fluid/framework/mixed_vector.h"
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"
-#include "paddle/fluid/platform/device_context.h"
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/backends/gpu/gpu_info.h"
+#include "paddle/phi/common/place.h"
+#include "paddle/phi/core/mixed_vector.h"
 template <typename T>
-using vec = paddle::framework::MixVector<T>;
-using gpuStream_t = paddle::gpuStream_t;
+using vec = phi::MixVector<T>;
+using gpuStream_t = phi::gpuStream_t;
 static __global__ void multiply_10(int* ptr) {
   for (int i = 0; i < 10; ++i) {
...
@@ -37,9 +38,9 @@ static __global__ void multiply_10(int* ptr) {
   }
 }
-gpuStream_t GetCUDAStream(paddle::platform::CUDAPlace place) {
+gpuStream_t GetCUDAStream(phi::GPUPlace place) {
   return reinterpret_cast<const phi::GPUContext*>(
-             paddle::platform::DeviceContextPool::Instance().Get(place))
+             phi::DeviceContextPool::Instance().Get(place))
       ->stream();
 }
...
@@ -50,7 +51,7 @@ TEST(mixed_vector, GPU_VECTOR) {
   }
   vec<int> tmp(&x);
   ASSERT_EQ(tmp.size(), 10UL);
-  paddle::platform::CUDAPlace gpu(0);
+  phi::GPUPlace gpu(0);
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(multiply_10,
...
@@ -69,7 +70,7 @@ TEST(mixed_vector, GPU_VECTOR) {
 }
 TEST(mixed_vector, MultiGPU) {
-  if (paddle::platform::GetGPUDeviceCount() < 2) {
+  if (phi::backends::gpu::GetGPUDeviceCount() < 2) {
     LOG(WARNING) << "Skip mixed_vector.MultiGPU since there are not multiple "
                     "GPUs in your machine.";
     return;
...
@@ -81,8 +82,8 @@ TEST(mixed_vector, MultiGPU) {
   }
   vec<int> tmp(&x);
   ASSERT_EQ(tmp.size(), 10UL);
-  paddle::platform::CUDAPlace gpu0(0);
-  paddle::platform::SetDeviceId(0);
+  phi::GPUPlace gpu0(0);
+  phi::backends::gpu::SetDeviceId(0);
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(multiply_10,
...
@@ -94,9 +95,9 @@ TEST(mixed_vector, MultiGPU) {
 #else
   multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0));
 #endif
-  paddle::platform::CUDAPlace gpu1(1);
+  phi::GPUPlace gpu1(1);
   auto* gpu1_ptr = tmp.MutableData(gpu1);
-  paddle::platform::SetDeviceId(1);
+  phi::backends::gpu::SetDeviceId(1);
 #ifdef PADDLE_WITH_HIP
   hipLaunchKernelGGL(
...
tools/parallel_UT_rule.py
@@ -913,7 +913,7 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_mix_precision_all_reduce_fuse',
     'test_spp_op',
     'test_op_converter',
-    'mixed_vector_test',
+    'test_mixed_vector',
     'test_roi_align_op',
     'test_pad_constant_like',
     'test_mul_op',
...
@@ -2288,7 +2288,7 @@ TETRAD_PARALLEL_JOB = [
     'device_context_test',
     'test_reference_count_pass_last_lived_ops',
     'copy_same_tensor_test',
-    'mixed_vector_test',
+    'test_mixed_vector',
     'op_registry_test',
     'test_prepare_op',
     'data_device_transform_test',
...