Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
30a31a53
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
30a31a53
编写于
11月 28, 2022
作者:
张
张春乔
提交者:
GitHub
11月 28, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
replace LoDTensor with phi::DenseTensor in fluid\operators\*\ except sequence_ops (#48418)
上级
8424cf28
变更
106
隐藏空白更改
内联
并排
Showing
106 changed file
with
683 addition
and
638 deletion
+683
-638
paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
+2
-2
paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
+4
-4
paddle/fluid/operators/cinn/cinn_launch_context.cc
paddle/fluid/operators/cinn/cinn_launch_context.cc
+7
-7
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
+3
-4
paddle/fluid/operators/cinn/cinn_launch_op.cc
paddle/fluid/operators/cinn/cinn_launch_op.cc
+3
-3
paddle/fluid/operators/cinn/cinn_launch_op.h
paddle/fluid/operators/cinn/cinn_launch_op.h
+7
-7
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
+2
-2
paddle/fluid/operators/cinn/test_helper.h
paddle/fluid/operators/cinn/test_helper.h
+7
-7
paddle/fluid/operators/collective/c_embedding_op.cc
paddle/fluid/operators/collective/c_embedding_op.cc
+1
-1
paddle/fluid/operators/collective/c_embedding_op.cu
paddle/fluid/operators/collective/c_embedding_op.cu
+8
-6
paddle/fluid/operators/collective/c_embedding_op.h
paddle/fluid/operators/collective/c_embedding_op.h
+9
-9
paddle/fluid/operators/collective/c_embedding_op_npu.cc
paddle/fluid/operators/collective/c_embedding_op_npu.cc
+10
-8
paddle/fluid/operators/collective/c_embedding_op_xpu.cc
paddle/fluid/operators/collective/c_embedding_op_xpu.cc
+3
-5
paddle/fluid/operators/controlflow/conditional_block_op.cc
paddle/fluid/operators/controlflow/conditional_block_op.cc
+13
-11
paddle/fluid/operators/controlflow/conditional_block_op_test.cc
.../fluid/operators/controlflow/conditional_block_op_test.cc
+2
-3
paddle/fluid/operators/controlflow/feed_op.cc
paddle/fluid/operators/controlflow/feed_op.cc
+4
-4
paddle/fluid/operators/controlflow/fetch_op.cc
paddle/fluid/operators/controlflow/fetch_op.cc
+5
-3
paddle/fluid/operators/controlflow/fetch_v2_op.cc
paddle/fluid/operators/controlflow/fetch_v2_op.cc
+4
-2
paddle/fluid/operators/controlflow/logical_op.cc
paddle/fluid/operators/controlflow/logical_op.cc
+8
-7
paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
...fluid/operators/controlflow/tensor_array_read_write_op.cc
+7
-6
paddle/fluid/operators/controlflow/while_op.cc
paddle/fluid/operators/controlflow/while_op.cc
+13
-12
paddle/fluid/operators/detection/bbox_util.cu.h
paddle/fluid/operators/detection/bbox_util.cu.h
+0
-1
paddle/fluid/operators/detection/bipartite_match_op.cc
paddle/fluid/operators/detection/bipartite_match_op.cc
+5
-5
paddle/fluid/operators/detection/box_clip_op.cc
paddle/fluid/operators/detection/box_clip_op.cc
+4
-4
paddle/fluid/operators/detection/box_clip_op.cu
paddle/fluid/operators/detection/box_clip_op.cu
+2
-2
paddle/fluid/operators/detection/box_clip_op.h
paddle/fluid/operators/detection/box_clip_op.h
+3
-4
paddle/fluid/operators/detection/box_coder_op.cc
paddle/fluid/operators/detection/box_coder_op.cc
+3
-2
paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
...le/fluid/operators/detection/box_decoder_and_assign_op.cc
+6
-8
paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
+8
-6
paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
+3
-4
paddle/fluid/operators/detection/collect_fpn_proposals_op.h
paddle/fluid/operators/detection/collect_fpn_proposals_op.h
+1
-1
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
.../fluid/operators/detection/distribute_fpn_proposals_op.cc
+4
-2
paddle/fluid/operators/detection/generate_mask_labels_op.cc
paddle/fluid/operators/detection/generate_mask_labels_op.cc
+28
-19
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
.../fluid/operators/detection/generate_proposal_labels_op.cc
+39
-25
paddle/fluid/operators/detection/generate_proposals_op.cc
paddle/fluid/operators/detection/generate_proposals_op.cc
+6
-6
paddle/fluid/operators/detection/generate_proposals_op.cu
paddle/fluid/operators/detection/generate_proposals_op.cu
+2
-3
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
+4
-4
paddle/fluid/operators/detection/iou_similarity_op.cc
paddle/fluid/operators/detection/iou_similarity_op.cc
+14
-13
paddle/fluid/operators/detection/locality_aware_nms_op.cc
paddle/fluid/operators/detection/locality_aware_nms_op.cc
+10
-9
paddle/fluid/operators/detection/matrix_nms_op.cc
paddle/fluid/operators/detection/matrix_nms_op.cc
+5
-4
paddle/fluid/operators/detection/mine_hard_examples_op.cc
paddle/fluid/operators/detection/mine_hard_examples_op.cc
+9
-9
paddle/fluid/operators/detection/multiclass_nms_op.cc
paddle/fluid/operators/detection/multiclass_nms_op.cc
+13
-11
paddle/fluid/operators/detection/retinanet_detection_output_op.cc
...luid/operators/detection/retinanet_detection_output_op.cc
+7
-6
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
...fluid/operators/detection/roi_perspective_transform_op.cc
+4
-5
paddle/fluid/operators/detection/rpn_target_assign_op.cc
paddle/fluid/operators/detection/rpn_target_assign_op.cc
+31
-28
paddle/fluid/operators/detection/target_assign_op.cc
paddle/fluid/operators/detection/target_assign_op.cc
+4
-2
paddle/fluid/operators/elementwise/elementwise_add_op.cc
paddle/fluid/operators/elementwise/elementwise_add_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_div_op.cc
paddle/fluid/operators/elementwise/elementwise_div_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
...le/fluid/operators/elementwise/elementwise_floordiv_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_mul_op.cc
paddle/fluid/operators/elementwise/elementwise_mul_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+8
-7
paddle/fluid/operators/elementwise/elementwise_op_function.h
paddle/fluid/operators/elementwise/elementwise_op_function.h
+3
-3
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_xpu.h
paddle/fluid/operators/elementwise/elementwise_xpu.h
+2
-2
paddle/fluid/operators/fused/fused_bn_activation_op.cc
paddle/fluid/operators/fused/fused_bn_activation_op.cc
+2
-4
paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
+2
-4
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
+21
-19
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
+0
-1
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+1
-1
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+19
-15
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
+2
-2
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
+7
-5
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
+0
-2
paddle/fluid/operators/fused/fusion_group_op.cc
paddle/fluid/operators/fused/fusion_group_op.cc
+2
-2
paddle/fluid/operators/fused/fusion_gru_op.cc
paddle/fluid/operators/fused/fusion_gru_op.cc
+15
-14
paddle/fluid/operators/fused/fusion_gru_op.h
paddle/fluid/operators/fused/fusion_gru_op.h
+0
-1
paddle/fluid/operators/fused/fusion_lstm_op.cc
paddle/fluid/operators/fused/fusion_lstm_op.cc
+40
-37
paddle/fluid/operators/fused/fusion_lstm_op.h
paddle/fluid/operators/fused/fusion_lstm_op.h
+0
-1
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
+2
-2
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
+0
-1
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
...le/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
+10
-9
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
+0
-1
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
...le/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
+5
-4
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
+0
-1
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
+5
-4
paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
+0
-1
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
+5
-4
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
+0
-1
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
+0
-1
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+6
-5
paddle/fluid/operators/fused/multi_gru_op.cc
paddle/fluid/operators/fused/multi_gru_op.cc
+7
-6
paddle/fluid/operators/fused/multi_gru_op.h
paddle/fluid/operators/fused/multi_gru_op.h
+0
-1
paddle/fluid/operators/math/context_project.h
paddle/fluid/operators/math/context_project.h
+4
-6
paddle/fluid/operators/math/sequence_padding.h
paddle/fluid/operators/math/sequence_padding.h
+7
-9
paddle/fluid/operators/math/sequence_pooling.cc
paddle/fluid/operators/math/sequence_pooling.cc
+0
-1
paddle/fluid/operators/math/sequence_scale.h
paddle/fluid/operators/math/sequence_scale.h
+2
-1
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+5
-6
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
+0
-1
paddle/fluid/operators/nccl/nccl_op.cu.cc
paddle/fluid/operators/nccl/nccl_op.cu.cc
+6
-8
paddle/fluid/operators/optimizers/adam_op_mlu.cc
paddle/fluid/operators/optimizers/adam_op_mlu.cc
+22
-23
paddle/fluid/operators/optimizers/adam_op_npu.cc
paddle/fluid/operators/optimizers/adam_op_npu.cc
+18
-19
paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
+14
-14
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
+2
-2
paddle/fluid/operators/optimizers/dpsgd_op.cc
paddle/fluid/operators/optimizers/dpsgd_op.cc
+12
-12
paddle/fluid/operators/optimizers/dpsgd_op.h
paddle/fluid/operators/optimizers/dpsgd_op.h
+2
-2
paddle/fluid/operators/optimizers/lamb_op.cc
paddle/fluid/operators/optimizers/lamb_op.cc
+3
-3
paddle/fluid/operators/optimizers/lars_momentum_op.cc
paddle/fluid/operators/optimizers/lars_momentum_op.cc
+13
-13
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+6
-6
paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
+9
-10
paddle/fluid/operators/optimizers/sgd_op.cu
paddle/fluid/operators/optimizers/sgd_op.cu
+2
-2
paddle/fluid/operators/optimizers/sgd_op.h
paddle/fluid/operators/optimizers/sgd_op.h
+2
-2
paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
+4
-3
paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
+6
-5
paddle/fluid/operators/pscore/fake_init_op.cc
paddle/fluid/operators/pscore/fake_init_op.cc
+1
-1
paddle/fluid/operators/reader/create_py_reader_op.cc
paddle/fluid/operators/reader/create_py_reader_op.cc
+1
-1
paddle/fluid/operators/reader/read_op.cc
paddle/fluid/operators/reader/read_op.cc
+1
-1
未找到文件。
paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
浏览文件 @
30a31a53
...
@@ -68,11 +68,11 @@ class CinnInstructionRunOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -68,11 +68,11 @@ class CinnInstructionRunOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
kX
,
AddInput
(
kX
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the input arguments of this cinn instruction"
)
"which are the input arguments of this cinn instruction"
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
kOutputs
,
AddOutput
(
kOutputs
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the output arguments of this cinn instruction"
)
"which are the output arguments of this cinn instruction"
)
.
AsDuplicable
();
.
AsDuplicable
();
AddAttr
<
int64_t
>
(
AddAttr
<
int64_t
>
(
...
...
paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
浏览文件 @
30a31a53
...
@@ -74,7 +74,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
...
@@ -74,7 +74,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
// so a cinn_instruction_run_op will throw an error
// so a cinn_instruction_run_op will throw an error
framework
::
Scope
scope
;
framework
::
Scope
scope
;
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
ASSERT_THROW
(
cinn_instruction_run_op
->
Run
(
scope
,
place
),
ASSERT_THROW
(
cinn_instruction_run_op
->
Run
(
scope
,
place
),
paddle
::
platform
::
EnforceNotMet
);
paddle
::
platform
::
EnforceNotMet
);
...
@@ -83,7 +83,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
...
@@ -83,7 +83,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
// of both type float and int
// of both type float and int
cinn_launch_op
->
Run
(
scope
,
place
);
cinn_launch_op
->
Run
(
scope
,
place
);
scope
.
EraseVars
({
"x"
,
"y"
,
test_op_out_name
});
scope
.
EraseVars
({
"x"
,
"y"
,
test_op_out_name
});
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
InitVariablesWithRandomValue
<
int
>
({
"x"
,
"y"
},
{
30
,
40
},
place
,
&
scope
);
InitVariablesWithRandomValue
<
int
>
({
"x"
,
"y"
},
{
30
,
40
},
place
,
&
scope
);
cinn_launch_op
->
Run
(
scope
,
place
);
cinn_launch_op
->
Run
(
scope
,
place
);
}
}
...
@@ -92,8 +92,8 @@ class TestCinnInstructionRunOp : public ::testing::Test {
...
@@ -92,8 +92,8 @@ class TestCinnInstructionRunOp : public ::testing::Test {
// Run ops and check the computation results
// Run ops and check the computation results
framework
::
Scope
scope
;
framework
::
Scope
scope
;
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
scope
.
Var
(
add_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
add_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
elementwise_add_op
->
Run
(
scope
,
place
);
elementwise_add_op
->
Run
(
scope
,
place
);
cinn_launch_op
->
Run
(
scope
,
place
);
cinn_launch_op
->
Run
(
scope
,
place
);
CompareOpResult
<
float
>
(
scope
.
GetVar
(
test_op_out_name
),
CompareOpResult
<
float
>
(
scope
.
GetVar
(
test_op_out_name
),
...
...
paddle/fluid/operators/cinn/cinn_launch_context.cc
浏览文件 @
30a31a53
...
@@ -45,7 +45,6 @@
...
@@ -45,7 +45,6 @@
namespace
paddle
{
namespace
paddle
{
namespace
operators
::
details
{
namespace
operators
::
details
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
framework
::
ParallelExecutor
;
using
framework
::
ParallelExecutor
;
using
framework
::
Scope
;
using
framework
::
Scope
;
using
CinnInstruction
=
::
cinn
::
hlir
::
framework
::
Instruction
;
using
CinnInstruction
=
::
cinn
::
hlir
::
framework
::
Instruction
;
...
@@ -268,7 +267,8 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
...
@@ -268,7 +267,8 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
// assign external malloc/free callbacks of cinn_buffer_t
// assign external malloc/free callbacks of cinn_buffer_t
cinn_buffer
->
external_malloc
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
cinn_buffer
->
external_malloc
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
cached_scope_
->
GetVar
(
var_name
)
->
GetMutable
<
LoDTensor
>
();
auto
*
tensor
=
cached_scope_
->
GetVar
(
var_name
)
->
GetMutable
<
phi
::
DenseTensor
>
();
tensor
->
Resize
(
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
));
tensor
->
Resize
(
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
));
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
(
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
(
*
cached_place_
,
*
cached_place_
,
...
@@ -294,7 +294,7 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
...
@@ -294,7 +294,7 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
cinn_buffer
->
external_malloc
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
cinn_buffer
->
external_malloc
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
auto
*
tensor
=
cached_temp_scope_
->
Var
(
var_name
)
->
GetMutable
<
LoD
Tensor
>
();
cached_temp_scope_
->
Var
(
var_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
tensor
->
Resize
(
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
));
tensor
->
Resize
(
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
));
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
(
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
(
*
cached_place_
,
*
cached_place_
,
...
@@ -306,8 +306,8 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
...
@@ -306,8 +306,8 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
// if no instruction use it
// if no instruction use it
cinn_buffer
->
external_free
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
cinn_buffer
->
external_free
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
auto
*
tensor
=
cached_temp_scope_
->
GetVar
(
var_name
)
cached_temp_scope_
->
GetVar
(
var_name
)
->
GetMutable
<
LoD
Tensor
>
();
->
GetMutable
<
phi
::
Dense
Tensor
>
();
tensor
->
clear
();
tensor
->
clear
();
return
0
;
return
0
;
});
});
...
@@ -438,8 +438,8 @@ ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place,
...
@@ -438,8 +438,8 @@ ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place,
auto
*
var
=
scope
->
GetVar
(
var_name
);
auto
*
var
=
scope
->
GetVar
(
var_name
);
auto
*
buffer
=
GetCinnBufferOfVar
(
var_name
);
auto
*
buffer
=
GetCinnBufferOfVar
(
var_name
);
auto
dim
=
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
);
auto
dim
=
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
);
var
->
GetMutable
<
LoD
Tensor
>
()
->
Resize
(
dim
);
var
->
GetMutable
<
phi
::
Dense
Tensor
>
()
->
Resize
(
dim
);
var
->
GetMutable
<
LoD
Tensor
>
()
->
mutable_data
(
var
->
GetMutable
<
phi
::
Dense
Tensor
>
()
->
mutable_data
(
place
,
framework
::
paddle2cinn
::
TransToPaddleDataType
(
buffer
->
type
));
place
,
framework
::
paddle2cinn
::
TransToPaddleDataType
(
buffer
->
type
));
}
}
return
parallel_executor_
.
get
();
return
parallel_executor_
.
get
();
...
...
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
浏览文件 @
30a31a53
...
@@ -40,7 +40,6 @@ USE_OP(cinn_instruction_run);
...
@@ -40,7 +40,6 @@ USE_OP(cinn_instruction_run);
namespace
paddle
{
namespace
paddle
{
namespace
operators
::
details
{
namespace
operators
::
details
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
framework
::
OpDesc
;
using
framework
::
OpDesc
;
using
framework
::
ParallelExecutor
;
using
framework
::
ParallelExecutor
;
using
framework
::
ProgramDesc
;
using
framework
::
ProgramDesc
;
...
@@ -203,8 +202,8 @@ TEST_F(CinnLaunchContextTest, TestConstructResult) {
...
@@ -203,8 +202,8 @@ TEST_F(CinnLaunchContextTest, TestConstructResult) {
TEST_F
(
CinnLaunchContextTest
,
TestCheckTensorEquivalent
)
{
TEST_F
(
CinnLaunchContextTest
,
TestCheckTensorEquivalent
)
{
platform
::
CPUPlace
place
;
platform
::
CPUPlace
place
;
framework
::
Scope
scope
;
framework
::
Scope
scope
;
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
auto
*
tensor2
=
scope
.
Var
(
"var2"
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor2
=
scope
.
Var
(
"var2"
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
// dimension not equivalent
// dimension not equivalent
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
3
,
5
}),
place
);
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
3
,
5
}),
place
);
...
@@ -264,7 +263,7 @@ TEST_F(CinnLaunchContextTest, TestCallbackAssignment) {
...
@@ -264,7 +263,7 @@ TEST_F(CinnLaunchContextTest, TestCallbackAssignment) {
launch_context
->
UpdateCapturedEnv
(
scope
,
place
);
launch_context
->
UpdateCapturedEnv
(
scope
,
place
);
// assign external variables
// assign external variables
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
float
*
data1
=
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
3
,
4
}),
place
);
float
*
data1
=
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
3
,
4
}),
place
);
data1
[
0
]
=
9.99
f
;
data1
[
0
]
=
9.99
f
;
data1
[
10
]
=
19.99
f
;
data1
[
10
]
=
19.99
f
;
...
...
paddle/fluid/operators/cinn/cinn_launch_op.cc
浏览文件 @
30a31a53
...
@@ -128,18 +128,18 @@ class CinnLaunchOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -128,18 +128,18 @@ class CinnLaunchOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
kX
,
AddInput
(
kX
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the input of graph inside the CinnLaunchOp"
"which are the input of graph inside the CinnLaunchOp"
"excluding kNoNeedBufferX."
)
"excluding kNoNeedBufferX."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
kNoNeedBufferX
,
AddInput
(
kNoNeedBufferX
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the input of graph inside the CinnLaunchOp but"
"which are the input of graph inside the CinnLaunchOp but"
"their buffer are not needed."
)
"their buffer are not needed."
)
.
AsDuplicable
()
.
AsDuplicable
()
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
kOutputs
,
AddOutput
(
kOutputs
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the output of graph inside the CinnLaunchOp."
)
"which are the output of graph inside the CinnLaunchOp."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddAttr
<
int64_t
>
(
AddAttr
<
int64_t
>
(
...
...
paddle/fluid/operators/cinn/cinn_launch_op.h
浏览文件 @
30a31a53
...
@@ -34,7 +34,6 @@ DECLARE_bool(enable_pe_launch_cinn);
...
@@ -34,7 +34,6 @@ DECLARE_bool(enable_pe_launch_cinn);
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
CinnCompiler
=
framework
::
paddle2cinn
::
CinnCompiler
;
using
CinnCompiler
=
framework
::
paddle2cinn
::
CinnCompiler
;
using
CinnCompiledObject
=
framework
::
paddle2cinn
::
CinnCompiledObject
;
using
CinnCompiledObject
=
framework
::
paddle2cinn
::
CinnCompiledObject
;
...
@@ -76,29 +75,30 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
...
@@ -76,29 +75,30 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
<<
"value:
\n
"
<<
"value:
\n
"
<<
CinnCompiler
::
GetInstance
()
->
ReadableKey
(
compilation_key
);
<<
CinnCompiler
::
GetInstance
()
->
ReadableKey
(
compilation_key
);
std
::
map
<
std
::
string
,
const
LoD
Tensor
*>
inputs_name2tensor
;
std
::
map
<
std
::
string
,
const
phi
::
Dense
Tensor
*>
inputs_name2tensor
;
std
::
vector
<
std
::
string
>
input_x_variable_names
;
std
::
vector
<
std
::
string
>
input_x_variable_names
;
std
::
vector
<
std
::
string
>
input_no_need_buffer_variable_names
;
std
::
vector
<
std
::
string
>
input_no_need_buffer_variable_names
;
auto
add_name2tensor_fn
=
auto
add_name2tensor_fn
=
[
&
inputs_name2tensor
](
const
std
::
vector
<
std
::
string
>&
variable_names
,
[
&
inputs_name2tensor
](
const
std
::
vector
<
const
LoDTensor
*>&
tensors
)
{
const
std
::
vector
<
std
::
string
>&
variable_names
,
const
std
::
vector
<
const
phi
::
DenseTensor
*>&
tensors
)
{
std
::
transform
(
std
::
transform
(
variable_names
.
begin
(),
variable_names
.
begin
(),
variable_names
.
end
(),
variable_names
.
end
(),
tensors
.
begin
(),
tensors
.
begin
(),
std
::
inserter
(
inputs_name2tensor
,
inputs_name2tensor
.
end
()),
std
::
inserter
(
inputs_name2tensor
,
inputs_name2tensor
.
end
()),
[](
const
std
::
string
&
name
,
const
LoD
Tensor
*
tensor
)
{
[](
const
std
::
string
&
name
,
const
phi
::
Dense
Tensor
*
tensor
)
{
return
std
::
make_pair
(
name
,
tensor
);
return
std
::
make_pair
(
name
,
tensor
);
});
});
};
};
auto
input_x_tensors
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
kX
);
auto
input_x_tensors
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
kX
);
if
(
!
input_x_tensors
.
empty
())
{
if
(
!
input_x_tensors
.
empty
())
{
input_x_variable_names
=
std
::
move
(
ctx
.
InputNames
(
kX
));
input_x_variable_names
=
std
::
move
(
ctx
.
InputNames
(
kX
));
add_name2tensor_fn
(
input_x_variable_names
,
input_x_tensors
);
add_name2tensor_fn
(
input_x_variable_names
,
input_x_tensors
);
}
}
auto
input_no_need_buffer_tensors
=
auto
input_no_need_buffer_tensors
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
kNoNeedBufferX
);
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
kNoNeedBufferX
);
if
(
!
input_no_need_buffer_tensors
.
empty
())
{
if
(
!
input_no_need_buffer_tensors
.
empty
())
{
input_no_need_buffer_variable_names
=
input_no_need_buffer_variable_names
=
std
::
move
(
ctx
.
InputNames
(
kNoNeedBufferX
));
std
::
move
(
ctx
.
InputNames
(
kNoNeedBufferX
));
...
...
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
浏览文件 @
30a31a53
...
@@ -78,8 +78,8 @@ class TestCinnLaunchOp : public ::testing::Test {
...
@@ -78,8 +78,8 @@ class TestCinnLaunchOp : public ::testing::Test {
// Run ops and check the computation results
// Run ops and check the computation results
framework
::
Scope
scope
;
framework
::
Scope
scope
;
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
scope
.
Var
(
add_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
add_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
elementwise_add_op
->
Run
(
scope
,
place
);
elementwise_add_op
->
Run
(
scope
,
place
);
cinn_launch_op
->
Run
(
scope
,
place
);
cinn_launch_op
->
Run
(
scope
,
place
);
CompareOpResult
<
float
>
(
scope
.
GetVar
(
test_op_out_name
),
CompareOpResult
<
float
>
(
scope
.
GetVar
(
test_op_out_name
),
...
...
paddle/fluid/operators/cinn/test_helper.h
浏览文件 @
30a31a53
...
@@ -29,7 +29,6 @@ limitations under the License. */
...
@@ -29,7 +29,6 @@ limitations under the License. */
namespace
paddle
::
operators
{
namespace
paddle
::
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Variable
=
framework
::
Variable
;
using
Variable
=
framework
::
Variable
;
using
Graph
=
framework
::
ir
::
Graph
;
using
Graph
=
framework
::
ir
::
Graph
;
using
Node
=
framework
::
ir
::
Node
;
using
Node
=
framework
::
ir
::
Node
;
...
@@ -97,11 +96,11 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
...
@@ -97,11 +96,11 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
std
::
default_random_engine
engine
(
seed
());
std
::
default_random_engine
engine
(
seed
());
std
::
uniform_real_distribution
<
float
>
dist
(
0
,
100
);
std
::
uniform_real_distribution
<
float
>
dist
(
0
,
100
);
LoD
Tensor
tmp_tensor
;
phi
::
Dense
Tensor
tmp_tensor
;
auto
*
tmp_data
=
auto
*
tmp_data
=
tmp_tensor
.
mutable_data
<
DataType
>
(
common_ddim
,
platform
::
CPUPlace
());
tmp_tensor
.
mutable_data
<
DataType
>
(
common_ddim
,
platform
::
CPUPlace
());
for
(
const
auto
&
var_name
:
var_names
)
{
for
(
const
auto
&
var_name
:
var_names
)
{
auto
*
tensor
=
scope
->
Var
(
var_name
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor
=
scope
->
Var
(
var_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
tensor
->
mutable_data
<
DataType
>
(
common_ddim
,
place
);
tensor
->
mutable_data
<
DataType
>
(
common_ddim
,
place
);
for
(
auto
i
=
0
;
i
<
tensor
->
numel
();
++
i
)
{
for
(
auto
i
=
0
;
i
<
tensor
->
numel
();
++
i
)
{
tmp_data
[
i
]
=
static_cast
<
DataType
>
(
dist
(
engine
));
tmp_data
[
i
]
=
static_cast
<
DataType
>
(
dist
(
engine
));
...
@@ -112,11 +111,12 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
...
@@ -112,11 +111,12 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
template
<
typename
DataType
>
template
<
typename
DataType
>
void
CompareOpResult
(
Variable
*
test_out
,
Variable
*
expected_out
)
{
void
CompareOpResult
(
Variable
*
test_out
,
Variable
*
expected_out
)
{
LoD
Tensor
test_tensor
,
expected_tensor
;
phi
::
Dense
Tensor
test_tensor
,
expected_tensor
;
paddle
::
framework
::
TensorCopySync
(
paddle
::
framework
::
TensorCopySync
(
test_out
->
Get
<
LoDTensor
>
(),
platform
::
CPUPlace
(),
&
test_tensor
);
test_out
->
Get
<
phi
::
DenseTensor
>
(),
platform
::
CPUPlace
(),
&
test_tensor
);
paddle
::
framework
::
TensorCopySync
(
paddle
::
framework
::
TensorCopySync
(
expected_out
->
Get
<
phi
::
DenseTensor
>
(),
expected_out
->
Get
<
LoDTensor
>
(),
platform
::
CPUPlace
(),
&
expected_tensor
);
platform
::
CPUPlace
(),
&
expected_tensor
);
ASSERT_TRUE
(
test_tensor
.
IsInitialized
());
ASSERT_TRUE
(
test_tensor
.
IsInitialized
());
ASSERT_TRUE
(
expected_tensor
.
IsInitialized
());
ASSERT_TRUE
(
expected_tensor
.
IsInitialized
());
...
...
paddle/fluid/operators/collective/c_embedding_op.cc
浏览文件 @
30a31a53
...
@@ -162,7 +162,7 @@ class CEmbeddingOpGradVarTypeInference : public framework::VarTypeInference {
...
@@ -162,7 +162,7 @@ class CEmbeddingOpGradVarTypeInference : public framework::VarTypeInference {
void
operator
()(
framework
::
InferVarTypeContext
*
ctx
)
const
override
{
void
operator
()(
framework
::
InferVarTypeContext
*
ctx
)
const
override
{
auto
out_var_name
=
framework
::
GradVarName
(
"W"
);
auto
out_var_name
=
framework
::
GradVarName
(
"W"
);
VLOG
(
3
)
<<
"c_embedding_grad op "
<<
framework
::
GradVarName
(
"W"
)
VLOG
(
3
)
<<
"c_embedding_grad op "
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to
LoD
Tensor"
;
<<
" is set to
phi::Dense
Tensor"
;
ctx
->
SetOutputType
(
out_var_name
,
framework
::
proto
::
VarType
::
LOD_TENSOR
);
ctx
->
SetOutputType
(
out_var_name
,
framework
::
proto
::
VarType
::
LOD_TENSOR
);
ctx
->
SetOutputDataType
(
out_var_name
,
ctx
->
GetInputDataType
(
"W"
));
ctx
->
SetOutputDataType
(
out_var_name
,
ctx
->
GetInputDataType
(
"W"
));
}
}
...
...
paddle/fluid/operators/collective/c_embedding_op.cu
浏览文件 @
30a31a53
...
@@ -86,9 +86,9 @@ template <typename T>
...
@@ -86,9 +86,9 @@ template <typename T>
class
CEmbeddingCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CEmbeddingCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
table_t
=
context
.
Input
<
LoD
Tensor
>
(
"W"
);
auto
*
table_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
);
auto
*
ids_t
=
context
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
ids_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
auto
*
output_t
=
context
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
output_t
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
GPUContext
>();
const
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
GPUContext
>();
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
...
@@ -142,9 +142,11 @@ class CEmbeddingGradCUDAKernel : public framework::OpKernel<T> {
...
@@ -142,9 +142,11 @@ class CEmbeddingGradCUDAKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
GPUContext
>();
const
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
GPUContext
>();
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
auto
ids_t
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
ids_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
d_output_t
=
auto
d_table_t
=
context
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
d_table_t
=
context
.
Output
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"W"
));
int
N
=
d_table_t
->
dims
()[
0
];
int
N
=
d_table_t
->
dims
()[
0
];
int
D
=
d_table_t
->
dims
()[
1
];
int
D
=
d_table_t
->
dims
()[
1
];
...
...
paddle/fluid/operators/collective/c_embedding_op.h
浏览文件 @
30a31a53
...
@@ -25,8 +25,6 @@ limitations under the License. */
...
@@ -25,8 +25,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
inline
void
CheckTableValid
()
{}
inline
void
CheckTableValid
()
{}
template
<
typename
TIds
,
typename
TData
>
template
<
typename
TIds
,
typename
TData
>
...
@@ -57,9 +55,9 @@ template <typename T>
...
@@ -57,9 +55,9 @@ template <typename T>
class
CEmbeddingOpCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CEmbeddingOpCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
table_t
=
ctx
.
Input
<
LoD
Tensor
>
(
"W"
);
auto
*
table_t
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
);
auto
*
ids_t
=
ctx
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
ids_t
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
auto
*
output_t
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
output_t
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
int64_t
start_idx
=
ctx
.
Attr
<
int64_t
>
(
"start_index"
);
const
int64_t
start_idx
=
ctx
.
Attr
<
int64_t
>
(
"start_index"
);
VLOG
(
10
)
<<
"table_dims:"
<<
table_t
->
dims
();
VLOG
(
10
)
<<
"table_dims:"
<<
table_t
->
dims
();
...
@@ -119,10 +117,12 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel<T> {
...
@@ -119,10 +117,12 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
auto
ids_t
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
ids_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
d_output_t
=
auto
table_t
=
context
.
Input
<
LoDTensor
>
(
"W"
);
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
table_grad_t
=
context
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
auto
table_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
auto
table_grad_t
=
context
.
Output
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"W"
));
T
*
table_grad_data
=
T
*
table_grad_data
=
table_grad_t
->
mutable_data
<
T
>
(
table_t
->
dims
(),
context
.
GetPlace
());
table_grad_t
->
mutable_data
<
T
>
(
table_t
->
dims
(),
context
.
GetPlace
());
...
...
paddle/fluid/operators/collective/c_embedding_op_npu.cc
浏览文件 @
30a31a53
...
@@ -111,9 +111,9 @@ void shard_index(const Tensor &table_t,
...
@@ -111,9 +111,9 @@ void shard_index(const Tensor &table_t,
template
<
typename
TIds
,
typename
T
>
template
<
typename
TIds
,
typename
T
>
void
NPUGetIdsEmbedding
(
const
framework
::
ExecutionContext
&
context
)
{
void
NPUGetIdsEmbedding
(
const
framework
::
ExecutionContext
&
context
)
{
auto
*
table_t
=
context
.
Input
<
LoD
Tensor
>
(
"W"
);
auto
*
table_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
);
auto
*
ids_t
=
context
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
ids_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
auto
*
output_t
=
context
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
output_t
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
auto
stream
=
auto
stream
=
...
@@ -165,7 +165,7 @@ template <typename T>
...
@@ -165,7 +165,7 @@ template <typename T>
class
CEmbeddingNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CEmbeddingNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
ids_t
=
context
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
ids_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
const
auto
&
index_type
=
framework
::
TransToProtoVarType
(
ids_t
->
dtype
());
const
auto
&
index_type
=
framework
::
TransToProtoVarType
(
ids_t
->
dtype
());
if
(
index_type
==
framework
::
proto
::
VarType
::
INT32
)
{
if
(
index_type
==
framework
::
proto
::
VarType
::
INT32
)
{
...
@@ -181,10 +181,12 @@ template <typename TIds, typename T>
...
@@ -181,10 +181,12 @@ template <typename TIds, typename T>
void
NPUUpdateEmbedding
(
const
framework
::
ExecutionContext
&
context
)
{
void
NPUUpdateEmbedding
(
const
framework
::
ExecutionContext
&
context
)
{
// get inputs
// get inputs
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
auto
ids_t
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
ids_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
d_output_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
table_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
auto
table_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
auto
table_grad_t
=
context
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
auto
table_grad_t
=
context
.
Output
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"W"
));
VLOG
(
10
)
<<
"ids_t:"
<<
ids_t
<<
", d_output_t:"
<<
d_output_t
VLOG
(
10
)
<<
"ids_t:"
<<
ids_t
<<
", d_output_t:"
<<
d_output_t
<<
", table_t:"
<<
table_t
<<
", table_grad_t"
<<
table_grad_t
;
<<
", table_t:"
<<
table_t
<<
", table_grad_t"
<<
table_grad_t
;
...
@@ -243,7 +245,7 @@ template <typename T>
...
@@ -243,7 +245,7 @@ template <typename T>
class
CEmbeddingGradNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CEmbeddingGradNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
ids_t
=
context
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
ids_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
const
auto
&
index_type
=
framework
::
TransToProtoVarType
(
ids_t
->
dtype
());
const
auto
&
index_type
=
framework
::
TransToProtoVarType
(
ids_t
->
dtype
());
if
(
index_type
==
framework
::
proto
::
VarType
::
INT32
)
{
if
(
index_type
==
framework
::
proto
::
VarType
::
INT32
)
{
...
...
paddle/fluid/operators/collective/c_embedding_op_xpu.cc
浏览文件 @
30a31a53
...
@@ -18,15 +18,13 @@ limitations under the License. */
...
@@ -18,15 +18,13 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
CEmbeddingOpXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
CEmbeddingOpXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
table_t
=
ctx
.
Input
<
LoD
Tensor
>
(
"W"
);
auto
*
table_t
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
);
auto
*
ids_t
=
ctx
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
ids_t
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
auto
*
output_t
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
output_t
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
int64_t
start_index
=
ctx
.
Attr
<
int64_t
>
(
"start_index"
);
const
int64_t
start_index
=
ctx
.
Attr
<
int64_t
>
(
"start_index"
);
const
T
*
table_data
=
table_t
->
data
<
T
>
();
const
T
*
table_data
=
table_t
->
data
<
T
>
();
T
*
output_data
=
output_t
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
output_data
=
output_t
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/controlflow/conditional_block_op.cc
浏览文件 @
30a31a53
...
@@ -363,13 +363,14 @@ class ConditionalBlockGradOp : public ConditionalOp {
...
@@ -363,13 +363,14 @@ class ConditionalBlockGradOp : public ConditionalOp {
}
}
if
(
input_var
->
IsType
<
phi
::
DenseTensor
>
())
{
if
(
input_var
->
IsType
<
phi
::
DenseTensor
>
())
{
PADDLE_ENFORCE_EQ
(
outside_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
true
,
outside_var
->
IsType
<
phi
::
DenseTensor
>
(),
platform
::
errors
::
InvalidArgument
(
true
,
"Type of outside_var %s is NOT LoDTensor, which "
platform
::
errors
::
InvalidArgument
(
"doesn't match input_var %s."
,
"Type of outside_var %s is NOT phi::DenseTensor, which "
outside_grad_name
,
"doesn't match input_var %s."
,
input_name
));
outside_grad_name
,
input_name
));
AssignZeroToOutsideTensor
(
place
,
AssignZeroToOutsideTensor
(
place
,
scope
,
scope
,
input_var
->
Get
<
phi
::
DenseTensor
>
(),
input_var
->
Get
<
phi
::
DenseTensor
>
(),
...
@@ -402,7 +403,8 @@ class ConditionalBlockGradOp : public ConditionalOp {
...
@@ -402,7 +403,8 @@ class ConditionalBlockGradOp : public ConditionalOp {
}
else
{
}
else
{
// TODO(huihuangzheng): add support for SelectedRows
// TODO(huihuangzheng): add support for SelectedRows
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Conditional block grad op doesn't support non-LoDTensor output "
"Conditional block grad op doesn't support non-phi::DenseTensor "
"output "
"now."
));
"now."
));
}
}
}
}
...
@@ -475,9 +477,9 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase {
...
@@ -475,9 +477,9 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase {
class
ConditionalBlockGradInferVarType
:
public
framework
::
VarTypeInference
{
class
ConditionalBlockGradInferVarType
:
public
framework
::
VarTypeInference
{
public:
public:
void
operator
()(
framework
::
InferVarTypeContext
*
ctx
)
const
override
{
void
operator
()(
framework
::
InferVarTypeContext
*
ctx
)
const
override
{
// NOTE(Aurelius84): VarType of Output is
LoDTensor by default. In case of
// NOTE(Aurelius84): VarType of Output is
phi::DenseTensor by default. In
//
Input is {Tensor, LoDTensorArray}, we need synchronous the Input's
//
case of Input is {Tensor, LoDTensorArray}, we need synchronous the
// VarType into Input@GRAD to avoid generating {Tensor, Tensor} as
//
Input's
VarType into Input@GRAD to avoid generating {Tensor, Tensor} as
// Input@GRAD.
// Input@GRAD.
auto
input_size
=
ctx
->
InputSize
(
ConditionalOp
::
kInputs
);
auto
input_size
=
ctx
->
InputSize
(
ConditionalOp
::
kInputs
);
auto
output_size
=
auto
output_size
=
...
...
paddle/fluid/operators/controlflow/conditional_block_op_test.cc
浏览文件 @
30a31a53
...
@@ -21,7 +21,6 @@ limitations under the License. */
...
@@ -21,7 +21,6 @@ limitations under the License. */
USE_NO_KERNEL_OP
(
conditional_block
);
USE_NO_KERNEL_OP
(
conditional_block
);
USE_NO_KERNEL_OP
(
conditional_block_grad
);
USE_NO_KERNEL_OP
(
conditional_block_grad
);
using
LoDTensor
=
phi
::
DenseTensor
;
using
LoDTensorArray
=
paddle
::
framework
::
LoDTensorArray
;
using
LoDTensorArray
=
paddle
::
framework
::
LoDTensorArray
;
using
Scope
=
paddle
::
framework
::
Scope
;
using
Scope
=
paddle
::
framework
::
Scope
;
using
Variable
=
paddle
::
framework
::
Variable
;
using
Variable
=
paddle
::
framework
::
Variable
;
...
@@ -32,7 +31,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
...
@@ -32,7 +31,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
Scope
scope
;
Scope
scope
;
Variable
*
cond_var
=
scope
.
Var
(
"condition"
);
Variable
*
cond_var
=
scope
.
Var
(
"condition"
);
LoDTensor
*
cond_tensor
=
cond_var
->
GetMutable
<
LoD
Tensor
>
();
phi
::
DenseTensor
*
cond_tensor
=
cond_var
->
GetMutable
<
phi
::
Dense
Tensor
>
();
paddle
::
framework
::
DDim
cond_dims
=
phi
::
make_ddim
({
1
});
paddle
::
framework
::
DDim
cond_dims
=
phi
::
make_ddim
({
1
});
bool
*
cond_data
=
cond_tensor
->
mutable_data
<
bool
>
(
cond_dims
,
place
);
bool
*
cond_data
=
cond_tensor
->
mutable_data
<
bool
>
(
cond_dims
,
place
);
cond_data
[
0
]
=
false
;
cond_data
[
0
]
=
false
;
...
@@ -41,7 +40,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
...
@@ -41,7 +40,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
LoDTensorArray
*
input_tensors
=
input_var
->
GetMutable
<
LoDTensorArray
>
();
LoDTensorArray
*
input_tensors
=
input_var
->
GetMutable
<
LoDTensorArray
>
();
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
paddle
::
framework
::
DDim
in_dims
=
phi
::
make_ddim
({
i
+
1
,
i
+
2
});
paddle
::
framework
::
DDim
in_dims
=
phi
::
make_ddim
({
i
+
1
,
i
+
2
});
LoD
Tensor
lod_tensor
;
phi
::
Dense
Tensor
lod_tensor
;
float
*
in_data
=
lod_tensor
.
mutable_data
<
float
>
(
in_dims
,
place
);
float
*
in_data
=
lod_tensor
.
mutable_data
<
float
>
(
in_dims
,
place
);
for
(
int
j
=
0
;
j
<
(
i
+
1
)
*
(
i
+
2
);
++
j
)
{
for
(
int
j
=
0
;
j
<
(
i
+
1
)
*
(
i
+
2
);
++
j
)
{
in_data
[
j
]
=
static_cast
<
float
>
(
j
);
in_data
[
j
]
=
static_cast
<
float
>
(
j
);
...
...
paddle/fluid/operators/controlflow/feed_op.cc
浏览文件 @
30a31a53
...
@@ -29,7 +29,7 @@ namespace paddle {
...
@@ -29,7 +29,7 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
// FeedVariableVisitor is to feed the variable data
// FeedVariableVisitor is to feed the variable data
// according to data type (
LoD
Tensor or Strings).
// according to data type (
phi::Dense
Tensor or Strings).
class
FeedVariableVisitor
{
class
FeedVariableVisitor
{
public:
public:
explicit
FeedVariableVisitor
(
framework
::
Variable
*
out_var
,
explicit
FeedVariableVisitor
(
framework
::
Variable
*
out_var
,
...
@@ -146,11 +146,11 @@ class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -146,11 +146,11 @@ class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"X"
,
AddInput
(
"X"
,
"(vector<
LoD
Tensor>) "
"(vector<
phi::Dense
Tensor>) "
"A feeding list of
LoD
Tensor, which may have "
"A feeding list of
phi::Dense
Tensor, which may have "
"different dimension and data type."
);
"different dimension and data type."
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(
LoDTensor) The LoD
Tensor which is a copy "
"(
phi::DenseTensor) The phi::Dense
Tensor which is a copy "
"of the col-th feeding "
"of the col-th feeding "
"object."
);
"object."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of current feeding object."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of current feeding object."
);
...
...
paddle/fluid/operators/controlflow/fetch_op.cc
浏览文件 @
30a31a53
...
@@ -143,12 +143,14 @@ class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -143,12 +143,14 @@ class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"X"
,
AddInput
(
"X"
,
"(LoDTensor) The resulted LoDTensor which is expected to return "
"(phi::DenseTensor) The resulted phi::DenseTensor which is "
"expected to return "
"to users."
);
"to users."
);
AddOutput
(
AddOutput
(
"Out"
,
"Out"
,
"(vector<LoDTensor>|unordered_map<string, int32_t>) A fetching list"
"(vector<phi::DenseTensor>|unordered_map<string, int32_t>) A fetching "
" of LoDTensor|unordered_map<string, int32_t> which may have "
"list"
" of phi::DenseTensor|unordered_map<string, int32_t> which may have "
"different dimension, shape and data type."
);
"different dimension, shape and data type."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of fetching object."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of fetching object."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
...
...
paddle/fluid/operators/controlflow/fetch_v2_op.cc
浏览文件 @
30a31a53
...
@@ -201,10 +201,12 @@ class FetchV2OpProtoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -201,10 +201,12 @@ class FetchV2OpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"X"
,
AddInput
(
"X"
,
"(LoDTensor) The resulted LoDTensor which is expected to return "
"(phi::DenseTensor) The resulted phi::DenseTensor which is "
"expected to return "
"to users."
);
"to users."
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(vector<LoDTensor>) A fetching list of LoDTensor which may have "
"(vector<phi::DenseTensor>) A fetching list of phi::DenseTensor "
"which may have "
"different dimension, shape and data type."
);
"different dimension, shape and data type."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of fetching object."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of fetching object."
);
AddAttr
<
bool
>
(
"deepcopy"
,
"(bool) Whether deep copy is required."
)
AddAttr
<
bool
>
(
"deepcopy"
,
"(bool) Whether deep copy is required."
)
...
...
paddle/fluid/operators/controlflow/logical_op.cc
浏览文件 @
30a31a53
...
@@ -35,7 +35,7 @@ class BinaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -35,7 +35,7 @@ class BinaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
comment
.
type
));
comment
.
type
));
AddOutput
(
"Out"
,
string
::
Sprintf
(
"n-dim bool Variable"
));
AddOutput
(
"Out"
,
string
::
Sprintf
(
"n-dim bool Variable"
));
AddComment
(
string
::
Sprintf
(
R"DOC(%s Operator
AddComment
(
string
::
Sprintf
(
R"DOC(%s Operator
It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim
LoD
Tensor or Tensor.
It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim
phi::Dense
Tensor or Tensor.
Each element of Out is calculated by %s
Each element of Out is calculated by %s
)DOC"
,
)DOC"
,
comment
.
type
,
comment
.
type
,
...
@@ -49,13 +49,14 @@ class UnaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -49,13 +49,14 @@ class UnaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
void
Make
()
override
{
OpComment
comment
;
OpComment
comment
;
AddInput
(
"X"
,
AddInput
(
"X"
,
string
::
Sprintf
(
"Operand of %s operator. Must be "
string
::
Sprintf
(
"a LoDTensor or Tensor of type being one of bool, "
"Operand of %s operator. Must be "
"int8, int16, int32, int64, float32, float64."
,
"a phi::DenseTensor or Tensor of type being one of bool, "
comment
.
type
));
"int8, int16, int32, int64, float32, float64."
,
AddOutput
(
"Out"
,
string
::
Sprintf
(
"n-dim bool LoDTensor or Tensor."
));
comment
.
type
));
AddOutput
(
"Out"
,
string
::
Sprintf
(
"n-dim bool phi::DenseTensor or Tensor."
));
AddComment
(
string
::
Sprintf
(
R"DOC(%s Operator
AddComment
(
string
::
Sprintf
(
R"DOC(%s Operator
It operates element-wise on X, and returns the Out. X and Out are N-dim
LoD
Tensor or Tensor.
It operates element-wise on X, and returns the Out. X and Out are N-dim
phi::Dense
Tensor or Tensor.
Each element of Out is calculated by %s
Each element of Out is calculated by %s
)DOC"
,
)DOC"
,
comment
.
type
,
comment
.
type
,
...
...
paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
浏览文件 @
30a31a53
...
@@ -67,7 +67,8 @@ class WriteToArrayOp : public ArrayOp {
...
@@ -67,7 +67,8 @@ class WriteToArrayOp : public ArrayOp {
class
WriteToArrayOpProtoMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
WriteToArrayOpProtoMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"X"
,
"(LoDTensor) the tensor will be written to tensor array"
);
AddInput
(
"X"
,
"(phi::DenseTensor) the tensor will be written to tensor array"
);
AddInput
(
AddInput
(
"I"
,
"I"
,
"(Tensor) the subscript index in tensor array. The number of element "
"(Tensor) the subscript index in tensor array. The number of element "
...
@@ -76,9 +77,9 @@ class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -76,9 +77,9 @@ class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
AddComment
(
R"DOC(
WriteToArray Operator.
WriteToArray Operator.
This operator writes a
LoDTensor to a LoD
Tensor array.
This operator writes a
phi::DenseTensor to a phi::Dense
Tensor array.
Assume $T$ is
LoD
Tensor, $i$ is the subscript of the array, and $A$ is the array. The
Assume $T$ is
phi::Dense
Tensor, $i$ is the subscript of the array, and $A$ is the array. The
equation is
equation is
$$A[i] = T$$
$$A[i] = T$$
...
@@ -196,13 +197,13 @@ class ReadFromArrayProtoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -196,13 +197,13 @@ class ReadFromArrayProtoMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) the writed tensor when used as the grad op of "
"(Tensor) the writed tensor when used as the grad op of "
"write_to_array. We use this to fill zero gradient."
)
"write_to_array. We use this to fill zero gradient."
)
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
"Out"
,
"(
LoD
Tensor) the tensor will be read from."
);
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) the tensor will be read from."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
ReadFromArray Operator.
ReadFromArray Operator.
Read a
LoDTensor from a LoD
Tensor Array.
Read a
phi::DenseTensor from a phi::Dense
Tensor Array.
Assume $T$ is
LoD
Tensor, $i$ is the subscript of the array, and $A$ is the array. The
Assume $T$ is
phi::Dense
Tensor, $i$ is the subscript of the array, and $A$ is the array. The
equation is
equation is
$$T = A[i]$$
$$T = A[i]$$
...
...
paddle/fluid/operators/controlflow/while_op.cc
浏览文件 @
30a31a53
...
@@ -32,7 +32,6 @@ namespace paddle {
...
@@ -32,7 +32,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
StepScopeVar
=
std
::
vector
<
framework
::
Scope
*>
;
using
StepScopeVar
=
std
::
vector
<
framework
::
Scope
*>
;
using
LoDTensor
=
phi
::
DenseTensor
;
namespace
{
// NOLINT
namespace
{
// NOLINT
static
std
::
string
GetSkipEagerDeletionVarsDebugString
(
static
std
::
string
GetSkipEagerDeletionVarsDebugString
(
...
@@ -62,7 +61,7 @@ class WhileOp : public framework::OperatorBase {
...
@@ -62,7 +61,7 @@ class WhileOp : public framework::OperatorBase {
platform
::
errors
::
NotFound
(
platform
::
errors
::
NotFound
(
"Input(Condition) of WhileOp is not found."
));
"Input(Condition) of WhileOp is not found."
));
auto
&
cond
=
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
LoD
Tensor
>
();
auto
&
cond
=
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
phi
::
Dense
Tensor
>
();
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
cond
.
dims
(),
cond
.
dims
(),
phi
::
make_ddim
({
1
}),
phi
::
make_ddim
({
1
}),
...
@@ -149,9 +148,10 @@ class WhileOp : public framework::OperatorBase {
...
@@ -149,9 +148,10 @@ class WhileOp : public framework::OperatorBase {
framework
::
Variable
*
input_var
=
scope
.
FindVar
(
input_var_name
);
framework
::
Variable
*
input_var
=
scope
.
FindVar
(
input_var_name
);
if
(
input_var
->
IsType
<
phi
::
DenseTensor
>
())
{
if
(
input_var
->
IsType
<
phi
::
DenseTensor
>
())
{
rename_vars
.
push_back
(
input_var_rename
);
rename_vars
.
push_back
(
input_var_rename
);
auto
input_var_tensor
=
input_var
->
Get
<
LoD
Tensor
>
();
auto
input_var_tensor
=
input_var
->
Get
<
phi
::
Dense
Tensor
>
();
auto
*
rename_input_var_tensor
=
auto
*
rename_input_var_tensor
=
current_scope
.
Var
(
input_var_rename
)
->
GetMutable
<
LoDTensor
>
();
current_scope
.
Var
(
input_var_rename
)
->
GetMutable
<
phi
::
DenseTensor
>
();
framework
::
TensorCopy
(
framework
::
TensorCopy
(
input_var_tensor
,
dev_place
,
rename_input_var_tensor
);
input_var_tensor
,
dev_place
,
rename_input_var_tensor
);
rename_input_var_tensor
->
set_lod
(
input_var_tensor
.
lod
());
rename_input_var_tensor
->
set_lod
(
input_var_tensor
.
lod
());
...
@@ -166,8 +166,8 @@ class WhileOp : public framework::OperatorBase {
...
@@ -166,8 +166,8 @@ class WhileOp : public framework::OperatorBase {
var_rename
.
substr
(
0
,
var_rename
.
size
()
-
strlen
(
kSuffix
));
var_rename
.
substr
(
0
,
var_rename
.
size
()
-
strlen
(
kSuffix
));
current_scope
.
Rename
(
var_rename
,
input_var_name
);
current_scope
.
Rename
(
var_rename
,
input_var_name
);
}
}
cond_data
=
cond_data
=
GetCondData
(
GetCondData
(
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
LoD
Tensor
>
());
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
phi
::
Dense
Tensor
>
());
}
}
}
else
{
}
else
{
auto
&
current_scope
=
scope
.
NewScope
();
auto
&
current_scope
=
scope
.
NewScope
();
...
@@ -188,8 +188,8 @@ class WhileOp : public framework::OperatorBase {
...
@@ -188,8 +188,8 @@ class WhileOp : public framework::OperatorBase {
}
}
executor
.
RunPreparedContext
(
executor
.
RunPreparedContext
(
ctx
.
get
(),
&
current_scope
,
false
,
false
,
false
);
ctx
.
get
(),
&
current_scope
,
false
,
false
,
false
);
cond_data
=
cond_data
=
GetCondData
(
GetCondData
(
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
LoD
Tensor
>
());
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
phi
::
Dense
Tensor
>
());
}
}
scope
.
DeleteScope
(
&
current_scope
);
scope
.
DeleteScope
(
&
current_scope
);
}
}
...
@@ -325,7 +325,8 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -325,7 +325,8 @@ class WhileGradOp : public framework::OperatorBase {
}
}
}
else
{
}
else
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Currently only support LoDTensor and LoDTensorArray in "
"Currently only support phi::DenseTensor and "
"phi::DenseTensorArray in "
"WhileGradOp."
));
"WhileGradOp."
));
}
}
}
}
...
@@ -398,16 +399,16 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -398,16 +399,16 @@ class WhileGradOp : public framework::OperatorBase {
inside_grad_name
));
inside_grad_name
));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
var
->
IsType
<
framework
::
LoDTensorArray
>
()
||
var
->
IsType
<
framework
::
LoDTensorArray
>
()
||
var
->
IsType
<
LoD
Tensor
>
(),
var
->
IsType
<
phi
::
Dense
Tensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"Currently the type of var only can be LoDTensorArray, "
"Currently the type of var only can be LoDTensorArray, "
"or
LoD
Tensor, but the received var[%s] is %s."
,
"or
phi::Dense
Tensor, but the received var[%s] is %s."
,
inside_grad_name
,
inside_grad_name
,
framework
::
ToTypeName
(
var
->
Type
())));
framework
::
ToTypeName
(
var
->
Type
())));
if
((
var_iter
==
outside_og_names
.
end
())
&&
if
((
var_iter
==
outside_og_names
.
end
())
&&
var
->
IsType
<
LoD
Tensor
>
())
{
var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
auto
&
inside_tensor
=
var
->
Get
<
phi
::
DenseTensor
>
();
auto
&
inside_tensor
=
var
->
Get
<
phi
::
DenseTensor
>
();
framework
::
AttributeMap
attrs
;
framework
::
AttributeMap
attrs
;
attrs
[
"dtype"
]
=
attrs
[
"dtype"
]
=
...
...
paddle/fluid/operators/detection/bbox_util.cu.h
浏览文件 @
30a31a53
...
@@ -31,7 +31,6 @@ namespace paddle {
...
@@ -31,7 +31,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
...
...
paddle/fluid/operators/detection/bipartite_match_op.cc
浏览文件 @
30a31a53
...
@@ -19,7 +19,6 @@ namespace paddle {
...
@@ -19,7 +19,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
BipartiteMatchOp
:
public
framework
::
OperatorWithKernel
{
class
BipartiteMatchOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
...
@@ -196,7 +195,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
...
@@ -196,7 +195,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
}
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
dist_mat
=
context
.
Input
<
LoD
Tensor
>
(
"DistMat"
);
auto
*
dist_mat
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"DistMat"
);
auto
*
match_indices
=
auto
*
match_indices
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ColToRowMatchIndices"
);
context
.
Output
<
phi
::
DenseTensor
>
(
"ColToRowMatchIndices"
);
auto
*
match_dist
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ColToRowMatchDist"
);
auto
*
match_dist
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ColToRowMatchDist"
);
...
@@ -251,7 +250,8 @@ class BipartiteMatchOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -251,7 +250,8 @@ class BipartiteMatchOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
AddInput
(
"DistMat"
,
"DistMat"
,
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
"(phi::DenseTensor or Tensor) this input is a 2-D phi::DenseTensor "
"with shape "
"[K, M]. It is pair-wise distance matrix between the entities "
"[K, M]. It is pair-wise distance matrix between the entities "
"represented by each row and each column. For example, assumed one "
"represented by each row and each column. For example, assumed one "
"entity is A with shape [K], another entity is B with shape [M]. The "
"entity is A with shape [K], another entity is B with shape [M]. The "
...
@@ -302,8 +302,8 @@ row entity to the column entity and the matched indices are not duplicated
...
@@ -302,8 +302,8 @@ row entity to the column entity and the matched indices are not duplicated
in each row of ColToRowMatchIndices. If the column entity is not matched
in each row of ColToRowMatchIndices. If the column entity is not matched
any row entity, set -1 in ColToRowMatchIndices.
any row entity, set -1 in ColToRowMatchIndices.
Please note that the input DistMat can be
LoD
Tensor (with LoD) or Tensor.
Please note that the input DistMat can be
phi::Dense
Tensor (with LoD) or Tensor.
If
LoD
Tensor with LoD, the height of ColToRowMatchIndices is batch size.
If
phi::Dense
Tensor with LoD, the height of ColToRowMatchIndices is batch size.
If Tensor, the height of ColToRowMatchIndices is 1.
If Tensor, the height of ColToRowMatchIndices is 1.
)DOC"
);
)DOC"
);
...
...
paddle/fluid/operators/detection/box_clip_op.cc
浏览文件 @
30a31a53
...
@@ -66,15 +66,15 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -66,15 +66,15 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Input"
,
AddInput
(
"Input"
,
"(
LoD
Tensor) "
"(
phi::Dense
Tensor) "
"Input is a
LoD
Tensor with shape [..., 4] holds 4 points"
"Input is a
phi::Dense
Tensor with shape [..., 4] holds 4 points"
"in last dimension in format [xmin, ymin, xmax, ymax]"
);
"in last dimension in format [xmin, ymin, xmax, ymax]"
);
AddInput
(
"ImInfo"
,
AddInput
(
"ImInfo"
,
"(Tensor) Information for image reshape is in shape (N, 3), "
"(Tensor) Information for image reshape is in shape (N, 3), "
"in format (height, width, im_scale)"
);
"in format (height, width, im_scale)"
);
AddOutput
(
"Output"
,
AddOutput
(
"Output"
,
"(
LoD
Tensor) "
"(
phi::Dense
Tensor) "
"Output is a
LoD
Tensor with the same shape as Input"
"Output is a
phi::Dense
Tensor with the same shape as Input"
"and it is the result after clip"
);
"and it is the result after clip"
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
This operator clips input boxes to original input images.
This operator clips input boxes to original input images.
...
...
paddle/fluid/operators/detection/box_clip_op.cu
浏览文件 @
30a31a53
...
@@ -49,9 +49,9 @@ template <typename DeviceContext, typename T>
...
@@ -49,9 +49,9 @@ template <typename DeviceContext, typename T>
class
GPUBoxClipKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GPUBoxClipKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input
=
context
.
Input
<
LoD
Tensor
>
(
"Input"
);
auto
*
input
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Input"
);
auto
*
im_info
=
context
.
Input
<
phi
::
DenseTensor
>
(
"ImInfo"
);
auto
*
im_info
=
context
.
Input
<
phi
::
DenseTensor
>
(
"ImInfo"
);
auto
*
output
=
context
.
Output
<
LoD
Tensor
>
(
"Output"
);
auto
*
output
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"Output"
);
const
int64_t
num
=
input
->
dims
()[
0
];
const
int64_t
num
=
input
->
dims
()[
0
];
const
int64_t
bbox_width
=
input
->
numel
()
/
num
;
const
int64_t
bbox_width
=
input
->
numel
()
/
num
;
auto
lod
=
input
->
lod
();
auto
lod
=
input
->
lod
();
...
...
paddle/fluid/operators/detection/box_clip_op.h
浏览文件 @
30a31a53
...
@@ -20,15 +20,14 @@ namespace paddle {
...
@@ -20,15 +20,14 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
BoxClipKernel
:
public
framework
::
OpKernel
<
T
>
{
class
BoxClipKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input_box
=
context
.
Input
<
LoD
Tensor
>
(
"Input"
);
auto
*
input_box
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Input"
);
auto
*
im_info
=
context
.
Input
<
LoD
Tensor
>
(
"ImInfo"
);
auto
*
im_info
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"ImInfo"
);
auto
*
output_box
=
context
.
Output
<
LoD
Tensor
>
(
"Output"
);
auto
*
output_box
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"Output"
);
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
CPUContext
>();
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
CPUContext
>();
output_box
->
mutable_data
<
T
>
(
context
.
GetPlace
());
output_box
->
mutable_data
<
T
>
(
context
.
GetPlace
());
if
(
input_box
->
lod
().
size
())
{
if
(
input_box
->
lod
().
size
())
{
...
...
paddle/fluid/operators/detection/box_coder_op.cc
浏览文件 @
30a31a53
...
@@ -44,7 +44,8 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -44,7 +44,8 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
.
AsDispensable
();
.
AsDispensable
();
AddInput
(
AddInput
(
"TargetBox"
,
"TargetBox"
,
"(LoDTensor or Tensor) This input can be a 2-D LoDTensor with shape "
"(phi::DenseTensor or Tensor) This input can be a 2-D phi::DenseTensor "
"with shape "
"[N, 4] when code_type is 'encode_center_size'. This input also can "
"[N, 4] when code_type is 'encode_center_size'. This input also can "
"be a 3-D Tensor with shape [N, M, 4] when code_type is "
"be a 3-D Tensor with shape [N, M, 4] when code_type is "
"'decode_center_size'. [N, 4], each box is represented as "
"'decode_center_size'. [N, 4], each box is represented as "
...
@@ -79,7 +80,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -79,7 +80,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
"not be provided at the same time."
)
"not be provided at the same time."
)
.
SetDefault
(
std
::
vector
<
float
>
{});
.
SetDefault
(
std
::
vector
<
float
>
{});
AddOutput
(
"OutputBox"
,
AddOutput
(
"OutputBox"
,
"(
LoD
Tensor or Tensor) "
"(
phi::Dense
Tensor or Tensor) "
"When code_type is 'encode_center_size', the output tensor of "
"When code_type is 'encode_center_size', the output tensor of "
"box_coder_op with shape [N, M, 4] representing the result of N "
"box_coder_op with shape [N, M, 4] representing the result of N "
"target boxes encoded with M Prior boxes and variances. When "
"target boxes encoded with M Prior boxes and variances. When "
...
...
paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
浏览文件 @
30a31a53
...
@@ -14,8 +14,6 @@ limitations under the License. */
...
@@ -14,8 +14,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
class
BoxDecoderAndAssignOp
:
public
framework
::
OperatorWithKernel
{
class
BoxDecoderAndAssignOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
@@ -157,12 +155,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -157,12 +155,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"default."
)
"default."
)
.
AsDispensable
();
.
AsDispensable
();
AddInput
(
"TargetBox"
,
AddInput
(
"TargetBox"
,
"(
LoD
Tensor or Tensor) "
"(
phi::Dense
Tensor or Tensor) "
"This input can be a 2-D
LoD
Tensor with shape "
"This input can be a 2-D
phi::Dense
Tensor with shape "
"[N, classnum*4]. It holds N targets for N boxes."
);
"[N, classnum*4]. It holds N targets for N boxes."
);
AddInput
(
"BoxScore"
,
AddInput
(
"BoxScore"
,
"(
LoD
Tensor or Tensor) "
"(
phi::Dense
Tensor or Tensor) "
"This input can be a 2-D
LoD
Tensor with shape "
"This input can be a 2-D
phi::Dense
Tensor with shape "
"[N, classnum], each box is represented as [classnum] which is "
"[N, classnum], each box is represented as [classnum] which is "
"the classification probabilities."
);
"the classification probabilities."
);
AddAttr
<
float
>
(
"box_clip"
,
AddAttr
<
float
>
(
"box_clip"
,
...
@@ -170,12 +168,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -170,12 +168,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"clip box to prevent overflowing"
)
"clip box to prevent overflowing"
)
.
SetDefault
(
4.135
f
);
.
SetDefault
(
4.135
f
);
AddOutput
(
"DecodeBox"
,
AddOutput
(
"DecodeBox"
,
"(
LoD
Tensor or Tensor) "
"(
phi::Dense
Tensor or Tensor) "
"the output tensor of op with shape [N, classnum * 4] "
"the output tensor of op with shape [N, classnum * 4] "
"representing the result of N target boxes decoded with "
"representing the result of N target boxes decoded with "
"M Prior boxes and variances for each class."
);
"M Prior boxes and variances for each class."
);
AddOutput
(
"OutputAssignBox"
,
AddOutput
(
"OutputAssignBox"
,
"(
LoD
Tensor or Tensor) "
"(
phi::Dense
Tensor or Tensor) "
"the output tensor of op with shape [N, 4] "
"the output tensor of op with shape [N, 4] "
"representing the result of N target boxes decoded with "
"representing the result of N target boxes decoded with "
"M Prior boxes and variances with the best non-background class "
"M Prior boxes and variances with the best non-background class "
...
...
paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
浏览文件 @
30a31a53
...
@@ -17,7 +17,6 @@ namespace paddle {
...
@@ -17,7 +17,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
CollectFpnProposalsOp
:
public
framework
::
OperatorWithKernel
{
class
CollectFpnProposalsOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
@@ -76,8 +75,8 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel {
...
@@ -76,8 +75,8 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel {
PADDLE_GET
(
framework
::
Variable
*
,
roi_inputs
[
i
]);
PADDLE_GET
(
framework
::
Variable
*
,
roi_inputs
[
i
]);
framework
::
Variable
*
score_var
=
framework
::
Variable
*
score_var
=
PADDLE_GET
(
framework
::
Variable
*
,
score_inputs
[
i
]);
PADDLE_GET
(
framework
::
Variable
*
,
score_inputs
[
i
]);
auto
&
roi_lod
=
roi_var
->
Get
<
LoD
Tensor
>
().
lod
();
auto
&
roi_lod
=
roi_var
->
Get
<
phi
::
Dense
Tensor
>
().
lod
();
auto
&
score_lod
=
score_var
->
Get
<
LoD
Tensor
>
().
lod
();
auto
&
score_lod
=
score_var
->
Get
<
phi
::
Dense
Tensor
>
().
lod
();
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
roi_lod
,
roi_lod
,
score_lod
,
score_lod
,
...
@@ -101,11 +100,13 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -101,11 +100,13 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"MultiLevelRois"
,
AddInput
(
"MultiLevelRois"
,
"(LoDTensor) Multiple roi LoDTensors from each level in shape "
"(phi::DenseTensor) Multiple roi phi::DenseTensors from each "
"level in shape "
"(N, 4), N is the number of RoIs"
)
"(N, 4), N is the number of RoIs"
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"MultiLevelScores"
,
AddInput
(
"MultiLevelScores"
,
"(LoDTensor) Multiple score LoDTensors from each level in shape"
"(phi::DenseTensor) Multiple score phi::DenseTensors from each "
"level in shape"
" (N, 1), N is the number of RoIs."
)
" (N, 1), N is the number of RoIs."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
AddInput
(
...
@@ -115,7 +116,8 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -115,7 +116,8 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
"images."
)
"images."
)
.
AsDuplicable
()
.
AsDuplicable
()
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
"FpnRois"
,
"(LoDTensor) All selected RoIs with highest scores"
);
AddOutput
(
"FpnRois"
,
"(phi::DenseTensor) All selected RoIs with highest scores"
);
AddOutput
(
"RoisNum"
,
"(Tensor), Number of RoIs in each images."
)
AddOutput
(
"RoisNum"
,
"(Tensor), Number of RoIs in each images."
)
.
AsDispensable
();
.
AsDispensable
();
AddAttr
<
int
>
(
"post_nms_topN"
,
AddAttr
<
int
>
(
"post_nms_topN"
,
...
...
paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
浏览文件 @
30a31a53
...
@@ -34,7 +34,6 @@ namespace paddle {
...
@@ -34,7 +34,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
static
constexpr
int
kNumCUDAThreads
=
64
;
static
constexpr
int
kNumCUDAThreads
=
64
;
static
constexpr
int
kNumMaxinumNumBlocks
=
4096
;
static
constexpr
int
kNumMaxinumNumBlocks
=
4096
;
...
@@ -58,9 +57,9 @@ template <typename DeviceContext, typename T>
...
@@ -58,9 +57,9 @@ template <typename DeviceContext, typename T>
class
GPUCollectFpnProposalsOpKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GPUCollectFpnProposalsOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
roi_ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"MultiLevelRois"
);
const
auto
roi_ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"MultiLevelRois"
);
const
auto
score_ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"MultiLevelScores"
);
const
auto
score_ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"MultiLevelScores"
);
auto
fpn_rois
=
ctx
.
Output
<
LoD
Tensor
>
(
"FpnRois"
);
auto
fpn_rois
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"FpnRois"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
const
int
post_nms_topN
=
ctx
.
Attr
<
int
>
(
"post_nms_topN"
);
const
int
post_nms_topN
=
ctx
.
Attr
<
int
>
(
"post_nms_topN"
);
...
...
paddle/fluid/operators/detection/collect_fpn_proposals_op.h
浏览文件 @
30a31a53
...
@@ -91,7 +91,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel<T> {
...
@@ -91,7 +91,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel<T> {
"is %d"
,
"is %d"
,
multi_layer_rois
.
size
(),
multi_layer_rois
.
size
(),
multi_layer_scores
.
size
()));
multi_layer_scores
.
size
()));
// Check if the lod information of two
LoD
Tensor is same
// Check if the lod information of two
phi::Dense
Tensor is same
const
int
num_fpn_level
=
multi_layer_rois
.
size
();
const
int
num_fpn_level
=
multi_layer_rois
.
size
();
std
::
vector
<
int
>
integral_of_all_rois
(
num_fpn_level
+
1
,
0
);
std
::
vector
<
int
>
integral_of_all_rois
(
num_fpn_level
+
1
,
0
);
for
(
int
i
=
0
;
i
<
num_fpn_level
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_fpn_level
;
++
i
)
{
...
...
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
浏览文件 @
30a31a53
...
@@ -37,12 +37,14 @@ class DistributeFpnProposalsOp : public framework::OperatorWithKernel {
...
@@ -37,12 +37,14 @@ class DistributeFpnProposalsOp : public framework::OperatorWithKernel {
class
DistributeFpnProposalsOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
DistributeFpnProposalsOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"FpnRois"
,
"(LoDTensor) The RoIs at all levels in shape (-1, 4)"
);
AddInput
(
"FpnRois"
,
"(phi::DenseTensor) The RoIs at all levels in shape (-1, 4)"
);
AddInput
(
"RoisNum"
,
AddInput
(
"RoisNum"
,
"(Tensor) The number of RoIs in shape (B),"
"(Tensor) The number of RoIs in shape (B),"
"B is the number of images"
)
"B is the number of images"
)
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
"MultiFpnRois"
,
"(LoDTensor) Output with distribute operator"
)
AddOutput
(
"MultiFpnRois"
,
"(phi::DenseTensor) Output with distribute operator"
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
"RestoreIndex"
,
AddOutput
(
"RestoreIndex"
,
"(Tensor) An array of positive number which is "
"(Tensor) An array of positive number which is "
...
...
paddle/fluid/operators/detection/generate_mask_labels_op.cc
浏览文件 @
30a31a53
...
@@ -26,11 +26,12 @@ namespace paddle {
...
@@ -26,11 +26,12 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
const
int
kBoxDim
=
4
;
const
int
kBoxDim
=
4
;
template
<
typename
T
>
template
<
typename
T
>
void
AppendMask
(
LoDTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
void
AppendMask
(
phi
::
DenseTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
...
@@ -331,16 +332,16 @@ template <typename T>
...
@@ -331,16 +332,16 @@ template <typename T>
class
GenerateMaskLabelsKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GenerateMaskLabelsKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
im_info
=
ctx
.
Input
<
LoD
Tensor
>
(
"ImInfo"
);
auto
*
im_info
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"ImInfo"
);
auto
*
gt_classes
=
ctx
.
Input
<
LoD
Tensor
>
(
"GtClasses"
);
auto
*
gt_classes
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"GtClasses"
);
auto
*
is_crowd
=
ctx
.
Input
<
LoD
Tensor
>
(
"IsCrowd"
);
auto
*
is_crowd
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"IsCrowd"
);
auto
*
gt_segms
=
ctx
.
Input
<
LoD
Tensor
>
(
"GtSegms"
);
auto
*
gt_segms
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"GtSegms"
);
auto
*
rois
=
ctx
.
Input
<
LoD
Tensor
>
(
"Rois"
);
auto
*
rois
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Rois"
);
auto
*
label_int32
=
ctx
.
Input
<
LoD
Tensor
>
(
"LabelsInt32"
);
auto
*
label_int32
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LabelsInt32"
);
auto
*
mask_rois
=
ctx
.
Output
<
LoD
Tensor
>
(
"MaskRois"
);
auto
*
mask_rois
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MaskRois"
);
auto
*
roi_has_mask_int32
=
ctx
.
Output
<
LoD
Tensor
>
(
"RoiHasMaskInt32"
);
auto
*
roi_has_mask_int32
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"RoiHasMaskInt32"
);
auto
*
mask_int32
=
ctx
.
Output
<
LoD
Tensor
>
(
"MaskInt32"
);
auto
*
mask_int32
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MaskInt32"
);
int
num_classes
=
ctx
.
Attr
<
int
>
(
"num_classes"
);
int
num_classes
=
ctx
.
Attr
<
int
>
(
"num_classes"
);
int
resolution
=
ctx
.
Attr
<
int
>
(
"resolution"
);
int
resolution
=
ctx
.
Attr
<
int
>
(
"resolution"
);
...
@@ -463,17 +464,20 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -463,17 +464,20 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"B is the number of input images, "
"B is the number of input images, "
"each element consists of im_height, im_width, im_scale."
);
"each element consists of im_height, im_width, im_scale."
);
AddInput
(
"GtClasses"
,
AddInput
(
"GtClasses"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
"shape [M, 1]. "
"M is the number of groundtruth, "
"M is the number of groundtruth, "
"each element is a class label of groundtruth."
);
"each element is a class label of groundtruth."
);
AddInput
(
AddInput
(
"IsCrowd"
,
"IsCrowd"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[M, 1]. "
"M is the number of groundtruth, "
"M is the number of groundtruth, "
"each element is a flag indicates whether a groundtruth is crowd."
);
"each element is a flag indicates whether a groundtruth is crowd."
);
AddInput
(
AddInput
(
"GtSegms"
,
"GtSegms"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [S, 2], it's LoD "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[S, 2], it's LoD "
"level is 3. The LoD[0] represents the gt objects number of each "
"level is 3. The LoD[0] represents the gt objects number of each "
"instance. LoD[1] represents the segmentation counts of each objects. "
"instance. LoD[1] represents the segmentation counts of each objects. "
"LoD[2] represents the polygons number of each segmentation. S the "
"LoD[2] represents the polygons number of each segmentation. S the "
...
@@ -481,24 +485,29 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -481,24 +485,29 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"coordinate points."
);
"coordinate points."
);
AddInput
(
AddInput
(
"Rois"
,
"Rois"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [R, 4]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[R, 4]. "
"R is the number of rois which is the output of "
"R is the number of rois which is the output of "
"generate_proposal_labels, "
"generate_proposal_labels, "
"each element is a bounding box with (xmin, ymin, xmax, ymax) format."
);
"each element is a bounding box with (xmin, ymin, xmax, ymax) format."
);
AddInput
(
"LabelsInt32"
,
AddInput
(
"LabelsInt32"
,
"(LoDTensor), This intput is a 2D LoDTensor with shape [R, 1], "
"(phi::DenseTensor), This intput is a 2D phi::DenseTensor with "
"shape [R, 1], "
"each element represents a class label of a roi"
);
"each element represents a class label of a roi"
);
AddOutput
(
AddOutput
(
"MaskRois"
,
"MaskRois"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
"[P, 4]. "
"P is the number of mask, "
"P is the number of mask, "
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
AddOutput
(
"RoiHasMaskInt32"
,
AddOutput
(
"RoiHasMaskInt32"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
"shape [P, 1], "
"each element represents the output mask rois index with regard "
"each element represents the output mask rois index with regard "
"to input rois"
);
"to input rois"
);
AddOutput
(
"MaskInt32"
,
AddOutput
(
"MaskInt32"
,
"(LoDTensor), This output is a 4D LoDTensor with shape [P, Q], "
"(phi::DenseTensor), This output is a 4D phi::DenseTensor with "
"shape [P, Q], "
"Q equal to num_classes * resolution * resolution"
);
"Q equal to num_classes * resolution * resolution"
);
AddAttr
<
int
>
(
"num_classes"
,
"Class number."
);
AddAttr
<
int
>
(
"num_classes"
,
"Class number."
);
...
...
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
浏览文件 @
30a31a53
...
@@ -26,11 +26,12 @@ namespace paddle {
...
@@ -26,11 +26,12 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
const
int
kBoxDim
=
4
;
const
int
kBoxDim
=
4
;
template
<
typename
T
>
template
<
typename
T
>
void
AppendRois
(
LoDTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
void
AppendRois
(
phi
::
DenseTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
...
@@ -513,19 +514,21 @@ template <typename T>
...
@@ -513,19 +514,21 @@ template <typename T>
class
GenerateProposalLabelsKernel
:
public
framework
::
OpKernel
<
T
>
{
class
GenerateProposalLabelsKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
rpn_rois
=
context
.
Input
<
LoDTensor
>
(
"RpnRois"
);
auto
*
rpn_rois
=
context
.
Input
<
phi
::
DenseTensor
>
(
"RpnRois"
);
auto
*
gt_classes
=
context
.
Input
<
LoDTensor
>
(
"GtClasses"
);
auto
*
gt_classes
=
context
.
Input
<
phi
::
DenseTensor
>
(
"GtClasses"
);
auto
*
is_crowd
=
context
.
Input
<
LoDTensor
>
(
"IsCrowd"
);
auto
*
is_crowd
=
context
.
Input
<
phi
::
DenseTensor
>
(
"IsCrowd"
);
auto
*
gt_boxes
=
context
.
Input
<
LoDTensor
>
(
"GtBoxes"
);
auto
*
gt_boxes
=
context
.
Input
<
phi
::
DenseTensor
>
(
"GtBoxes"
);
auto
*
im_info
=
context
.
Input
<
LoDTensor
>
(
"ImInfo"
);
auto
*
im_info
=
context
.
Input
<
phi
::
DenseTensor
>
(
"ImInfo"
);
auto
*
rois
=
context
.
Output
<
LoDTensor
>
(
"Rois"
);
auto
*
rois
=
context
.
Output
<
phi
::
DenseTensor
>
(
"Rois"
);
auto
*
labels_int32
=
context
.
Output
<
LoDTensor
>
(
"LabelsInt32"
);
auto
*
labels_int32
=
context
.
Output
<
phi
::
DenseTensor
>
(
"LabelsInt32"
);
auto
*
bbox_targets
=
context
.
Output
<
LoDTensor
>
(
"BboxTargets"
);
auto
*
bbox_targets
=
context
.
Output
<
phi
::
DenseTensor
>
(
"BboxTargets"
);
auto
*
bbox_inside_weights
=
context
.
Output
<
LoDTensor
>
(
"BboxInsideWeights"
);
auto
*
bbox_inside_weights
=
context
.
Output
<
phi
::
DenseTensor
>
(
"BboxInsideWeights"
);
auto
*
bbox_outside_weights
=
auto
*
bbox_outside_weights
=
context
.
Output
<
LoDTensor
>
(
"BboxOutsideWeights"
);
context
.
Output
<
phi
::
DenseTensor
>
(
"BboxOutsideWeights"
);
auto
*
max_overlap_with_gt
=
context
.
Output
<
LoDTensor
>
(
"MaxOverlapWithGT"
);
auto
*
max_overlap_with_gt
=
context
.
Output
<
phi
::
DenseTensor
>
(
"MaxOverlapWithGT"
);
int
batch_size_per_im
=
context
.
Attr
<
int
>
(
"batch_size_per_im"
);
int
batch_size_per_im
=
context
.
Attr
<
int
>
(
"batch_size_per_im"
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
...
@@ -685,21 +688,25 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -685,21 +688,25 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
AddInput
(
"RpnRois"
,
"RpnRois"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [N, 4]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[N, 4]. "
"N is the number of the GenerateProposalOp's output, "
"N is the number of the GenerateProposalOp's output, "
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
AddInput
(
"GtClasses"
,
AddInput
(
"GtClasses"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
"shape [M, 1]. "
"M is the number of groundtruth, "
"M is the number of groundtruth, "
"each element is a class label of groundtruth."
);
"each element is a class label of groundtruth."
);
AddInput
(
AddInput
(
"IsCrowd"
,
"IsCrowd"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[M, 1]. "
"M is the number of groundtruth, "
"M is the number of groundtruth, "
"each element is a flag indicates whether a groundtruth is crowd."
);
"each element is a flag indicates whether a groundtruth is crowd."
);
AddInput
(
AddInput
(
"GtBoxes"
,
"GtBoxes"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 4]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[M, 4]. "
"M is the number of groundtruth, "
"M is the number of groundtruth, "
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
AddInput
(
"ImInfo"
,
AddInput
(
"ImInfo"
,
...
@@ -707,7 +714,8 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -707,7 +714,8 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"B is the number of input images, "
"B is the number of input images, "
"each element consists of im_height, im_width, im_scale."
);
"each element consists of im_height, im_width, im_scale."
);
AddInput
(
"MaxOverlap"
,
AddInput
(
"MaxOverlap"
,
"(LoDTensor), This input is a 1D LoDTensor with shape [N]."
"(phi::DenseTensor), This input is a 1D phi::DenseTensor with "
"shape [N]."
"N is the number of Input(RpnRois), "
"N is the number of Input(RpnRois), "
"each element is the maximum overlap between "
"each element is the maximum overlap between "
"the proposal RoI and ground-truth."
)
"the proposal RoI and ground-truth."
)
...
@@ -715,28 +723,34 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -715,28 +723,34 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
AddOutput
(
"Rois"
,
"Rois"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
"[P, 4]. "
"P usuall equal to batch_size_per_im * batch_size, "
"P usuall equal to batch_size_per_im * batch_size, "
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
AddOutput
(
"LabelsInt32"
,
AddOutput
(
"LabelsInt32"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
"shape [P, 1], "
"each element represents a class label of a roi"
);
"each element represents a class label of a roi"
);
AddOutput
(
"BboxTargets"
,
AddOutput
(
"BboxTargets"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
"shape [P, 4 * "
"class_nums], "
"class_nums], "
"each element represents a box label of a roi"
);
"each element represents a box label of a roi"
);
AddOutput
(
AddOutput
(
"BboxInsideWeights"
,
"BboxInsideWeights"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
"[P, 4 * "
"class_nums], "
"class_nums], "
"each element indicates whether a box should contribute to loss."
);
"each element indicates whether a box should contribute to loss."
);
AddOutput
(
AddOutput
(
"BboxOutsideWeights"
,
"BboxOutsideWeights"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
"[P, 4 * "
"class_nums], "
"class_nums], "
"each element indicates whether a box should contribute to loss."
);
"each element indicates whether a box should contribute to loss."
);
AddOutput
(
"MaxOverlapWithGT"
,
AddOutput
(
"MaxOverlapWithGT"
,
"(LoDTensor), This output is a 1D LoDTensor with shape [P], "
"(phi::DenseTensor), This output is a 1D phi::DenseTensor with "
"shape [P], "
"each element indicates the maxoverlap "
"each element indicates the maxoverlap "
"between output RoIs and ground-truth. "
"between output RoIs and ground-truth. "
"The output RoIs may include ground-truth "
"The output RoIs may include ground-truth "
...
...
paddle/fluid/operators/detection/generate_proposals_op.cc
浏览文件 @
30a31a53
...
@@ -28,7 +28,6 @@ namespace paddle {
...
@@ -28,7 +28,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
GenerateProposalsOp
:
public
framework
::
OperatorWithKernel
{
class
GenerateProposalsOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
...
@@ -90,8 +89,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -90,8 +89,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
"Variances"
,
"Variances"
,
"GenerateProposals"
);
"GenerateProposals"
);
auto
*
rpn_rois
=
context
.
Output
<
LoD
Tensor
>
(
"RpnRois"
);
auto
*
rpn_rois
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoD
Tensor
>
(
"RpnRoiProbs"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"RpnRoiProbs"
);
int
pre_nms_top_n
=
context
.
Attr
<
int
>
(
"pre_nms_topN"
);
int
pre_nms_top_n
=
context
.
Attr
<
int
>
(
"pre_nms_topN"
);
int
post_nms_top_n
=
context
.
Attr
<
int
>
(
"post_nms_topN"
);
int
post_nms_top_n
=
context
.
Attr
<
int
>
(
"post_nms_topN"
);
...
@@ -288,9 +287,10 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -288,9 +287,10 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) Bounding box variances with same shape as `Anchors`."
);
"(Tensor) Bounding box variances with same shape as `Anchors`."
);
AddOutput
(
"RpnRois"
,
AddOutput
(
"RpnRois"
,
"(LoDTensor), Output proposals with shape (rois_num, 4)."
);
"(phi::DenseTensor), Output proposals with shape (rois_num, 4)."
);
AddOutput
(
"RpnRoiProbs"
,
AddOutput
(
"(LoDTensor) Scores of proposals with shape (rois_num, 1)."
);
"RpnRoiProbs"
,
"(phi::DenseTensor) Scores of proposals with shape (rois_num, 1)."
);
AddOutput
(
"RpnRoisNum"
,
"(Tensor), The number of Rpn RoIs in each image"
)
AddOutput
(
"RpnRoisNum"
,
"(Tensor), The number of Rpn RoIs in each image"
)
.
AsDispensable
();
.
AsDispensable
();
AddAttr
<
int
>
(
"pre_nms_topN"
,
AddAttr
<
int
>
(
"pre_nms_topN"
,
...
...
paddle/fluid/operators/detection/generate_proposals_op.cu
浏览文件 @
30a31a53
...
@@ -29,7 +29,6 @@ namespace paddle {
...
@@ -29,7 +29,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
namespace
{
namespace
{
template
<
typename
T
>
template
<
typename
T
>
...
@@ -144,8 +143,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
...
@@ -144,8 +143,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
"Variances"
,
"Variances"
,
"GenerateProposals"
);
"GenerateProposals"
);
auto
*
rpn_rois
=
context
.
Output
<
LoD
Tensor
>
(
"RpnRois"
);
auto
*
rpn_rois
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoD
Tensor
>
(
"RpnRoiProbs"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"RpnRoiProbs"
);
int
pre_nms_top_n
=
context
.
Attr
<
int
>
(
"pre_nms_topN"
);
int
pre_nms_top_n
=
context
.
Attr
<
int
>
(
"pre_nms_topN"
);
int
post_nms_top_n
=
context
.
Attr
<
int
>
(
"post_nms_topN"
);
int
post_nms_top_n
=
context
.
Attr
<
int
>
(
"post_nms_topN"
);
...
...
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
浏览文件 @
30a31a53
...
@@ -30,7 +30,6 @@ namespace paddle {
...
@@ -30,7 +30,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
GenerateProposalsV2Op
:
public
framework
::
OperatorWithKernel
{
class
GenerateProposalsV2Op
:
public
framework
::
OperatorWithKernel
{
public:
public:
...
@@ -65,9 +64,10 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -65,9 +64,10 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) Bounding box variances with same shape as `Anchors`."
);
"(Tensor) Bounding box variances with same shape as `Anchors`."
);
AddOutput
(
"RpnRois"
,
AddOutput
(
"RpnRois"
,
"(LoDTensor), Output proposals with shape (rois_num, 4)."
);
"(phi::DenseTensor), Output proposals with shape (rois_num, 4)."
);
AddOutput
(
"RpnRoiProbs"
,
AddOutput
(
"(LoDTensor) Scores of proposals with shape (rois_num, 1)."
);
"RpnRoiProbs"
,
"(phi::DenseTensor) Scores of proposals with shape (rois_num, 1)."
);
AddOutput
(
"RpnRoisNum"
,
"(Tensor), The number of Rpn RoIs in each image"
)
AddOutput
(
"RpnRoisNum"
,
"(Tensor), The number of Rpn RoIs in each image"
)
.
AsDispensable
();
.
AsDispensable
();
AddAttr
<
int
>
(
"pre_nms_topN"
,
AddAttr
<
int
>
(
"pre_nms_topN"
,
...
...
paddle/fluid/operators/detection/iou_similarity_op.cc
浏览文件 @
30a31a53
...
@@ -59,17 +59,18 @@ class IOUSimilarityOp : public framework::OperatorWithKernel {
...
@@ -59,17 +59,18 @@ class IOUSimilarityOp : public framework::OperatorWithKernel {
class
IOUSimilarityOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
IOUSimilarityOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"X"
,
AddInput
(
"(LoDTensor, default LoDTensor<float>) "
"X"
,
"Box list X is a 2-D LoDTensor with shape [N, 4] holds N boxes, "
"(phi::DenseTensor, default phi::DenseTensor<float>) "
"each box is represented as [xmin, ymin, xmax, ymax], "
"Box list X is a 2-D phi::DenseTensor with shape [N, 4] holds N boxes, "
"the shape of X is [N, 4]. [xmin, ymin] is the left top "
"each box is represented as [xmin, ymin, xmax, ymax], "
"coordinate of the box if the input is image feature map, they "
"the shape of X is [N, 4]. [xmin, ymin] is the left top "
"are close to the origin of the coordinate system. "
"coordinate of the box if the input is image feature map, they "
"[xmax, ymax] is the right bottom coordinate of the box. "
"are close to the origin of the coordinate system. "
"This tensor can contain LoD information to represent a batch "
"[xmax, ymax] is the right bottom coordinate of the box. "
"of inputs. One instance of this batch can contain different "
"This tensor can contain LoD information to represent a batch "
"numbers of entities."
);
"of inputs. One instance of this batch can contain different "
"numbers of entities."
);
AddInput
(
"Y"
,
AddInput
(
"Y"
,
"(Tensor, default Tensor<float>) "
"(Tensor, default Tensor<float>) "
"Box list Y holds M boxes, each box is represented as "
"Box list Y holds M boxes, each box is represented as "
...
@@ -82,7 +83,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -82,7 +83,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
"whether treat the priorbox as a normalized box"
)
"whether treat the priorbox as a normalized box"
)
.
SetDefault
(
true
);
.
SetDefault
(
true
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(
LoD
Tensor, the lod is same as input X) The output of "
"(
phi::Dense
Tensor, the lod is same as input X) The output of "
"iou_similarity op, a tensor with shape [N, M] "
"iou_similarity op, a tensor with shape [N, M] "
"representing pairwise iou scores."
);
"representing pairwise iou scores."
);
...
@@ -90,7 +91,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -90,7 +91,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
**IOU Similarity Operator**
**IOU Similarity Operator**
Computes intersection-over-union (IOU) between two box lists.
Computes intersection-over-union (IOU) between two box lists.
Box list 'X' should be a
LoD
Tensor and 'Y' is a common Tensor,
Box list 'X' should be a
phi::Dense
Tensor and 'Y' is a common Tensor,
boxes in 'Y' are shared by all instance of the batched inputs of X.
boxes in 'Y' are shared by all instance of the batched inputs of X.
Given two boxes A and B, the calculation of IOU is as follows:
Given two boxes A and B, the calculation of IOU is as follows:
...
...
paddle/fluid/operators/detection/locality_aware_nms_op.cc
浏览文件 @
30a31a53
...
@@ -20,7 +20,6 @@ namespace paddle {
...
@@ -20,7 +20,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
LocalityAwareNMSOp
:
public
framework
::
OperatorWithKernel
{
class
LocalityAwareNMSOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
...
@@ -352,15 +351,15 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
...
@@ -352,15 +351,15 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
}
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
boxes_input
=
ctx
.
Input
<
LoD
Tensor
>
(
"BBoxes"
);
auto
*
boxes_input
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"BBoxes"
);
auto
*
scores_input
=
ctx
.
Input
<
LoD
Tensor
>
(
"Scores"
);
auto
*
scores_input
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Scores"
);
auto
*
outs
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
outs
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
&
score_dims
=
scores_input
->
dims
();
auto
&
score_dims
=
scores_input
->
dims
();
auto
score_size
=
score_dims
.
size
();
auto
score_size
=
score_dims
.
size
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
CPUContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
CPUContext
>();
LoD
Tensor
scores
;
phi
::
Dense
Tensor
scores
;
LoD
Tensor
boxes
;
phi
::
Dense
Tensor
boxes
;
paddle
::
framework
::
TensorCopySync
(
paddle
::
framework
::
TensorCopySync
(
*
scores_input
,
platform
::
CPUPlace
(),
&
scores
);
*
scores_input
,
platform
::
CPUPlace
(),
&
scores
);
paddle
::
framework
::
TensorCopySync
(
paddle
::
framework
::
TensorCopySync
(
...
@@ -476,10 +475,12 @@ class LocalityAwareNMSOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -476,10 +475,12 @@ class LocalityAwareNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"Whether detections are normalized."
)
"Whether detections are normalized."
)
.
SetDefault
(
true
);
.
SetDefault
(
true
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
"represents the "
"detections. Each row has 6 values: "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax] or "
"[label, confidence, xmin, ymin, xmax, ymax] or "
"(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] "
"represents the "
"detections. Each row has 10 values: "
"detections. Each row has 10 values: "
"[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
"[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
"total number of detections in this mini-batch."
"total number of detections in this mini-batch."
...
@@ -501,7 +502,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
...
@@ -501,7 +502,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
per image if keep_top_k is larger than -1.
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
This operator support multi-class and batched inputs. It applying NMS
independently for each class. The outputs is a 2-D LoDTenosr, for each
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of
LoD
Tensor are called LoD, the number
image, the offsets in first dimension of
phi::Dense
Tensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
means there is no detected bbox for this image.
means there is no detected bbox for this image.
...
...
paddle/fluid/operators/detection/matrix_nms_op.cc
浏览文件 @
30a31a53
...
@@ -21,7 +21,6 @@ namespace paddle {
...
@@ -21,7 +21,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
MatrixNMSOp
:
public
framework
::
OperatorWithKernel
{
class
MatrixNMSOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
...
@@ -89,14 +88,16 @@ class MatrixNMSOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -89,14 +88,16 @@ class MatrixNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"when 'use_gaussian' is enabled."
)
"when 'use_gaussian' is enabled."
)
.
SetDefault
(
2.
);
.
SetDefault
(
2.
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
"represents the "
"detections. Each row has 6 values: "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax]. "
"[label, confidence, xmin, ymin, xmax, ymax]. "
"the offsets in first dimension are called LoD, the number of "
"the offsets in first dimension are called LoD, the number of "
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
"no detected bbox."
);
"no detected bbox."
);
AddOutput
(
"Index"
,
AddOutput
(
"Index"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] "
"represents the "
"index of selected bbox. The index is the absolute index cross "
"index of selected bbox. The index is the absolute index cross "
"batches."
);
"batches."
);
AddOutput
(
"RoisNum"
,
"(Tensor), Number of RoIs in each images."
)
AddOutput
(
"RoisNum"
,
"(Tensor), Number of RoIs in each images."
)
...
@@ -113,7 +114,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
...
@@ -113,7 +114,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
per image if keep_top_k is larger than -1.
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
This operator support multi-class and batched inputs. It applying NMS
independently for each class. The outputs is a 2-D LoDTenosr, for each
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of
LoD
Tensor are called LoD, the number
image, the offsets in first dimension of
phi::Dense
Tensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
means there is no detected bbox for this image. Now this operator has one more
means there is no detected bbox for this image. Now this operator has one more
output, which is RoisNum. The size of RoisNum is N, RoisNum[i] means the number of
output, which is RoisNum. The size of RoisNum is N, RoisNum[i] means the number of
...
...
paddle/fluid/operators/detection/mine_hard_examples_op.cc
浏览文件 @
30a31a53
...
@@ -363,15 +363,15 @@ class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -363,15 +363,15 @@ class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
.
SetDefault
(
"max_negative"
)
.
SetDefault
(
"max_negative"
)
.
InEnum
({
"hard_example"
,
"max_negative"
});
.
InEnum
({
"hard_example"
,
"max_negative"
});
AddOutput
(
AddOutput
(
"NegIndices"
,
"NegIndices"
,
"(phi::DenseTensor<int>) The output of negative example indices. "
"(LoDTensor<int>) The output of negative example indices. a LoD
Tensor "
"a phi::Dense
Tensor "
"with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
"with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
"and each element is the prior box index. "
"and each element is the prior box index. "
"For example, the batch size is 2, the lod is [[0, 1, 2]], "
"For example, the batch size is 2, the lod is [[0, 1, 2]], "
"the sample 0's box 1(MatchIndices[0][1]) is selected, "
"the sample 0's box 1(MatchIndices[0][1]) is selected, "
"and sample 1's box 0 is selected. The output NegIndices is "
"and sample 1's box 0 is selected. The output NegIndices is "
"[[1], [0]]."
);
"[[1], [0]]."
);
AddOutput
(
"UpdatedMatchIndices"
,
AddOutput
(
"UpdatedMatchIndices"
,
"(Tensor<int>) The output of updated MatchIndices, a tensor with "
"(Tensor<int>) The output of updated MatchIndices, a tensor with "
...
...
paddle/fluid/operators/detection/multiclass_nms_op.cc
浏览文件 @
30a31a53
...
@@ -22,7 +22,6 @@ namespace paddle {
...
@@ -22,7 +22,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
inline
std
::
vector
<
size_t
>
GetNmsLodFromRoisNum
(
inline
std
::
vector
<
size_t
>
GetNmsLodFromRoisNum
(
const
phi
::
DenseTensor
*
rois_num
)
{
const
phi
::
DenseTensor
*
rois_num
)
{
...
@@ -357,11 +356,11 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
...
@@ -357,11 +356,11 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
}
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
boxes
=
ctx
.
Input
<
LoD
Tensor
>
(
"BBoxes"
);
auto
*
boxes
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"BBoxes"
);
auto
*
scores
=
ctx
.
Input
<
LoD
Tensor
>
(
"Scores"
);
auto
*
scores
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Scores"
);
auto
*
outs
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
outs
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
bool
return_index
=
ctx
.
HasOutput
(
"Index"
)
?
true
:
false
;
bool
return_index
=
ctx
.
HasOutput
(
"Index"
)
?
true
:
false
;
auto
index
=
ctx
.
Output
<
LoD
Tensor
>
(
"Index"
);
auto
index
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Index"
);
bool
has_roisnum
=
ctx
.
HasInput
(
"RoisNum"
)
?
true
:
false
;
bool
has_roisnum
=
ctx
.
HasInput
(
"RoisNum"
)
?
true
:
false
;
auto
rois_num
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"RoisNum"
);
auto
rois_num
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"RoisNum"
);
auto
score_dims
=
scores
->
dims
();
auto
score_dims
=
scores
->
dims
();
...
@@ -496,7 +495,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -496,7 +495,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"predicted locations of M bounding bboxes, N is the batch size. "
"predicted locations of M bounding bboxes, N is the batch size. "
"Each bounding box has four coordinate values and the layout is "
"Each bounding box has four coordinate values and the layout is "
"[xmin, ymin, xmax, ymax], when box size equals to 4."
"[xmin, ymin, xmax, ymax], when box size equals to 4."
"2. (
LoD
Tensor) A 3-D Tensor with shape [M, C, 4]"
"2. (
phi::Dense
Tensor) A 3-D Tensor with shape [M, C, 4]"
"M is the number of bounding boxes, C is the class number"
);
"M is the number of bounding boxes, C is the class number"
);
AddInput
(
"Scores"
,
AddInput
(
"Scores"
,
"Two types of scores are supported:"
"Two types of scores are supported:"
...
@@ -505,7 +504,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -505,7 +504,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"class number, M is number of bounding boxes. For each category "
"class number, M is number of bounding boxes. For each category "
"there are total M scores which corresponding M bounding boxes. "
"there are total M scores which corresponding M bounding boxes. "
" Please note, M is equal to the 2nd dimension of BBoxes. "
" Please note, M is equal to the 2nd dimension of BBoxes. "
"2. (
LoDTensor) A 2-D LoD
Tensor with shape [M, C]. "
"2. (
phi::DenseTensor) A 2-D phi::Dense
Tensor with shape [M, C]. "
"M is the number of bbox, C is the class number. In this case, "
"M is the number of bbox, C is the class number. In this case, "
"Input BBoxes should be the second case with shape [M, C, 4]."
);
"Input BBoxes should be the second case with shape [M, C, 4]."
);
AddAttr
<
int
>
(
AddAttr
<
int
>
(
...
@@ -540,10 +539,12 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -540,10 +539,12 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"Whether detections are normalized."
)
"Whether detections are normalized."
)
.
SetDefault
(
true
);
.
SetDefault
(
true
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
"represents the "
"detections. Each row has 6 values: "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax] or "
"[label, confidence, xmin, ymin, xmax, ymax] or "
"(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] "
"represents the "
"detections. Each row has 10 values: "
"detections. Each row has 10 values: "
"[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
"[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
"total number of detections in this mini-batch."
"total number of detections in this mini-batch."
...
@@ -564,7 +565,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
...
@@ -564,7 +565,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
per image if keep_top_k is larger than -1.
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
This operator support multi-class and batched inputs. It applying NMS
independently for each class. The outputs is a 2-D LoDTenosr, for each
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of
LoD
Tensor are called LoD, the number
image, the offsets in first dimension of
phi::Dense
Tensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
means there is no detected bbox for this image.
means there is no detected bbox for this image.
)DOC"
);
)DOC"
);
...
@@ -600,7 +601,8 @@ class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker {
...
@@ -600,7 +601,8 @@ class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker {
void
Make
()
override
{
void
Make
()
override
{
MultiClassNMSOpMaker
::
Make
();
MultiClassNMSOpMaker
::
Make
();
AddOutput
(
"Index"
,
AddOutput
(
"Index"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] "
"represents the "
"index of selected bbox. The index is the absolute index cross "
"index of selected bbox. The index is the absolute index cross "
"batches."
)
"batches."
)
.
AsIntermediate
();
.
AsIntermediate
();
...
...
paddle/fluid/operators/detection/retinanet_detection_output_op.cc
浏览文件 @
30a31a53
...
@@ -19,7 +19,6 @@ namespace paddle {
...
@@ -19,7 +19,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
RetinanetDetectionOutputOp
:
public
framework
::
OperatorWithKernel
{
class
RetinanetDetectionOutputOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
...
@@ -490,8 +489,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
...
@@ -490,8 +489,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
auto
boxes
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"BBoxes"
);
auto
boxes
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"BBoxes"
);
auto
scores
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Scores"
);
auto
scores
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Scores"
);
auto
anchors
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Anchors"
);
auto
anchors
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Anchors"
);
auto
*
im_info
=
ctx
.
Input
<
LoD
Tensor
>
(
"ImInfo"
);
auto
*
im_info
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"ImInfo"
);
auto
*
outs
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
outs
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
std
::
vector
<
Tensor
>
boxes_list
(
boxes
.
size
());
std
::
vector
<
Tensor
>
boxes_list
(
boxes
.
size
());
std
::
vector
<
Tensor
>
scores_list
(
scores
.
size
());
std
::
vector
<
Tensor
>
scores_list
(
scores
.
size
());
...
@@ -586,7 +585,8 @@ class RetinanetDetectionOutputOpMaker
...
@@ -586,7 +585,8 @@ class RetinanetDetectionOutputOpMaker
"[xmin, ymin, xmax, ymax]."
)
"[xmin, ymin, xmax, ymax]."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"ImInfo"
,
AddInput
(
"ImInfo"
,
"(LoDTensor) A 2-D LoDTensor with shape [N, 3] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] "
"represents the "
"image information. N is the batch size, each image information "
"image information. N is the batch size, each image information "
"includes height, width and scale."
);
"includes height, width and scale."
);
AddAttr
<
float
>
(
"score_threshold"
,
AddAttr
<
float
>
(
"score_threshold"
,
...
@@ -609,7 +609,8 @@ class RetinanetDetectionOutputOpMaker
...
@@ -609,7 +609,8 @@ class RetinanetDetectionOutputOpMaker
"Number of total bounding boxes to be kept per image after NMS "
"Number of total bounding boxes to be kept per image after NMS "
"step."
);
"step."
);
AddOutput
(
"Out"
,
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
"represents the "
"detections. Each row has 6 values: "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax]"
"[label, confidence, xmin, ymin, xmax, ymax]"
"No is the total number of detections in this mini-batch."
"No is the total number of detections in this mini-batch."
...
@@ -650,7 +651,7 @@ After NMS step, at most keep_top_k number of total bounding boxes are to be kept
...
@@ -650,7 +651,7 @@ After NMS step, at most keep_top_k number of total bounding boxes are to be kept
per image if keep_top_k is larger than -1.
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
This operator support multi-class and batched inputs. It applying NMS
independently for each class. The outputs is a 2-D LoDTenosr, for each
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of
LoD
Tensor are called LoD, the number
image, the offsets in first dimension of
phi::Dense
Tensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
means there is no detected bounding box for this image. If there is no detected boxes
means there is no detected bounding box for this image. If there is no detected boxes
for all images, all the elements in LoD are set to 0, and the output tensor is
for all images, all the elements in LoD are set to 0, and the output tensor is
...
...
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
浏览文件 @
30a31a53
...
@@ -23,7 +23,6 @@ namespace paddle {
...
@@ -23,7 +23,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
>
template
<
typename
T
>
bool
GT_E
(
T
a
,
T
b
)
{
bool
GT_E
(
T
a
,
T
b
)
{
...
@@ -504,7 +503,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
...
@@ -504,7 +503,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
rois_dims
.
size
(),
rois_dims
.
size
(),
2
,
2
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"ROIs should be a 2-D
LoD
Tensor of shape (num_rois, 8)"
"ROIs should be a 2-D
phi::Dense
Tensor of shape (num_rois, 8)"
"given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received "
"given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received "
"rois dims is %d"
,
"rois dims is %d"
,
rois_dims
.
size
()));
rois_dims
.
size
()));
...
@@ -512,7 +511,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
...
@@ -512,7 +511,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
rois_dims
[
1
],
rois_dims
[
1
],
8
,
8
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"ROIs should be a 2-D
LoD
Tensor of shape (num_rois, 8)"
"ROIs should be a 2-D
phi::Dense
Tensor of shape (num_rois, 8)"
"given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received %d"
,
"given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received %d"
,
rois_dims
[
1
]));
rois_dims
[
1
]));
...
@@ -608,9 +607,9 @@ class ROIPerspectiveTransformOpMaker
...
@@ -608,9 +607,9 @@ class ROIPerspectiveTransformOpMaker
"H is the height of the feature, and "
"H is the height of the feature, and "
"W is the width of the feature."
);
"W is the width of the feature."
);
AddInput
(
"ROIs"
,
AddInput
(
"ROIs"
,
"(
LoD
Tensor), "
"(
phi::Dense
Tensor), "
"ROIs (Regions of Interest) to be transformed. "
"ROIs (Regions of Interest) to be transformed. "
"should be a 2-D
LoD
Tensor of shape (num_rois, 8)"
"should be a 2-D
phi::Dense
Tensor of shape (num_rois, 8)"
"given as [[x1, y1, x2, y2, x3, y3, x4, y4], ...]."
"given as [[x1, y1, x2, y2, x3, y3, x4, y4], ...]."
"(x1, y1) is the top left coordinates, and "
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the top right coordinates, and"
"(x2, y2) is the top right coordinates, and"
...
...
paddle/fluid/operators/detection/rpn_target_assign_op.cc
浏览文件 @
30a31a53
...
@@ -22,7 +22,6 @@ namespace paddle {
...
@@ -22,7 +22,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
,
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
typename
IndexType
=
Eigen
::
DenseIndex
>
...
@@ -105,7 +104,9 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
...
@@ -105,7 +104,9 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
};
};
template
<
typename
T
>
template
<
typename
T
>
void
AppendRpns
(
LoDTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
void
AppendRpns
(
phi
::
DenseTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
...
@@ -395,15 +396,16 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
...
@@ -395,15 +396,16 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
anchor
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
anchor
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
gt_boxes
=
context
.
Input
<
LoD
Tensor
>
(
"GtBoxes"
);
auto
*
gt_boxes
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"GtBoxes"
);
auto
*
is_crowd
=
context
.
Input
<
LoD
Tensor
>
(
"IsCrowd"
);
auto
*
is_crowd
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"IsCrowd"
);
auto
*
im_info
=
context
.
Input
<
LoD
Tensor
>
(
"ImInfo"
);
auto
*
im_info
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"ImInfo"
);
auto
*
loc_index
=
context
.
Output
<
LoDTensor
>
(
"LocationIndex"
);
auto
*
loc_index
=
context
.
Output
<
phi
::
DenseTensor
>
(
"LocationIndex"
);
auto
*
score_index
=
context
.
Output
<
LoDTensor
>
(
"ScoreIndex"
);
auto
*
score_index
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox
=
context
.
Output
<
LoDTensor
>
(
"TargetBBox"
);
auto
*
tgt_bbox
=
context
.
Output
<
phi
::
DenseTensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl
=
context
.
Output
<
LoDTensor
>
(
"TargetLabel"
);
auto
*
tgt_lbl
=
context
.
Output
<
phi
::
DenseTensor
>
(
"TargetLabel"
);
auto
*
bbox_inside_weight
=
context
.
Output
<
LoDTensor
>
(
"BBoxInsideWeight"
);
auto
*
bbox_inside_weight
=
context
.
Output
<
phi
::
DenseTensor
>
(
"BBoxInsideWeight"
);
PADDLE_ENFORCE_EQ
(
gt_boxes
->
lod
().
size
(),
PADDLE_ENFORCE_EQ
(
gt_boxes
->
lod
().
size
(),
1UL
,
1UL
,
...
@@ -598,11 +600,11 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -598,11 +600,11 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Anchor"
,
AddInput
(
"Anchor"
,
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
AddInput
(
"GtBoxes"
,
AddInput
(
"GtBoxes"
,
"(
LoD
Tensor) input ground-truth bbox with shape [K, 4]."
);
"(
phi::Dense
Tensor) input ground-truth bbox with shape [K, 4]."
);
AddInput
(
"IsCrowd"
,
AddInput
(
"IsCrowd"
,
"(
LoD
Tensor) input which indicates ground-truth is crowd."
);
"(
phi::Dense
Tensor) input which indicates ground-truth is crowd."
);
AddInput
(
"ImInfo"
,
AddInput
(
"ImInfo"
,
"(
LoD
Tensor) input image information with shape [N, 3]. "
"(
phi::Dense
Tensor) input image information with shape [N, 3]. "
"N is the batch size, each image information includes height, "
"N is the batch size, each image information includes height, "
"width and scale."
);
"width and scale."
);
AddAttr
<
int
>
(
"rpn_batch_size_per_im"
,
AddAttr
<
int
>
(
"rpn_batch_size_per_im"
,
...
@@ -685,13 +687,13 @@ class RetinanetTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -685,13 +687,13 @@ class RetinanetTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Anchor"
,
AddInput
(
"Anchor"
,
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
AddInput
(
"GtBoxes"
,
AddInput
(
"GtBoxes"
,
"(
LoD
Tensor) input ground-truth bbox with shape [K, 4]."
);
"(
phi::Dense
Tensor) input ground-truth bbox with shape [K, 4]."
);
AddInput
(
"GtLabels"
,
AddInput
(
"GtLabels"
,
"(
LoD
Tensor) input ground-truth label with shape [K, 1]."
);
"(
phi::Dense
Tensor) input ground-truth label with shape [K, 1]."
);
AddInput
(
"IsCrowd"
,
AddInput
(
"IsCrowd"
,
"(
LoD
Tensor) input which indicates ground-truth is crowd."
);
"(
phi::Dense
Tensor) input which indicates ground-truth is crowd."
);
AddInput
(
"ImInfo"
,
AddInput
(
"ImInfo"
,
"(
LoD
Tensor) input image information with shape [N, 3]. "
"(
phi::Dense
Tensor) input image information with shape [N, 3]. "
"N is the batch size, each image information includes height, "
"N is the batch size, each image information includes height, "
"width and scale."
);
"width and scale."
);
AddAttr
<
float
>
(
AddAttr
<
float
>
(
...
@@ -994,17 +996,18 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
...
@@ -994,17 +996,18 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
anchor
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
anchor
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
gt_boxes
=
context
.
Input
<
LoDTensor
>
(
"GtBoxes"
);
auto
*
gt_boxes
=
context
.
Input
<
phi
::
DenseTensor
>
(
"GtBoxes"
);
auto
*
gt_labels
=
context
.
Input
<
LoDTensor
>
(
"GtLabels"
);
auto
*
gt_labels
=
context
.
Input
<
phi
::
DenseTensor
>
(
"GtLabels"
);
auto
*
is_crowd
=
context
.
Input
<
LoDTensor
>
(
"IsCrowd"
);
auto
*
is_crowd
=
context
.
Input
<
phi
::
DenseTensor
>
(
"IsCrowd"
);
auto
*
im_info
=
context
.
Input
<
LoDTensor
>
(
"ImInfo"
);
auto
*
im_info
=
context
.
Input
<
phi
::
DenseTensor
>
(
"ImInfo"
);
auto
*
loc_index
=
context
.
Output
<
LoDTensor
>
(
"LocationIndex"
);
auto
*
loc_index
=
context
.
Output
<
phi
::
DenseTensor
>
(
"LocationIndex"
);
auto
*
score_index
=
context
.
Output
<
LoDTensor
>
(
"ScoreIndex"
);
auto
*
score_index
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox
=
context
.
Output
<
LoDTensor
>
(
"TargetBBox"
);
auto
*
tgt_bbox
=
context
.
Output
<
phi
::
DenseTensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl
=
context
.
Output
<
LoDTensor
>
(
"TargetLabel"
);
auto
*
tgt_lbl
=
context
.
Output
<
phi
::
DenseTensor
>
(
"TargetLabel"
);
auto
*
bbox_inside_weight
=
context
.
Output
<
LoDTensor
>
(
"BBoxInsideWeight"
);
auto
*
bbox_inside_weight
=
auto
*
fg_num
=
context
.
Output
<
LoDTensor
>
(
"ForegroundNumber"
);
context
.
Output
<
phi
::
DenseTensor
>
(
"BBoxInsideWeight"
);
auto
*
fg_num
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ForegroundNumber"
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
gt_boxes
->
lod
().
size
(),
gt_boxes
->
lod
().
size
(),
...
...
paddle/fluid/operators/detection/target_assign_op.cc
浏览文件 @
30a31a53
...
@@ -89,7 +89,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -89,7 +89,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"X"
,
AddInput
(
"X"
,
"(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. "
"(phi::DenseTensor), This input is a 3D phi::DenseTensor with "
"shape [M, P, K]. "
"Some elements in X will be assigned to Out based on the "
"Some elements in X will be assigned to Out based on the "
"MatchIndices and NegIndices."
);
"MatchIndices and NegIndices."
);
AddInput
(
"MatchIndices"
,
AddInput
(
"MatchIndices"
,
...
@@ -97,7 +98,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -97,7 +98,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity "
"with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity "
"of column is not matched to any entity of row in i-th instance."
);
"of column is not matched to any entity of row in i-th instance."
);
AddInput
(
"NegIndices"
,
AddInput
(
"NegIndices"
,
"(LoDTensor, default LoDTensor<int>), The input negative example "
"(phi::DenseTensor, default phi::DenseTensor<int>), The input "
"negative example "
"indices are an optional input with shape [Neg, 1], where Neg is "
"indices are an optional input with shape [Neg, 1], where Neg is "
"the total number of negative example indices."
)
"the total number of negative example indices."
)
.
AsDispensable
();
.
AsDispensable
();
...
...
paddle/fluid/operators/elementwise/elementwise_add_op.cc
浏览文件 @
30a31a53
...
@@ -31,15 +31,17 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
...
@@ -31,15 +31,17 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X + Y"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = X + Y"
;
}
void
AddInputX
()
override
{
void
AddInputX
()
override
{
AddInput
(
"X"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"X"
,
"should be int32, int64, float32, float64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
}
void
AddInputY
()
override
{
void
AddInputY
()
override
{
AddInput
(
"Y"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"Y"
,
"should be int32, int64, float32, float64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
}
std
::
string
GetOpFuntionality
()
const
override
{
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.cc
浏览文件 @
30a31a53
...
@@ -29,15 +29,17 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
...
@@ -29,15 +29,17 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X / Y"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = X / Y"
;
}
void
AddInputX
()
override
{
void
AddInputX
()
override
{
AddInput
(
"X"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"X"
,
"should be int32, int64, float32, float64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
}
void
AddInputY
()
override
{
void
AddInputY
()
override
{
AddInput
(
"Y"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"Y"
,
"should be int32, int64, float32, float64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
}
std
::
string
GetOpFuntionality
()
const
override
{
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
浏览文件 @
30a31a53
...
@@ -35,15 +35,17 @@ class ElementwiseFloorDivOpMaker : public ElementwiseOpMaker {
...
@@ -35,15 +35,17 @@ class ElementwiseFloorDivOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X // Y"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = X // Y"
;
}
void
AddInputX
()
override
{
void
AddInputX
()
override
{
AddInput
(
"X"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"X"
,
"should be int32, int64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64."
);
}
}
void
AddInputY
()
override
{
void
AddInputY
()
override
{
AddInput
(
"Y"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"Y"
,
"should be int32, int64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64."
);
}
}
std
::
string
GetOpFuntionality
()
const
override
{
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.cc
浏览文件 @
30a31a53
...
@@ -28,15 +28,17 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
...
@@ -28,15 +28,17 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X
\\\\
odot Y"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = X
\\\\
odot Y"
;
}
void
AddInputX
()
override
{
void
AddInputX
()
override
{
AddInput
(
"X"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"X"
,
"should be int32, int64, float32, float64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
}
void
AddInputY
()
override
{
void
AddInputY
()
override
{
AddInput
(
"Y"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"Y"
,
"should be int32, int64, float32, float64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
}
std
::
string
GetOpFuntionality
()
const
override
{
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
30a31a53
...
@@ -43,13 +43,14 @@ class ElementwiseOp : public framework::OperatorWithKernel {
...
@@ -43,13 +43,14 @@ class ElementwiseOp : public framework::OperatorWithKernel {
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"Y"
),
"Input"
,
"Y"
,
"ElementwiseOp"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"Y"
),
"Input"
,
"Y"
,
"ElementwiseOp"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"Out"
),
"Output"
,
"Out"
,
"ElementwiseOp"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"Out"
),
"Output"
,
"Out"
,
"ElementwiseOp"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
PADDLE_ENFORCE_EQ
(
framework
::
proto
::
VarType
::
LOD_TENSOR
,
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
platform
::
errors
::
InvalidArgument
(
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the "
platform
::
errors
::
InvalidArgument
(
"received is %s [%s]."
,
"The input var's type should be phi::DenseTensor, but the "
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
"received is %s [%s]."
,
ctx
->
Inputs
(
"Y"
).
front
()));
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
ctx
->
Inputs
(
"Y"
).
front
()));
if
(
ctx
->
GetInputsVarType
(
"X"
).
front
()
==
if
(
ctx
->
GetInputsVarType
(
"X"
).
front
()
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
...
...
paddle/fluid/operators/elementwise/elementwise_op_function.h
浏览文件 @
30a31a53
...
@@ -61,11 +61,11 @@ namespace operators {
...
@@ -61,11 +61,11 @@ namespace operators {
/*
/*
* Pack input and output tensors into respective vectors with
* Pack input and output tensors into respective vectors with
* consideration of varible X`s class type.
* consideration of varible X`s class type.
* Input variable X is supported to be whether
LoD
Tensor or
* Input variable X is supported to be whether
phi::Dense
Tensor or
* SelectedRows class type in this package function, once X
* SelectedRows class type in this package function, once X
* was SelectedRows type, a valid pointer x_for_selectedrows
* was SelectedRows type, a valid pointer x_for_selectedrows
* is excepted to be passed in from op kernel for acquisition
* is excepted to be passed in from op kernel for acquisition
* of the valid address of
LoD
Tensor created ahead in the function.
* of the valid address of
phi::Dense
Tensor created ahead in the function.
*/
*/
template
<
typename
OutT
>
template
<
typename
OutT
>
int
PackTensorsIntoVector
(
const
framework
::
ExecutionContext
&
ctx
,
int
PackTensorsIntoVector
(
const
framework
::
ExecutionContext
&
ctx
,
...
@@ -112,7 +112,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
...
@@ -112,7 +112,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
}
else
{
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"X's type[%s] is not supported by elementwise_op. X's type should be "
"X's type[%s] is not supported by elementwise_op. X's type should be "
"
LoD
Tensor or SelectedRows."
,
"
phi::Dense
Tensor or SelectedRows."
,
framework
::
ToTypeName
(
x_var
->
Type
())));
framework
::
ToTypeName
(
x_var
->
Type
())));
}
}
z
->
mutable_data
<
OutT
>
(
ctx
.
GetPlace
());
z
->
mutable_data
<
OutT
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
浏览文件 @
30a31a53
...
@@ -34,15 +34,17 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
...
@@ -34,15 +34,17 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X - Y"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = X - Y"
;
}
void
AddInputX
()
override
{
void
AddInputX
()
override
{
AddInput
(
"X"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"X"
,
"should be int32, int64, float32, float64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
}
void
AddInputY
()
override
{
void
AddInputY
()
override
{
AddInput
(
"Y"
,
AddInput
(
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"Y"
,
"should be int32, int64, float32, float64."
);
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
}
std
::
string
GetOpFuntionality
()
const
override
{
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_xpu.h
浏览文件 @
30a31a53
...
@@ -43,8 +43,8 @@ void XPUElementwise(const framework::ExecutionContext& ctx,
...
@@ -43,8 +43,8 @@ void XPUElementwise(const framework::ExecutionContext& ctx,
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
x_var
->
IsType
<
phi
::
DenseTensor
>
(),
x_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"XPU only support phi::DenseTensor, "
"XPU only support LoDTensor, Input(X) is not LoD
Tensor"
));
"Input(X) is not phi::Dense
Tensor"
));
auto
x
=
x_var
->
Get
<
phi
::
DenseTensor
>
();
auto
x
=
x_var
->
Get
<
phi
::
DenseTensor
>
();
auto
*
y
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Y"
);
auto
*
y
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Y"
);
...
...
paddle/fluid/operators/fused/fused_bn_activation_op.cc
浏览文件 @
30a31a53
...
@@ -24,8 +24,6 @@ limitations under the License. */
...
@@ -24,8 +24,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
void
FusedBatchNormActOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
void
FusedBatchNormActOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"X"
),
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"X"
),
true
,
true
,
...
@@ -309,8 +307,8 @@ framework::OpKernelType FusedBatchNormActGradOp::GetExpectedKernelType(
...
@@ -309,8 +307,8 @@ framework::OpKernelType FusedBatchNormActGradOp::GetExpectedKernelType(
const
Tensor
*
t
=
nullptr
;
const
Tensor
*
t
=
nullptr
;
if
(
var
->
IsType
<
Tensor
>
())
{
if
(
var
->
IsType
<
Tensor
>
())
{
t
=
&
var
->
Get
<
Tensor
>
();
t
=
&
var
->
Get
<
Tensor
>
();
}
else
if
(
var
->
IsType
<
LoD
Tensor
>
())
{
}
else
if
(
var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
t
=
&
var
->
Get
<
LoD
Tensor
>
();
t
=
&
var
->
Get
<
phi
::
Dense
Tensor
>
();
}
}
if
(
t
==
nullptr
)
{
if
(
t
==
nullptr
)
{
PADDLE_THROW
(
PADDLE_THROW
(
...
...
paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
浏览文件 @
30a31a53
...
@@ -23,8 +23,6 @@ limitations under the License. */
...
@@ -23,8 +23,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
void
FusedBatchNormAddActOp
::
InferShape
(
void
FusedBatchNormAddActOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
framework
::
InferShapeContext
*
ctx
)
const
{
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"X"
),
"Input"
,
"X"
,
"FusedBatchNormAddActOp"
);
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"X"
),
"Input"
,
"X"
,
"FusedBatchNormAddActOp"
);
...
@@ -267,8 +265,8 @@ framework::OpKernelType FusedBatchNormAddActGradOp::GetExpectedKernelType(
...
@@ -267,8 +265,8 @@ framework::OpKernelType FusedBatchNormAddActGradOp::GetExpectedKernelType(
const
Tensor
*
t
=
nullptr
;
const
Tensor
*
t
=
nullptr
;
if
(
var
->
IsType
<
Tensor
>
())
{
if
(
var
->
IsType
<
Tensor
>
())
{
t
=
&
var
->
Get
<
Tensor
>
();
t
=
&
var
->
Get
<
Tensor
>
();
}
else
if
(
var
->
IsType
<
LoD
Tensor
>
())
{
}
else
if
(
var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
t
=
&
var
->
Get
<
LoD
Tensor
>
();
t
=
&
var
->
Get
<
phi
::
Dense
Tensor
>
();
}
}
if
(
t
==
nullptr
)
{
if
(
t
==
nullptr
)
{
PADDLE_THROW
(
PADDLE_THROW
(
...
...
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
浏览文件 @
30a31a53
...
@@ -213,23 +213,25 @@ void FusedEmbeddingFCLSTMOpMaker::Make() {
...
@@ -213,23 +213,25 @@ void FusedEmbeddingFCLSTMOpMaker::Make() {
"input. This is a tensor with shape (N x D), where N is the "
"input. This is a tensor with shape (N x D), where N is the "
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
"Hidden"
,
AddOutput
(
"(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
"Hidden"
,
"The shape is (T x D), and lod is the same with the `Input`."
);
"(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. "
AddOutput
(
"Cell"
,
"The shape is (T x D), and lod is the same with the `Input`."
);
"(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
AddOutput
(
"The shape is (T x D), and lod is the same with the `Input`."
);
"Cell"
,
"(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"XX"
,
AddOutput
(
"XX"
,
"(
LoD
Tensor) the result after X * WeightX (size is T x 4D)"
"(
phi::Dense
Tensor) the result after X * WeightX (size is T x 4D)"
" or batched_X (size is T x M), this will be automatically chosen,"
" or batched_X (size is T x M), this will be automatically chosen,"
" where T is the total time steps in this mini-batch,"
" where T is the total time steps in this mini-batch,"
" D is the hidden size, M is the dim size of x input."
)
" D is the hidden size, M is the dim size of x input."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
LoD
Tensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
phi::Dense
Tensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(
LoD
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(
phi::Dense
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(
LoD
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(
phi::Dense
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(
LoD
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(
phi::Dense
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(
LoD
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(
phi::Dense
Tensor) (N x D)."
).
AsIntermediate
();
AddAttr
<
bool
>
(
"use_peepholes"
,
AddAttr
<
bool
>
(
"use_peepholes"
,
"(bool, default: True) "
"(bool, default: True) "
"whether to enable diagonal/peephole connections."
)
"whether to enable diagonal/peephole connections."
)
...
@@ -286,15 +288,15 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
...
@@ -286,15 +288,15 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
}
}
#define INIT_BASE_INPUT_OUTPUT \
#define INIT_BASE_INPUT_OUTPUT \
auto* ids = ctx.Input<
LoDTensor>("Ids");
\
auto* ids = ctx.Input<
phi::DenseTensor>("Ids");
\
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* c0 = ctx.Input<phi::DenseTensor>("C0"); \
auto* c0 = ctx.Input<phi::DenseTensor>("C0"); \
auto* embeddings = ctx.Input<phi::DenseTensor>("Embeddings"); \
auto* embeddings = ctx.Input<phi::DenseTensor>("Embeddings"); \
auto* wh = ctx.Input<phi::DenseTensor>("WeightH"); \
auto* wh = ctx.Input<phi::DenseTensor>("WeightH"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* xx = ctx.Output<
LoDTensor>("XX");
\
auto* xx = ctx.Output<
phi::DenseTensor>("XX");
\
auto* hidden_out = ctx.Output<
LoDTensor>("Hidden");
\
auto* hidden_out = ctx.Output<
phi::DenseTensor>("Hidden");
\
auto* cell_out = ctx.Output<
LoDTensor>("Cell");
\
auto* cell_out = ctx.Output<
phi::DenseTensor>("Cell");
\
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
bool use_peepholes = ctx.Attr<bool>("use_peepholes");
bool use_peepholes = ctx.Attr<bool>("use_peepholes");
...
@@ -508,9 +510,9 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
...
@@ -508,9 +510,9 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
reordered_c0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedC0"
);
auto
*
reordered_c0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedC0"
);
auto
*
batched_input
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedInput"
);
auto
*
batched_input
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedInput"
);
auto
*
batched_c_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedCell"
);
auto
*
batched_c_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedCell"
);
auto
*
batched_h_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedHidden"
);
auto
*
batched_h_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedHidden"
);
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
place
);
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_c_out_data
=
batched_c_out
->
mutable_data
<
T
>
(
place
);
T
*
batched_c_out_data
=
batched_c_out
->
mutable_data
<
T
>
(
place
);
...
...
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@ limitations under the License. */
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusedEmbeddingFCLSTMOp
:
public
framework
::
OperatorWithKernel
{
class
FusedEmbeddingFCLSTMOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
浏览文件 @
30a31a53
...
@@ -162,7 +162,7 @@ class FusedEmbeddingSeqPoolOpGradVarTypeInference
...
@@ -162,7 +162,7 @@ class FusedEmbeddingSeqPoolOpGradVarTypeInference
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
}
else
{
}
else
{
VLOG
(
3
)
<<
"fused_embedding_seq_pool_grad op "
VLOG
(
3
)
<<
"fused_embedding_seq_pool_grad op "
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to
LoD
Tensor"
;
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to
phi::Dense
Tensor"
;
ctx
->
SetOutputType
(
out_var_name
,
framework
::
proto
::
VarType
::
LOD_TENSOR
);
ctx
->
SetOutputType
(
out_var_name
,
framework
::
proto
::
VarType
::
LOD_TENSOR
);
}
}
ctx
->
SetOutputDataType
(
out_var_name
,
ctx
->
GetInputDataType
(
"W"
));
ctx
->
SetOutputDataType
(
out_var_name
,
ctx
->
GetInputDataType
(
"W"
));
...
...
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
浏览文件 @
30a31a53
...
@@ -29,7 +29,6 @@ namespace paddle {
...
@@ -29,7 +29,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
using
SelectedRows
=
phi
::
SelectedRows
;
using
SelectedRows
=
phi
::
SelectedRows
;
using
DDim
=
framework
::
DDim
;
using
DDim
=
framework
::
DDim
;
...
@@ -83,9 +82,9 @@ void prepare_csr_data(const std::vector<uint64_t> &offset,
...
@@ -83,9 +82,9 @@ void prepare_csr_data(const std::vector<uint64_t> &offset,
template
<
typename
T
>
template
<
typename
T
>
struct
EmbeddingVSumFunctor
{
struct
EmbeddingVSumFunctor
{
void
operator
()(
const
framework
::
ExecutionContext
&
context
,
void
operator
()(
const
framework
::
ExecutionContext
&
context
,
const
LoD
Tensor
*
table_t
,
const
phi
::
Dense
Tensor
*
table_t
,
const
LoD
Tensor
*
ids_t
,
const
phi
::
Dense
Tensor
*
ids_t
,
LoD
Tensor
*
output_t
)
{
phi
::
Dense
Tensor
*
output_t
)
{
auto
*
table
=
table_t
->
data
<
T
>
();
auto
*
table
=
table_t
->
data
<
T
>
();
int64_t
table_height
=
table_t
->
dims
()[
0
];
int64_t
table_height
=
table_t
->
dims
()[
0
];
int64_t
table_width
=
table_t
->
dims
()[
1
];
int64_t
table_width
=
table_t
->
dims
()[
1
];
...
@@ -141,9 +140,11 @@ template <typename T>
...
@@ -141,9 +140,11 @@ template <typename T>
class
FusedEmbeddingSeqPoolKernel
:
public
framework
::
OpKernel
<
T
>
{
class
FusedEmbeddingSeqPoolKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
LoDTensor
*
ids_t
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
// int tensor
const
phi
::
DenseTensor
*
ids_t
=
LoDTensor
*
output_t
=
context
.
Output
<
LoDTensor
>
(
"Out"
);
// float tensor
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
// int tensor
const
LoDTensor
*
table_var
=
context
.
Input
<
LoDTensor
>
(
"W"
);
phi
::
DenseTensor
*
output_t
=
context
.
Output
<
phi
::
DenseTensor
>
(
"Out"
);
// float tensor
const
phi
::
DenseTensor
*
table_var
=
context
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
const
std
::
string
&
combiner_type
=
context
.
Attr
<
std
::
string
>
(
"combiner"
);
const
std
::
string
&
combiner_type
=
context
.
Attr
<
std
::
string
>
(
"combiner"
);
int64_t
last_dim
=
int64_t
last_dim
=
...
@@ -228,23 +229,24 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
...
@@ -228,23 +229,24 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
table_var
=
context
.
InputVar
(
"W"
);
auto
*
table_var
=
context
.
InputVar
(
"W"
);
DDim
table_dim
;
DDim
table_dim
;
if
(
table_var
->
IsType
<
LoD
Tensor
>
())
{
if
(
table_var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
table_dim
=
context
.
Input
<
LoD
Tensor
>
(
"W"
)
->
dims
();
table_dim
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
)
->
dims
();
}
else
if
(
table_var
->
IsType
<
phi
::
SelectedRows
>
())
{
}
else
if
(
table_var
->
IsType
<
phi
::
SelectedRows
>
())
{
auto
*
table_t
=
context
.
Input
<
phi
::
SelectedRows
>
(
"W"
);
auto
*
table_t
=
context
.
Input
<
phi
::
SelectedRows
>
(
"W"
);
table_dim
=
table_t
->
value
().
dims
();
table_dim
=
table_t
->
value
().
dims
();
}
else
{
}
else
{
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"The parameter W of a LookupTable "
"The parameter W of a LookupTable "
"must be either
LoD
Tensor or SelectedRows."
));
"must be either
phi::Dense
Tensor or SelectedRows."
));
}
}
bool
is_sparse
=
context
.
Attr
<
bool
>
(
"is_sparse"
);
bool
is_sparse
=
context
.
Attr
<
bool
>
(
"is_sparse"
);
// Since paddings are not trainable and fixed in forward, the gradient of
// Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward.
// paddings makes no sense and we don't deal with it in backward.
if
(
is_sparse
)
{
if
(
is_sparse
)
{
auto
*
ids
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
*
ids
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
*
d_output
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
d_output
=
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
d_table
=
auto
*
d_table
=
context
.
Output
<
phi
::
SelectedRows
>
(
framework
::
GradVarName
(
"W"
));
context
.
Output
<
phi
::
SelectedRows
>
(
framework
::
GradVarName
(
"W"
));
// runtime shape
// runtime shape
...
@@ -276,9 +278,11 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
...
@@ -276,9 +278,11 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
}
else
{
}
else
{
#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
!defined(__OSX__)
!defined(__OSX__)
auto
*
ids
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
*
ids
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
*
d_output
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
d_output
=
auto
*
d_table
=
context
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
d_table
=
context
.
Output
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"W"
));
int64_t
padding_idx
=
context
.
Attr
<
int64_t
>
(
"padding_idx"
);
int64_t
padding_idx
=
context
.
Attr
<
int64_t
>
(
"padding_idx"
);
d_table
->
Resize
(
table_dim
);
d_table
->
Resize
(
table_dim
);
...
...
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
浏览文件 @
30a31a53
...
@@ -95,7 +95,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel {
...
@@ -95,7 +95,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel {
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
inputs
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
inputs
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
input_data_type
=
framework
::
proto
::
VarType
::
Type
(
0
);
auto
input_data_type
=
framework
::
proto
::
VarType
::
Type
(
0
);
bool
flag
=
0
;
bool
flag
=
0
;
for
(
auto
*
input
:
inputs
)
{
for
(
auto
*
input
:
inputs
)
{
...
@@ -121,7 +121,7 @@ class FusedSeqpoolCVMOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -121,7 +121,7 @@ class FusedSeqpoolCVMOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"X"
,
AddInput
(
"X"
,
"(vector<
LoD
Tensor>) The input tensors of"
"(vector<
phi::Dense
Tensor>) The input tensors of"
" operator."
)
" operator."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"CVM"
,
AddInput
(
"CVM"
,
...
...
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
浏览文件 @
30a31a53
...
@@ -424,7 +424,7 @@ template <typename T>
...
@@ -424,7 +424,7 @@ template <typename T>
class
FusedSeqpoolCVMCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
class
FusedSeqpoolCVMCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
inputs
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
inputs
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
outputs
=
ctx
.
MultiOutput
<
phi
::
DenseTensor
>
(
"Out"
);
auto
outputs
=
ctx
.
MultiOutput
<
phi
::
DenseTensor
>
(
"Out"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
const
auto
slot_size
=
inputs
.
size
();
const
auto
slot_size
=
inputs
.
size
();
...
@@ -432,7 +432,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
...
@@ -432,7 +432,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
std
::
vector
<
const
size_t
*>
lods_data
(
slot_size
);
std
::
vector
<
const
size_t
*>
lods_data
(
slot_size
);
std
::
vector
<
T
*>
output_data
(
slot_size
);
std
::
vector
<
T
*>
output_data
(
slot_size
);
std
::
vector
<
LoD
Tensor
>
seqpool_outputs
(
slot_size
);
std
::
vector
<
phi
::
Dense
Tensor
>
seqpool_outputs
(
slot_size
);
std
::
vector
<
T
*>
seqpool_output_data
(
slot_size
);
std
::
vector
<
T
*>
seqpool_output_data
(
slot_size
);
auto
padding_value
=
ctx
.
Attr
<
float
>
(
"pad_value"
);
auto
padding_value
=
ctx
.
Attr
<
float
>
(
"pad_value"
);
...
@@ -509,9 +509,11 @@ template <typename T>
...
@@ -509,9 +509,11 @@ template <typename T>
class
FusedSeqpoolCVMGradCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
class
FusedSeqpoolCVMGradCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
out_grads
=
ctx
.
MultiInput
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
out_grads
=
auto
in_grads
=
ctx
.
MultiOutput
<
LoDTensor
>
(
framework
::
GradVarName
(
"X"
));
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
cvm
=
ctx
.
Input
<
LoDTensor
>
(
"CVM"
);
auto
in_grads
=
ctx
.
MultiOutput
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
cvm
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"CVM"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
auto
use_cvm
=
ctx
.
Attr
<
bool
>
(
"use_cvm"
);
auto
use_cvm
=
ctx
.
Attr
<
bool
>
(
"use_cvm"
);
...
...
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
浏览文件 @
30a31a53
...
@@ -23,8 +23,6 @@ limitations under the License. */
...
@@ -23,8 +23,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
>
template
<
typename
T
>
class
FusedSeqpoolCVMOpCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
FusedSeqpoolCVMOpCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
...
...
paddle/fluid/operators/fused/fusion_group_op.cc
浏览文件 @
30a31a53
...
@@ -87,10 +87,10 @@ class FusionGroupOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -87,10 +87,10 @@ class FusionGroupOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Inputs"
,
AddInput
(
"Inputs"
,
"(std::vector<
LoD
Tensor>) The inputs of fusion_group op."
)
"(std::vector<
phi::Dense
Tensor>) The inputs of fusion_group op."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
"Outs"
,
AddOutput
(
"Outs"
,
"(std::vector<
LoD
Tensor>) The outputs of fusion_group op."
)
"(std::vector<
phi::Dense
Tensor>) The outputs of fusion_group op."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddAttr
<
std
::
vector
<
int
>>
(
"outs_dtype"
,
AddAttr
<
std
::
vector
<
int
>>
(
"outs_dtype"
,
"The data type of Outputs in fusion_group op."
)
"The data type of Outputs in fusion_group op."
)
...
...
paddle/fluid/operators/fused/fusion_gru_op.cc
浏览文件 @
30a31a53
...
@@ -154,11 +154,12 @@ framework::OpKernelType FusionGRUOp::GetExpectedKernelType(
...
@@ -154,11 +154,12 @@ framework::OpKernelType FusionGRUOp::GetExpectedKernelType(
}
}
void
FusionGRUOpMaker
::
Make
()
{
void
FusionGRUOpMaker
::
Make
()
{
AddInput
(
"X"
,
AddInput
(
"(LoDTensor) the input is a LodTensor, which support "
"X"
,
"variable-time length input sequence. The underlying tensor in "
"(phi::DenseTensor) the input is a LodTensor, which support "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"variable-time length input sequence. The underlying tensor in "
"total time steps in this mini-batch, M is the dim size of x."
);
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"H0"
,
AddInput
(
"H0"
,
"(Tensor, optional) The initial hidden state is an optional "
"(Tensor, optional) The initial hidden state is an optional "
"input. This is a tensor with shape (N x D), where N is the "
"input. This is a tensor with shape (N x D), where N is the "
...
@@ -181,18 +182,18 @@ void FusionGRUOpMaker::Make() {
...
@@ -181,18 +182,18 @@ void FusionGRUOpMaker::Make() {
AddOutput
(
"ReorderedH0"
,
"(Tensor) (N x D), which N is the min-batch size."
)
AddOutput
(
"ReorderedH0"
,
"(Tensor) (N x D), which N is the min-batch size."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"XX"
,
AddOutput
(
"XX"
,
"(
LoD
Tensor) the result after X * WeightX (size is T x 3D)"
"(
phi::Dense
Tensor) the result after X * WeightX (size is T x 3D)"
" or batched_X (size is T x M), this will be automatically chosen,"
" or batched_X (size is T x M), this will be automatically chosen,"
" where T is the total time steps in this mini-batch,"
" where T is the total time steps in this mini-batch,"
" D is the hidden size, M is the dim size of x input."
)
" D is the hidden size, M is the dim size of x input."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
AddOutput
(
"BatchedInput"
,
"(
LoD
Tensor) This is the batched result of input X"
"(
phi::Dense
Tensor) This is the batched result of input X"
"or the batched result after fc, shape (T x 3D)"
)
"or the batched result after fc, shape (T x 3D)"
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"BatchedOut"
,
"(
LoD
Tensor) (T X D) save batched hidden."
)
AddOutput
(
"BatchedOut"
,
"(
phi::Dense
Tensor) (T X D) save batched hidden."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"Hidden"
,
"(
LoD
Tensor) (T x D) Same as GRUOp"
);
AddOutput
(
"Hidden"
,
"(
phi::Dense
Tensor) (T x D) Same as GRUOp"
);
AddAttr
<
std
::
string
>
(
"activation"
,
AddAttr
<
std
::
string
>
(
"activation"
,
"(string, default tanh) "
"(string, default tanh) "
"The activation type used for output candidate {h}_t."
)
"The activation type used for output candidate {h}_t."
)
...
@@ -257,9 +258,9 @@ class FusionGRUKernel : public framework::OpKernel<T> {
...
@@ -257,9 +258,9 @@ class FusionGRUKernel : public framework::OpKernel<T> {
}
}
#define INIT_BASE_DEFINES \
#define INIT_BASE_DEFINES \
auto* x = ctx.Input<
LoDTensor>("X");
\
auto* x = ctx.Input<
phi::DenseTensor>("X");
\
auto* wh = ctx.Input<phi::DenseTensor>("WeightH"); \
auto* wh = ctx.Input<phi::DenseTensor>("WeightH"); \
auto* xx = ctx.Output<
LoDTensor>("XX");
\
auto* xx = ctx.Output<
phi::DenseTensor>("XX");
\
auto x_lod = x->lod(); \
auto x_lod = x->lod(); \
auto x_dims = x->dims();
/* T x M*/
\
auto x_dims = x->dims();
/* T x M*/
\
auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) \
auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) \
...
@@ -273,7 +274,7 @@ class FusionGRUKernel : public framework::OpKernel<T> {
...
@@ -273,7 +274,7 @@ class FusionGRUKernel : public framework::OpKernel<T> {
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* wx = ctx.Input<phi::DenseTensor>("WeightX"); \
auto* wx = ctx.Input<phi::DenseTensor>("WeightX"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* hidden_out = ctx.Output<
LoDTensor>("Hidden");
\
auto* hidden_out = ctx.Output<
phi::DenseTensor>("Hidden");
\
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
const int M = x_mat_dims[1]; \
const int M = x_mat_dims[1]; \
const int D = wh_dims[0]; \
const int D = wh_dims[0]; \
...
@@ -398,8 +399,8 @@ class FusionGRUKernel : public framework::OpKernel<T> {
...
@@ -398,8 +399,8 @@ class FusionGRUKernel : public framework::OpKernel<T> {
}
}
INIT_OTHER_DEFINES
;
INIT_OTHER_DEFINES
;
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
batched_input
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedInput"
);
auto
*
batched_input
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedInput"
);
auto
*
batched_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedOut"
);
auto
*
batched_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedOut"
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_out_data
=
batched_out
->
mutable_data
<
T
>
(
place
);
T
*
batched_out_data
=
batched_out
->
mutable_data
<
T
>
(
place
);
hidden_out
->
mutable_data
<
T
>
(
place
);
hidden_out
->
mutable_data
<
T
>
(
place
);
...
...
paddle/fluid/operators/fused/fusion_gru_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@ limitations under the License. */
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionGRUOp
:
public
framework
::
OperatorWithKernel
{
class
FusionGRUOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_lstm_op.cc
浏览文件 @
30a31a53
...
@@ -177,11 +177,12 @@ framework::OpKernelType FusionLSTMOp::GetExpectedKernelType(
...
@@ -177,11 +177,12 @@ framework::OpKernelType FusionLSTMOp::GetExpectedKernelType(
}
}
void
FusionLSTMOpMaker
::
Make
()
{
void
FusionLSTMOpMaker
::
Make
()
{
AddInput
(
"X"
,
AddInput
(
"(LoDTensor) the input is a LodTensor, which support "
"X"
,
"variable-time length input sequence. The underlying tensor in "
"(phi::DenseTensor) the input is a LodTensor, which support "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"variable-time length input sequence. The underlying tensor in "
"total time steps in this mini-batch, M is the dim size of x."
);
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"WeightX"
,
AddInput
(
"WeightX"
,
"(Tensor) the learnable weights of X."
"(Tensor) the learnable weights of X."
" - The shape is (M x 4D), where M is the dim size of x, D is the "
" - The shape is (M x 4D), where M is the dim size of x, D is the "
...
@@ -214,23 +215,25 @@ void FusionLSTMOpMaker::Make() {
...
@@ -214,23 +215,25 @@ void FusionLSTMOpMaker::Make() {
"input. This is a tensor with shape (N x D), where N is the "
"input. This is a tensor with shape (N x D), where N is the "
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
"Hidden"
,
AddOutput
(
"(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
"Hidden"
,
"The shape is (T x D), and lod is the same with the `Input`."
);
"(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. "
AddOutput
(
"Cell"
,
"The shape is (T x D), and lod is the same with the `Input`."
);
"(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
AddOutput
(
"The shape is (T x D), and lod is the same with the `Input`."
);
"Cell"
,
"(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"XX"
,
AddOutput
(
"XX"
,
"(
LoD
Tensor) the result after X * WeightX (size is T x 4D)"
"(
phi::Dense
Tensor) the result after X * WeightX (size is T x 4D)"
" or batched_X (size is T x M), this will be automatically chosen,"
" or batched_X (size is T x M), this will be automatically chosen,"
" where T is the total time steps in this mini-batch,"
" where T is the total time steps in this mini-batch,"
" D is the hidden size, M is the dim size of x input."
)
" D is the hidden size, M is the dim size of x input."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
LoD
Tensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
phi::Dense
Tensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(
LoD
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(
phi::Dense
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(
LoD
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(
phi::Dense
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(
LoD
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(
phi::Dense
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(
LoD
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(
phi::Dense
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"CheckedCell"
,
"(Tensor) (2 x D) only for peephole."
)
AddOutput
(
"CheckedCell"
,
"(Tensor) (2 x D) only for peephole."
)
.
AsIntermediate
();
.
AsIntermediate
();
AddAttr
<
bool
>
(
"use_peepholes"
,
AddAttr
<
bool
>
(
"use_peepholes"
,
...
@@ -295,23 +298,23 @@ This operator fuse the X into LSTM, more details can refer to LSTM op.
...
@@ -295,23 +298,23 @@ This operator fuse the X into LSTM, more details can refer to LSTM op.
template
<
typename
T
>
template
<
typename
T
>
class
FuisonLSTMKernel
:
public
framework
::
OpKernel
<
T
>
{
class
FuisonLSTMKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
#define INIT_BASE_DEFINES \
#define INIT_BASE_DEFINES
\
using DeviceContext = phi::CPUContext; \
using DeviceContext = phi::CPUContext;
\
auto* x = ctx.Input<
LoDTensor>("X");
\
auto* x = ctx.Input<
phi::DenseTensor>("X");
\
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* h0 = ctx.Input<phi::DenseTensor>("H0");
\
auto* c0 = ctx.Input<phi::DenseTensor>("C0"); \
auto* c0 = ctx.Input<phi::DenseTensor>("C0");
\
auto* wx = ctx.Input<phi::DenseTensor>("WeightX"); \
auto* wx = ctx.Input<phi::DenseTensor>("WeightX");
\
auto* wh = ctx.Input<phi::DenseTensor>("WeightH"); \
auto* wh = ctx.Input<phi::DenseTensor>("WeightH");
\
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias");
\
auto* xx = ctx.Output<
LoDTensor>("XX");
\
auto* xx = ctx.Output<
phi::DenseTensor>("XX");
\
auto* hidden_out = ctx.Output<
LoDTensor>("Hidden");
\
auto* hidden_out = ctx.Output<
phi::DenseTensor>("Hidden");
\
auto* cell_out = ctx.Output<
LoDTensor>("Cell");
\
auto* cell_out = ctx.Output<
phi::DenseTensor>("Cell");
\
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
bool is_reverse = ctx.Attr<bool>("is_reverse");
\
bool use_peepholes = ctx.Attr<bool>("use_peepholes"); \
bool use_peepholes = ctx.Attr<bool>("use_peepholes");
\
auto x_dims = x->dims();
/* T x M*/
\
auto x_dims = x->dims();
/* T x M*/
\
auto wh_dims = wh->dims();
/* D x 4D*/
\
auto wh_dims = wh->dims();
/* D x 4D*/
\
const int M = x_dims[1]; \
const int M = x_dims[1];
\
const int D = wh_dims[0]; \
const int D = wh_dims[0];
\
const int D4 = wh_dims[1]
const int D4 = wh_dims[1]
#define INIT_OTHER_DEFINES \
#define INIT_OTHER_DEFINES \
...
@@ -439,9 +442,9 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
...
@@ -439,9 +442,9 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
reordered_c0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedC0"
);
auto
*
reordered_c0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedC0"
);
auto
*
batched_input
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedInput"
);
auto
*
batched_input
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedInput"
);
auto
*
batched_c_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedCell"
);
auto
*
batched_c_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedCell"
);
auto
*
batched_h_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedHidden"
);
auto
*
batched_h_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedHidden"
);
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
place
);
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_c_out_data
=
batched_c_out
->
mutable_data
<
T
>
(
place
);
T
*
batched_c_out_data
=
batched_c_out
->
mutable_data
<
T
>
(
place
);
...
...
paddle/fluid/operators/fused/fusion_lstm_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@ limitations under the License. */
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionLSTMOp
:
public
framework
::
OperatorWithKernel
{
class
FusionLSTMOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
浏览文件 @
30a31a53
...
@@ -106,14 +106,14 @@ framework::OpKernelType FusionRepeatedFCReluOp::GetExpectedKernelType(
...
@@ -106,14 +106,14 @@ framework::OpKernelType FusionRepeatedFCReluOp::GetExpectedKernelType(
}
}
void
FusionRepeatedFCReluOpMaker
::
Make
()
{
void
FusionRepeatedFCReluOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(
LoD
Tensor) Input tensors of this operator."
);
AddInput
(
"X"
,
"(
phi::Dense
Tensor) Input tensors of this operator."
);
AddInput
(
"W"
,
"(Tensor) The weight tensors of this operator."
).
AsDuplicable
();
AddInput
(
"W"
,
"(Tensor) The weight tensors of this operator."
).
AsDuplicable
();
AddInput
(
"Bias"
,
"(Tensor) The bias tensors of this operator."
)
AddInput
(
"Bias"
,
"(Tensor) The bias tensors of this operator."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
"ReluOut"
,
"(Tensor) The output tensor of each relu operator."
)
AddOutput
(
"ReluOut"
,
"(Tensor) The output tensor of each relu operator."
)
.
AsDuplicable
()
.
AsDuplicable
()
.
AsIntermediate
();
.
AsIntermediate
();
AddOutput
(
"Out"
,
"(
LoD
Tensor) Output tensor of this operator."
);
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) Output tensor of this operator."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Fusion Repeated FC with Relu Operator.
Fusion Repeated FC with Relu Operator.
)DOC"
);
)DOC"
);
...
...
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@
...
@@ -18,7 +18,6 @@
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionRepeatedFCReluOp
:
public
framework
::
OperatorWithKernel
{
class
FusionRepeatedFCReluOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
浏览文件 @
30a31a53
...
@@ -95,11 +95,12 @@ framework::OpKernelType FusionSeqConvEltAddReluOp::GetExpectedKernelType(
...
@@ -95,11 +95,12 @@ framework::OpKernelType FusionSeqConvEltAddReluOp::GetExpectedKernelType(
}
}
void
FusionSeqConvEltAddReluOpMaker
::
Make
()
{
void
FusionSeqConvEltAddReluOpMaker
::
Make
()
{
AddInput
(
"X"
,
AddInput
(
"(LoDTensor) the input is a LodTensor, which support "
"X"
,
"variable-time length input sequence. The underlying tensor in "
"(phi::DenseTensor) the input is a LodTensor, which support "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"variable-time length input sequence. The underlying tensor in "
"total time steps in this mini-batch, M is the dim size of x."
);
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
// PaddingData only support false yet, should be ensured at pass.
// PaddingData only support false yet, should be ensured at pass.
AddInput
(
"Filter"
,
AddInput
(
"Filter"
,
"(Tensor) same as the input(Filter) of sequence conv op is an "
"(Tensor) same as the input(Filter) of sequence conv op is an "
...
@@ -111,9 +112,9 @@ void FusionSeqConvEltAddReluOpMaker::Make() {
...
@@ -111,9 +112,9 @@ void FusionSeqConvEltAddReluOpMaker::Make() {
"output feature size"
);
"output feature size"
);
AddOutput
(
AddOutput
(
"Out"
,
"Out"
,
"(
LoD
Tensor) the output(Out) is a LodTensor, which support "
"(
phi::Dense
Tensor) the output(Out) is a LodTensor, which support "
"variable-time length output sequence. The underlying tensor in "
"variable-time length output sequence. The underlying tensor in "
"this
LoD
Tensor is a matrix with shape (T, N), where, T is the "
"this
phi::Dense
Tensor is a matrix with shape (T, N), where, T is the "
"total time steps in this mini-batch, N is the output feature size."
);
"total time steps in this mini-batch, N is the output feature size."
);
AddOutput
(
"ColMat"
,
AddOutput
(
"ColMat"
,
"(Tensor) (T, K), where T is where T is the "
"(Tensor) (T, K), where T is where T is the "
...
@@ -150,10 +151,10 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel<T> {
...
@@ -150,10 +151,10 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
DeviceContext
=
phi
::
CPUContext
;
using
DeviceContext
=
phi
::
CPUContext
;
auto
*
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Filter"
);
auto
*
w
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Filter"
);
auto
*
b
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Bias"
);
auto
*
b
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Bias"
);
auto
*
y
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
y
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
*
col
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ColMat"
);
auto
*
col
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ColMat"
);
auto
x_lod
=
x
->
lod
();
auto
x_lod
=
x
->
lod
();
...
...
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@ limitations under the License. */
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionSeqConvEltAddReluOp
:
public
framework
::
OperatorWithKernel
{
class
FusionSeqConvEltAddReluOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
浏览文件 @
30a31a53
...
@@ -110,12 +110,13 @@ framework::OpKernelType FusionSeqExpandConcatFCOp::GetExpectedKernelType(
...
@@ -110,12 +110,13 @@ framework::OpKernelType FusionSeqExpandConcatFCOp::GetExpectedKernelType(
void
FusionSeqExpandConcatFCOpMaker
::
Make
()
{
void
FusionSeqExpandConcatFCOpMaker
::
Make
()
{
AddInput
(
"X"
,
AddInput
(
"X"
,
"(LoDTensor) input LodDTensors, the first one must be have ref lod "
"(phi::DenseTensor) input LodDTensors, the first one must be have "
"ref lod "
"for sequence expand, and the rest input should have same lod."
)
"for sequence expand, and the rest input should have same lod."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"FCWeight"
,
"(Tensor) the weights of fc."
);
AddInput
(
"FCWeight"
,
"(Tensor) the weights of fc."
);
AddInput
(
"FCBias"
,
"(Tensor, optional) the bias of fc."
).
AsDispensable
();
AddInput
(
"FCBias"
,
"(Tensor, optional) the bias of fc."
).
AsDispensable
();
AddOutput
(
"Out"
,
"(
LoD
Tensor) Output LodTensor."
);
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) Output LodTensor."
);
AddOutput
(
AddOutput
(
"FCOut"
,
"FCOut"
,
"(Tensor) the intermediate tensor to keep the result of fc."
"(Tensor) the intermediate tensor to keep the result of fc."
...
@@ -150,10 +151,10 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel<T> {
...
@@ -150,10 +151,10 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel<T> {
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
DeviceContext
=
phi
::
CPUContext
;
using
DeviceContext
=
phi
::
CPUContext
;
auto
ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"FCWeight"
);
auto
*
w
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"FCWeight"
);
auto
*
b
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"FCBias"
);
auto
*
b
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"FCBias"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
*
fc_out
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"FCOut"
);
auto
*
fc_out
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"FCOut"
);
auto
*
ref_in
=
ins
[
0
];
auto
*
ref_in
=
ins
[
0
];
...
...
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@ limitations under the License. */
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionSeqExpandConcatFCOp
:
public
framework
::
OperatorWithKernel
{
class
FusionSeqExpandConcatFCOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
浏览文件 @
30a31a53
...
@@ -75,8 +75,9 @@ framework::OpKernelType FusionSeqPoolConcatOp::GetExpectedKernelType(
...
@@ -75,8 +75,9 @@ framework::OpKernelType FusionSeqPoolConcatOp::GetExpectedKernelType(
}
}
void
FusionSeqPoolConcatOpMaker
::
Make
()
{
void
FusionSeqPoolConcatOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) Input tensors of this operator."
).
AsDuplicable
();
AddInput
(
"X"
,
"(phi::DenseTensor) Input tensors of this operator."
)
AddOutput
(
"Out"
,
"(LoDTensor) Output tensor of concat operator."
);
.
AsDuplicable
();
AddOutput
(
"Out"
,
"(phi::DenseTensor) Output tensor of concat operator."
);
AddAttr
<
std
::
string
>
(
"pooltype"
,
AddAttr
<
std
::
string
>
(
"pooltype"
,
"(string, default 'SUM') some of the pooling "
"(string, default 'SUM') some of the pooling "
"pooltype of SequencePoolOp."
)
"pooltype of SequencePoolOp."
)
...
@@ -95,8 +96,8 @@ template <typename T>
...
@@ -95,8 +96,8 @@ template <typename T>
class
FusionSeqPoolConcatKernel
:
public
framework
::
OpKernel
<
T
>
{
class
FusionSeqPoolConcatKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
auto
x0_lod
=
ins
[
0
]
->
lod
();
auto
x0_lod
=
ins
[
0
]
->
lod
();
const
auto
&
x0_dims
=
ins
[
0
]
->
dims
();
const
auto
&
x0_dims
=
ins
[
0
]
->
dims
();
...
...
paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@
...
@@ -18,7 +18,6 @@
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionSeqPoolConcatOp
:
public
framework
::
OperatorWithKernel
{
class
FusionSeqPoolConcatOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
浏览文件 @
30a31a53
...
@@ -74,11 +74,12 @@ framework::OpKernelType FusionSeqPoolCVMConcatOp::GetExpectedKernelType(
...
@@ -74,11 +74,12 @@ framework::OpKernelType FusionSeqPoolCVMConcatOp::GetExpectedKernelType(
}
}
void
FusionSeqPoolCVMConcatOpMaker
::
Make
()
{
void
FusionSeqPoolCVMConcatOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) Input tensors of this operator."
).
AsDuplicable
();
AddInput
(
"X"
,
"(phi::DenseTensor) Input tensors of this operator."
)
.
AsDuplicable
();
AddInput
(
"CVM"
,
AddInput
(
"CVM"
,
"(Tensor), a 2-D Tensor with shape [N x 2], where N is the batch "
"(Tensor), a 2-D Tensor with shape [N x 2], where N is the batch "
"size, 2 is show and click."
);
"size, 2 is show and click."
);
AddOutput
(
"Out"
,
"(
LoD
Tensor) Output tensor of concat operator."
);
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) Output tensor of concat operator."
);
AddAttr
<
std
::
string
>
(
"pooltype"
,
AddAttr
<
std
::
string
>
(
"pooltype"
,
"(string, default 'SUM') some of the pooling "
"(string, default 'SUM') some of the pooling "
"pooltype of SequencePoolOp."
)
"pooltype of SequencePoolOp."
)
...
@@ -98,8 +99,8 @@ template <typename T>
...
@@ -98,8 +99,8 @@ template <typename T>
class
FusionSeqPoolCVMConcatKernel
:
public
framework
::
OpKernel
<
T
>
{
class
FusionSeqPoolCVMConcatKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
auto
x0_lod
=
ins
[
0
]
->
lod
();
auto
x0_lod
=
ins
[
0
]
->
lod
();
const
auto
&
x0_dims
=
ins
[
0
]
->
dims
();
const
auto
&
x0_dims
=
ins
[
0
]
->
dims
();
...
...
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@
...
@@ -18,7 +18,6 @@
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionSeqPoolCVMConcatOp
:
public
framework
::
OperatorWithKernel
{
class
FusionSeqPoolCVMConcatOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
浏览文件 @
30a31a53
...
@@ -18,7 +18,6 @@
...
@@ -18,7 +18,6 @@
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
// ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar
// ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar
...
...
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
浏览文件 @
30a31a53
...
@@ -59,11 +59,11 @@ class MultiGRUHandler {
...
@@ -59,11 +59,11 @@ class MultiGRUHandler {
origin_mode_
(
ctx
.
Attr
<
bool
>
(
"origin_mode"
)),
origin_mode_
(
ctx
.
Attr
<
bool
>
(
"origin_mode"
)),
layers_
(
ctx
.
Attr
<
int
>
(
"layers"
)),
layers_
(
ctx
.
Attr
<
int
>
(
"layers"
)),
concat_pds_
(
layers_
,
std
::
shared_ptr
<
dnnl
::
concat
::
primitive_desc
>
()),
concat_pds_
(
layers_
,
std
::
shared_ptr
<
dnnl
::
concat
::
primitive_desc
>
()),
x_
(
ctx
.
Input
<
LoD
Tensor
>
(
"X"
)),
x_
(
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
)),
weights_x_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"WeightX"
)),
weights_x_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"WeightX"
)),
weights_h_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"WeightH"
)),
weights_h_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"WeightH"
)),
biases_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Bias"
)),
biases_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Bias"
)),
hidden_
(
ctx
.
Output
<
LoD
Tensor
>
(
"Hidden"
)),
hidden_
(
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Hidden"
)),
x_lod_
(
x_
->
lod
()[
0
])
{
x_lod_
(
x_
->
lod
()[
0
])
{
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
weights_x_
.
size
(),
weights_x_
.
size
(),
...
@@ -127,7 +127,8 @@ class MultiGRUHandler {
...
@@ -127,7 +127,8 @@ class MultiGRUHandler {
if
(
is_int8
)
{
if
(
is_int8
)
{
// Add int8 attributes
// Add int8 attributes
const
auto
scale_weights
=
ctx
.
MultiInput
<
LoDTensor
>
(
"Scale_weights"
);
const
auto
scale_weights
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Scale_weights"
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
scale_weights
.
size
(),
scale_weights
.
size
(),
layers_
*
2
,
layers_
*
2
,
...
@@ -669,11 +670,11 @@ class MultiGRUHandler {
...
@@ -669,11 +670,11 @@ class MultiGRUHandler {
// on Ti size, thus we need another key to cache them
// on Ti size, thus we need another key to cache them
std
::
string
memory_key_
;
std
::
string
memory_key_
;
const
LoD
Tensor
*
x_
;
const
phi
::
Dense
Tensor
*
x_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
weights_x_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
weights_x_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
weights_h_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
weights_h_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
biases_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
biases_
;
LoD
Tensor
*
hidden_
;
phi
::
Dense
Tensor
*
hidden_
;
std
::
vector
<
dnnl
::
primitive_attr
>
attrs_
;
std
::
vector
<
dnnl
::
primitive_attr
>
attrs_
;
const
paddle
::
framework
::
Vector
<
size_t
>&
x_lod_
;
const
paddle
::
framework
::
Vector
<
size_t
>&
x_lod_
;
};
};
...
...
paddle/fluid/operators/fused/multi_gru_op.cc
浏览文件 @
30a31a53
...
@@ -148,11 +148,12 @@ framework::OpKernelType MultiGRUOp::GetExpectedKernelType(
...
@@ -148,11 +148,12 @@ framework::OpKernelType MultiGRUOp::GetExpectedKernelType(
}
}
void
MultiGRUOpMaker
::
Make
()
{
void
MultiGRUOpMaker
::
Make
()
{
AddInput
(
"X"
,
AddInput
(
"(LoDTensor) the input is an LodTensor, which support "
"X"
,
"variable-time length input sequence. The underlying tensor in "
"(phi::DenseTensor) the input is an LodTensor, which support "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"variable-time length input sequence. The underlying tensor in "
"total time steps in this mini-batch, M is the dim size of x."
);
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"WeightX"
,
AddInput
(
"WeightX"
,
"(MultiTensor) The FC weight with shape (M x 3D),"
"(MultiTensor) The FC weight with shape (M x 3D),"
"where M is the dim size of x, D is the hidden size. "
)
"where M is the dim size of x, D is the hidden size. "
)
...
@@ -176,7 +177,7 @@ void MultiGRUOpMaker::Make() {
...
@@ -176,7 +177,7 @@ void MultiGRUOpMaker::Make() {
"Only used with MKL-DNN INT8."
)
"Only used with MKL-DNN INT8."
)
.
AsDuplicable
()
.
AsDuplicable
()
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
"Hidden"
,
"(
LoD
Tensor) (T x D) Same as GRUOp"
);
AddOutput
(
"Hidden"
,
"(
phi::Dense
Tensor) (T x D) Same as GRUOp"
);
AddAttr
<
std
::
string
>
(
"activation"
,
AddAttr
<
std
::
string
>
(
"activation"
,
"(string, default tanh) "
"(string, default tanh) "
"The activation type used for output candidate {h}_t."
)
"The activation type used for output candidate {h}_t."
)
...
...
paddle/fluid/operators/fused/multi_gru_op.h
浏览文件 @
30a31a53
...
@@ -20,7 +20,6 @@ namespace paddle {
...
@@ -20,7 +20,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
framework
::
ExecutionContext
;
using
framework
::
ExecutionContext
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
MultiGRUOp
:
public
framework
::
OperatorWithKernel
{
class
MultiGRUOp
:
public
framework
::
OperatorWithKernel
{
public:
public:
...
...
paddle/fluid/operators/math/context_project.h
浏览文件 @
30a31a53
...
@@ -27,7 +27,6 @@ namespace operators {
...
@@ -27,7 +27,6 @@ namespace operators {
namespace
math
{
namespace
math
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
/*
/*
* \brief Context projection concatenates features in adjacent time-steps in
* \brief Context projection concatenates features in adjacent time-steps in
...
@@ -51,9 +50,8 @@ using LoDTensor = phi::DenseTensor;
...
@@ -51,9 +50,8 @@ using LoDTensor = phi::DenseTensor;
* For a mini-batch of 2 variable lengths sentences, containing 3, and 1
* For a mini-batch of 2 variable lengths sentences, containing 3, and 1
* time-steps:
* time-steps:
*
*
* Assumed input (X) is a [4, M, N] float LoDTensor, and X->lod()[0] = [0, 3,
* Assumed input (X) is a [4, M, N] float phi::DenseTensor, and X->lod()[0] =
* 4].
* [0, 3, 4]. Besides, for the sake of simplicity, we assume M=1 and N=2.
* Besides, for the sake of simplicity, we assume M=1 and N=2.
*
*
* X = [[a1, a2;
* X = [[a1, a2;
* b1, b2;
* b1, b2;
...
@@ -89,7 +87,7 @@ template <typename DeviceContext, typename T>
...
@@ -89,7 +87,7 @@ template <typename DeviceContext, typename T>
class
ContextProjectFunctor
{
class
ContextProjectFunctor
{
public:
public:
void
operator
()(
const
DeviceContext
&
context
,
void
operator
()(
const
DeviceContext
&
context
,
const
LoD
Tensor
&
in
,
const
phi
::
Dense
Tensor
&
in
,
const
phi
::
DenseTensor
*
padding_data
,
const
phi
::
DenseTensor
*
padding_data
,
bool
padding_trainable
,
bool
padding_trainable
,
const
int
context_start
,
const
int
context_start
,
...
@@ -218,7 +216,7 @@ template <typename DeviceContext, typename T>
...
@@ -218,7 +216,7 @@ template <typename DeviceContext, typename T>
class
ContextProjectGradFunctor
{
class
ContextProjectGradFunctor
{
public:
public:
void
operator
()(
const
DeviceContext
&
context
,
void
operator
()(
const
DeviceContext
&
context
,
const
LoD
Tensor
&
in
,
const
phi
::
Dense
Tensor
&
in
,
bool
padding_trainable
,
bool
padding_trainable
,
const
int
context_start
,
const
int
context_start
,
const
int
context_length
,
const
int
context_length
,
...
...
paddle/fluid/operators/math/sequence_padding.h
浏览文件 @
30a31a53
...
@@ -82,8 +82,8 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
...
@@ -82,8 +82,8 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
}
}
/*
/*
* \brief Padding/Unpadding
LoDTensor to/from normal Tensor of the shap
e
* \brief Padding/Unpadding
phi::DenseTensor to/from normal Tensor of th
e
*
[max_sequence_length, num_sequences, sequence_width].
*
shape
[max_sequence_length, num_sequences, sequence_width].
*
*
* Padding sequence:
* Padding sequence:
* padding[i] = seq[lod[level][i]]
* padding[i] = seq[lod[level][i]]
...
@@ -97,13 +97,11 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
...
@@ -97,13 +97,11 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
* padding (s0, s1, s2, s3; s0, s1, s2, 0; s0, 0, s2, 0; s0, 0, 0, 0)
* padding (s0, s1, s2, s3; s0, s1, s2, 0; s0, 0, s2, 0; s0, 0, 0, 0)
*
*
* \param context device context of this functor.
* \param context device context of this functor.
* \param seq LoDTensor which is stored in sequence format, the shape
* \param seq phi::DenseTensor which is stored in sequence format, the
* is [total_sequence_length, sequence_width] where
* shape is [total_sequence_length, sequence_width] where total_sequence_length
* total_sequence_length is the sum of all sequences'
* is the sum of all sequences' length. \param padding Tensor which is
* length.
* padded to the same length, the shape is [max_sequence_length, num_sequences,
* \param padding Tensor which is padded to the same length, the shape is
* sequence_width]. \param norm_by_times whether dividing sequence's length.
* [max_sequence_length, num_sequences, sequence_width].
* \param norm_by_times whether dividing sequence's length.
*
*
* \note transposition is also done in this functor.
* \note transposition is also done in this functor.
*/
*/
...
...
paddle/fluid/operators/math/sequence_pooling.cc
浏览文件 @
30a31a53
...
@@ -25,7 +25,6 @@ namespace operators {
...
@@ -25,7 +25,6 @@ namespace operators {
namespace
math
{
namespace
math
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
,
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
typename
IndexType
=
Eigen
::
DenseIndex
>
...
...
paddle/fluid/operators/math/sequence_scale.h
浏览文件 @
30a31a53
...
@@ -35,7 +35,8 @@ namespace math {
...
@@ -35,7 +35,8 @@ namespace math {
*
*
* \param context Device context of this functor.
* \param context Device context of this functor.
* \param seq LoDTensor which is stored in sequence format, the shape
* \param seq phi::DenseTensor which is stored in sequence format, the
shape
* is [total_sequence_length, sequence_width] where
* is [total_sequence_length, sequence_width] where
* total_sequence_length is the sum of all sequences'
* total_sequence_length is the sum of all sequences'
* length.
* length.
...
...
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
浏览文件 @
30a31a53
...
@@ -28,7 +28,6 @@ using dnnl::prop_kind;
...
@@ -28,7 +28,6 @@ using dnnl::prop_kind;
using
dnnl
::
stream
;
using
dnnl
::
stream
;
using
framework
::
DDim
;
using
framework
::
DDim
;
using
framework
::
ExecutionContext
;
using
framework
::
ExecutionContext
;
using
LoDTensor
=
phi
::
DenseTensor
;
using
phi
::
funcs
::
OneDNNGetDataType
;
using
phi
::
funcs
::
OneDNNGetDataType
;
using
phi
::
funcs
::
to_void_cast
;
using
phi
::
funcs
::
to_void_cast
;
using
platform
::
MKLDNNDeviceContext
;
using
platform
::
MKLDNNDeviceContext
;
...
@@ -382,7 +381,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
...
@@ -382,7 +381,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
void
PrepareSrcMem
(
const
std
::
shared_ptr
<
inner_product_forward
>&
fc_p
,
void
PrepareSrcMem
(
const
std
::
shared_ptr
<
inner_product_forward
>&
fc_p
,
const
std
::
shared_ptr
<
dnnl
::
memory
>&
src_mem
,
const
std
::
shared_ptr
<
dnnl
::
memory
>&
src_mem
,
const
LoD
Tensor
*
x
,
const
phi
::
Dense
Tensor
*
x
,
const
dnnl
::
engine
&
engine
)
const
{
const
dnnl
::
engine
&
engine
)
const
{
auto
x_md
=
x
->
mem_desc
().
reshape
(
src_mem
->
get_desc
().
dims
());
auto
x_md
=
x
->
mem_desc
().
reshape
(
src_mem
->
get_desc
().
dims
());
if
(
x_md
!=
src_mem
->
get_desc
())
{
if
(
x_md
!=
src_mem
->
get_desc
())
{
...
@@ -403,10 +402,10 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
...
@@ -403,10 +402,10 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
auto
*
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"Input"
);
const
auto
*
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Input"
);
const
auto
*
weights
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
const
auto
*
weights
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
const
auto
*
bias
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Bias"
);
const
auto
*
bias
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Bias"
);
auto
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
auto
&
scale_weights
=
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"Scale_weights"
);
const
auto
&
scale_weights
=
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"Scale_weights"
);
...
@@ -513,9 +512,9 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
...
@@ -513,9 +512,9 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
}
}
void
RecomputeOutputDims
(
const
ExecutionContext
&
ctx
,
void
RecomputeOutputDims
(
const
ExecutionContext
&
ctx
,
const
LoD
Tensor
*
x
,
const
phi
::
Dense
Tensor
*
x
,
const
phi
::
DenseTensor
*
weights
,
const
phi
::
DenseTensor
*
weights
,
LoD
Tensor
*
out
)
const
{
phi
::
Dense
Tensor
*
out
)
const
{
int
in_num_col_dims
=
ctx
.
Attr
<
int
>
(
"in_num_col_dims"
);
int
in_num_col_dims
=
ctx
.
Attr
<
int
>
(
"in_num_col_dims"
);
bool
padding_weights
=
ctx
.
Attr
<
bool
>
(
"padding_weights"
);
bool
padding_weights
=
ctx
.
Attr
<
bool
>
(
"padding_weights"
);
PADDLE_ENFORCE_EQ
(
padding_weights
,
PADDLE_ENFORCE_EQ
(
padding_weights
,
...
...
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
浏览文件 @
30a31a53
...
@@ -26,7 +26,6 @@ namespace operators {
...
@@ -26,7 +26,6 @@ namespace operators {
using
framework
::
DDim
;
using
framework
::
DDim
;
using
framework
::
ExecutionContext
;
using
framework
::
ExecutionContext
;
using
LoDTensor
=
phi
::
DenseTensor
;
using
platform
::
MatMulV2MKLDNNHandler
;
using
platform
::
MatMulV2MKLDNNHandler
;
using
platform
::
MKLDNNDeviceContext
;
using
platform
::
MKLDNNDeviceContext
;
...
...
paddle/fluid/operators/nccl/nccl_op.cu.cc
浏览文件 @
30a31a53
...
@@ -19,8 +19,6 @@ limitations under the License. */
...
@@ -19,8 +19,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
platform
::
Communicator
;
using
platform
::
Communicator
;
template
<
typename
Type
>
template
<
typename
Type
>
...
@@ -62,8 +60,8 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
...
@@ -62,8 +60,8 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
true
,
true
,
platform
::
errors
::
PreconditionNotMet
(
platform
::
errors
::
PreconditionNotMet
(
"This kernel only runs on GPU device."
));
"This kernel only runs on GPU device."
));
auto
*
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
*
comm
=
ctx
.
Input
<
Communicator
>
(
"Communicator"
);
auto
*
comm
=
ctx
.
Input
<
Communicator
>
(
"Communicator"
);
std
::
string
reduction
=
ctx
.
Attr
<
std
::
string
>
(
"reduction"
);
std
::
string
reduction
=
ctx
.
Attr
<
std
::
string
>
(
"reduction"
);
...
@@ -97,8 +95,8 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
...
@@ -97,8 +95,8 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"This kernel only runs on GPU device."
));
"This kernel only runs on GPU device."
));
auto
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"X"
);
// x0, x1, x2
auto
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
);
// x0, x1, x2
auto
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
*
comm
=
ctx
.
Input
<
Communicator
>
(
"Communicator"
);
auto
*
comm
=
ctx
.
Input
<
Communicator
>
(
"Communicator"
);
int
root
=
ctx
.
Attr
<
int
>
(
"root"
);
int
root
=
ctx
.
Attr
<
int
>
(
"root"
);
std
::
string
reduction
=
ctx
.
Attr
<
std
::
string
>
(
"reduction"
);
std
::
string
reduction
=
ctx
.
Attr
<
std
::
string
>
(
"reduction"
);
...
@@ -144,7 +142,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
...
@@ -144,7 +142,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
int
gpu_id
=
ctx
.
GetPlace
().
GetDeviceId
();
int
gpu_id
=
ctx
.
GetPlace
().
GetDeviceId
();
int
idx
=
comm
->
GetCommId
(
gpu_id
);
int
idx
=
comm
->
GetCommId
(
gpu_id
);
if
(
idx
==
root
)
{
if
(
idx
==
root
)
{
auto
*
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
);
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke Bcast. send "
<<
x
->
numel
();
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke Bcast. send "
<<
x
->
numel
();
PADDLE_ENFORCE_GPU_SUCCESS
(
platform
::
dynload
::
ncclBcast
(
PADDLE_ENFORCE_GPU_SUCCESS
(
platform
::
dynload
::
ncclBcast
(
reinterpret_cast
<
void
*>
(
const_cast
<
T
*>
(
x
->
data
<
T
>
())),
reinterpret_cast
<
void
*>
(
const_cast
<
T
*>
(
x
->
data
<
T
>
())),
...
@@ -155,7 +153,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
...
@@ -155,7 +153,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
ctx
.
cuda_device_context
().
stream
()));
ctx
.
cuda_device_context
().
stream
()));
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" finished Bcast."
;
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" finished Bcast."
;
}
else
{
}
else
{
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke Bcast. recv buffer "
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke Bcast. recv buffer "
<<
phi
::
product
(
out
->
dims
());
<<
phi
::
product
(
out
->
dims
());
PADDLE_ENFORCE_GPU_SUCCESS
(
PADDLE_ENFORCE_GPU_SUCCESS
(
...
...
paddle/fluid/operators/optimizers/adam_op_mlu.cc
浏览文件 @
30a31a53
...
@@ -20,7 +20,6 @@ namespace paddle {
...
@@ -20,7 +20,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
>
template
<
typename
T
>
class
AdamMLUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
AdamMLUKernel
:
public
framework
::
OpKernel
<
T
>
{
...
@@ -30,32 +29,32 @@ class AdamMLUKernel : public framework::OpKernel<T> {
...
@@ -30,32 +29,32 @@ class AdamMLUKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
param
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Grad(%s)'s type should be
LoD
Tensor, "
"The Grad(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
grad
=
ctx
.
Input
<
LoD
Tensor
>
(
"Grad"
);
auto
*
grad
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Grad"
);
auto
*
mom1
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment1"
);
auto
*
mom1
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment1"
);
auto
*
mom2
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment2"
);
auto
*
mom2
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment2"
);
auto
*
lr
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
lr
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
auto
*
beta1_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta1Pow"
);
auto
*
beta1_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta1Pow"
);
auto
*
beta2_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta2Pow"
);
auto
*
beta2_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta2Pow"
);
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
auto
*
mom1_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Moment1Out"
);
auto
*
mom1_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Moment1Out"
);
auto
*
mom2_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Moment2Out"
);
auto
*
mom2_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Moment2Out"
);
auto
*
beta1_pow_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Beta1PowOut"
);
auto
*
beta1_pow_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Beta1PowOut"
);
auto
*
beta2_pow_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Beta2PowOut"
);
auto
*
beta2_pow_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Beta2PowOut"
);
bool
skip_update
=
false
;
bool
skip_update
=
false
;
if
(
ctx
.
HasInput
(
"SkipUpdate"
))
{
if
(
ctx
.
HasInput
(
"SkipUpdate"
))
{
...
@@ -110,8 +109,8 @@ class AdamMLUKernel : public framework::OpKernel<T> {
...
@@ -110,8 +109,8 @@ class AdamMLUKernel : public framework::OpKernel<T> {
mom1_out
->
ShareDataWith
(
*
mom1
);
mom1_out
->
ShareDataWith
(
*
mom1
);
mom2_out
->
ShareDataWith
(
*
mom2
);
mom2_out
->
ShareDataWith
(
*
mom2
);
LoD
Tensor
beta1_pow_tmp
;
phi
::
Dense
Tensor
beta1_pow_tmp
;
LoD
Tensor
beta2_pow_tmp
;
phi
::
Dense
Tensor
beta2_pow_tmp
;
if
(
beta1_pow
->
place
()
==
platform
::
CPUPlace
())
{
if
(
beta1_pow
->
place
()
==
platform
::
CPUPlace
())
{
T
beta1
=
*
beta1_pow
->
data
<
T
>
();
T
beta1
=
*
beta1_pow
->
data
<
T
>
();
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
...
@@ -292,13 +291,13 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
...
@@ -292,13 +291,13 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
}
}
bool
with_decay
=
ctx
.
Attr
<
bool
>
(
"with_decay"
);
bool
with_decay
=
ctx
.
Attr
<
bool
>
(
"with_decay"
);
const
bool
multi_precision
=
ctx
.
Attr
<
bool
>
(
"multi_precision"
);
const
bool
multi_precision
=
ctx
.
Attr
<
bool
>
(
"multi_precision"
);
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
auto
*
master_param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"MasterParamOut"
);
auto
*
master_param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MasterParamOut"
);
const
auto
*
master_param
=
ctx
.
Input
<
LoD
Tensor
>
(
"MasterParam"
);
const
auto
*
master_param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"MasterParam"
);
VLOG
(
3
)
<<
"Skip update: "
<<
skip_update
<<
", With decay: "
<<
with_decay
;
VLOG
(
3
)
<<
"Skip update: "
<<
skip_update
<<
", With decay: "
<<
with_decay
;
if
(
!
skip_update
&&
with_decay
)
{
if
(
!
skip_update
&&
with_decay
)
{
auto
*
param
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
MLUCnnlTensorDesc
param_desc
(
*
param
);
MLUCnnlTensorDesc
param_desc
(
*
param
);
if
(
multi_precision
)
{
if
(
multi_precision
)
{
VLOG
(
3
)
<<
"[adamw] multi_precision, cast masterparam to param."
;
VLOG
(
3
)
<<
"[adamw] multi_precision, cast masterparam to param."
;
...
@@ -328,12 +327,12 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
...
@@ -328,12 +327,12 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
lr
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
lr
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
float
coeff
=
ctx
.
Attr
<
float
>
(
"coeff"
);
float
coeff
=
ctx
.
Attr
<
float
>
(
"coeff"
);
// update param with decay coeff: mul(-1 * lr, coeff * param) + param
// update param with decay coeff: mul(-1 * lr, coeff * param) + param
...
@@ -502,8 +501,8 @@ class MergedAdamMLUKernel : public framework::OpKernel<T> {
...
@@ -502,8 +501,8 @@ class MergedAdamMLUKernel : public framework::OpKernel<T> {
mom1_outs
[
i
]
->
ShareDataWith
(
*
mom1s
[
i
]);
mom1_outs
[
i
]
->
ShareDataWith
(
*
mom1s
[
i
]);
mom2_outs
[
i
]
->
ShareDataWith
(
*
mom2s
[
i
]);
mom2_outs
[
i
]
->
ShareDataWith
(
*
mom2s
[
i
]);
LoD
Tensor
beta1_pow_tmp
;
phi
::
Dense
Tensor
beta1_pow_tmp
;
LoD
Tensor
beta2_pow_tmp
;
phi
::
Dense
Tensor
beta2_pow_tmp
;
if
(
beta1_pows
[
i
]
->
place
()
==
platform
::
CPUPlace
())
{
if
(
beta1_pows
[
i
]
->
place
()
==
platform
::
CPUPlace
())
{
T
beta1
=
*
beta1_pows
[
i
]
->
data
<
T
>
();
T
beta1
=
*
beta1_pows
[
i
]
->
data
<
T
>
();
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/optimizers/adam_op_npu.cc
浏览文件 @
30a31a53
...
@@ -23,7 +23,6 @@ namespace paddle {
...
@@ -23,7 +23,6 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
AdamNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
AdamNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
...
@@ -33,32 +32,32 @@ class AdamNPUKernel : public framework::OpKernel<T> {
...
@@ -33,32 +32,32 @@ class AdamNPUKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
param
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Grad(%s)'s type should be
LoD
Tensor, "
"The Grad(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
grad
=
ctx
.
Input
<
LoD
Tensor
>
(
"Grad"
);
auto
*
grad
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Grad"
);
auto
*
mom1
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment1"
);
auto
*
mom1
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment1"
);
auto
*
mom2
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment2"
);
auto
*
mom2
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment2"
);
auto
*
lr
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
lr
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
auto
*
beta1_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta1Pow"
);
auto
*
beta1_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta1Pow"
);
auto
*
beta2_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta2Pow"
);
auto
*
beta2_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta2Pow"
);
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
auto
*
mom1_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Moment1Out"
);
auto
*
mom1_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Moment1Out"
);
auto
*
mom2_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Moment2Out"
);
auto
*
mom2_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Moment2Out"
);
auto
*
beta1_pow_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Beta1PowOut"
);
auto
*
beta1_pow_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Beta1PowOut"
);
auto
*
beta2_pow_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Beta2PowOut"
);
auto
*
beta2_pow_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Beta2PowOut"
);
bool
skip_update
=
false
;
bool
skip_update
=
false
;
if
(
ctx
.
HasInput
(
"SkipUpdate"
))
{
if
(
ctx
.
HasInput
(
"SkipUpdate"
))
{
...
@@ -114,8 +113,8 @@ class AdamNPUKernel : public framework::OpKernel<T> {
...
@@ -114,8 +113,8 @@ class AdamNPUKernel : public framework::OpKernel<T> {
// NOTE(zhiqiu): beta1_pow and beta2_pow may on CPU and not transform
// NOTE(zhiqiu): beta1_pow and beta2_pow may on CPU and not transform
// place.
// place.
LoD
Tensor
beta1_pow_tmp
;
phi
::
Dense
Tensor
beta1_pow_tmp
;
LoD
Tensor
beta2_pow_tmp
;
phi
::
Dense
Tensor
beta2_pow_tmp
;
if
(
beta1_pow
->
place
()
==
platform
::
CPUPlace
())
{
if
(
beta1_pow
->
place
()
==
platform
::
CPUPlace
())
{
T
beta1
=
*
beta1_pow
->
data
<
T
>
();
T
beta1
=
*
beta1_pow
->
data
<
T
>
();
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
...
@@ -279,7 +278,7 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
...
@@ -279,7 +278,7 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
bool
with_decay
=
ctx
.
Attr
<
bool
>
(
"with_decay"
);
bool
with_decay
=
ctx
.
Attr
<
bool
>
(
"with_decay"
);
if
(
!
skip_update
&&
with_decay
)
{
if
(
!
skip_update
&&
with_decay
)
{
float
coeff
=
ctx
.
Attr
<
float
>
(
"coeff"
);
float
coeff
=
ctx
.
Attr
<
float
>
(
"coeff"
);
auto
*
lr
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
lr
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
auto
place
=
ctx
.
GetPlace
();
auto
place
=
ctx
.
GetPlace
();
...
@@ -308,18 +307,18 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
...
@@ -308,18 +307,18 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Master Parma is not supported on npu"
));
"Master Parma is not supported on npu"
));
}
else
{
}
else
{
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
param
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
const
auto
&
runner
=
const
auto
&
runner
=
NpuOpRunner
(
"Mul"
,
NpuOpRunner
(
"Mul"
,
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
浏览文件 @
30a31a53
...
@@ -32,20 +32,20 @@ class DecayedAdagradOp : public framework::OperatorWithKernel {
...
@@ -32,20 +32,20 @@ class DecayedAdagradOp : public framework::OperatorWithKernel {
"Input"
,
"Input"
,
"LearningRate"
,
"LearningRate"
,
"DecayedAdagradOp"
);
"DecayedAdagradOp"
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"The input var's type should be LoDTensor,
but the received is %s"
,
"
but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"The input var's type should be LoDTensor,
but the received is %s"
,
"
but the received is %s"
,
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
OP_INOUT_CHECK
(
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output"
,
"ParamOut"
,
"DecayedAdagradOp"
);
ctx
->
HasOutput
(
"ParamOut"
),
"Output"
,
"ParamOut"
,
"DecayedAdagradOp"
);
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
浏览文件 @
30a31a53
...
@@ -27,7 +27,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
...
@@ -27,7 +27,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
framework
::
ToTypeName
(
param_var
->
Type
())));
...
@@ -35,7 +35,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
...
@@ -35,7 +35,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
())));
framework
::
ToTypeName
(
grad_var
->
Type
())));
...
...
paddle/fluid/operators/optimizers/dpsgd_op.cc
浏览文件 @
30a31a53
...
@@ -36,18 +36,18 @@ class DpsgdOp : public framework::OperatorWithKernel {
...
@@ -36,18 +36,18 @@ class DpsgdOp : public framework::OperatorWithKernel {
true
,
true
,
platform
::
errors
::
NotFound
(
platform
::
errors
::
NotFound
(
"Input(LearningRate) of DpsgdOp should not be null."
));
"Input(LearningRate) of DpsgdOp should not be null."
));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"The input var's type should be LoDTensor,
but the received is %s"
,
"
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"The input var's type should be LoDTensor,
but the received is %s"
,
"
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
true
,
...
...
paddle/fluid/operators/optimizers/dpsgd_op.h
浏览文件 @
30a31a53
...
@@ -32,7 +32,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
...
@@ -32,7 +32,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
framework
::
ToTypeName
(
param_var
->
Type
())));
...
@@ -41,7 +41,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
...
@@ -41,7 +41,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
())));
framework
::
ToTypeName
(
grad_var
->
Type
())));
...
...
paddle/fluid/operators/optimizers/lamb_op.cc
浏览文件 @
30a31a53
...
@@ -52,10 +52,10 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -52,10 +52,10 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Param"
,
AddInput
(
"Param"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input parameter that has to be updated."
);
"Input parameter that has to be updated."
);
AddInput
(
"Grad"
,
AddInput
(
"Grad"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input gradient of the parameter."
);
"Input gradient of the parameter."
);
AddInput
(
"LearningRate"
,
"(Tensor) Learning rate."
);
AddInput
(
"LearningRate"
,
"(Tensor) Learning rate."
);
AddInput
(
"Moment1"
,
"(Tensor) Input first moment."
);
AddInput
(
"Moment1"
,
"(Tensor) Input first moment."
);
...
@@ -63,7 +63,7 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -63,7 +63,7 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Beta1Pow"
,
"(Tensor) Input beta1 power accumulator."
);
AddInput
(
"Beta1Pow"
,
"(Tensor) Input beta1 power accumulator."
);
AddInput
(
"Beta2Pow"
,
"(Tensor) Input beta2 power accumulator."
);
AddInput
(
"Beta2Pow"
,
"(Tensor) Input beta2 power accumulator."
);
AddInput
(
"MasterParam"
,
AddInput
(
"MasterParam"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input master parameter that has to be updated."
)
"Input master parameter that has to be updated."
)
.
AsDispensable
();
.
AsDispensable
();
AddInput
(
AddInput
(
...
...
paddle/fluid/operators/optimizers/lars_momentum_op.cc
浏览文件 @
30a31a53
...
@@ -37,12 +37,12 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
...
@@ -37,12 +37,12 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
"Output"
,
"Output"
,
"VelocityOut"
,
"VelocityOut"
,
"LarsMomentum"
);
"LarsMomentum"
);
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"The input var's type should be LoDTensor,
but the received is %s"
,
"
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
auto
lr_dims
=
ctx
->
GetInputsDim
(
"LearningRate"
);
auto
lr_dims
=
ctx
->
GetInputsDim
(
"LearningRate"
);
auto
grad_dim
=
ctx
->
GetInputsDim
(
"Grad"
);
auto
grad_dim
=
ctx
->
GetInputsDim
(
"Grad"
);
...
@@ -102,7 +102,7 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
...
@@ -102,7 +102,7 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
)[
i
],
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
)[
i
],
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
->
Inputs
(
"Grad"
)[
i
].
front
(),
ctx
->
Inputs
(
"Grad"
)[
i
].
front
(),
ctx
->
GetInputsVarType
(
"Grad"
)[
i
]));
ctx
->
GetInputsVarType
(
"Grad"
)[
i
]));
...
@@ -145,31 +145,31 @@ class LarsMomentumOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -145,31 +145,31 @@ class LarsMomentumOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Param"
,
AddInput
(
"Param"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input parameter that has to be updated"
)
"Input parameter that has to be updated"
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"Grad"
,
AddInput
(
"Grad"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input gradient of the parameter"
)
"Input gradient of the parameter"
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"Velocity"
,
AddInput
(
"Velocity"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input velocity (corresponding to the parameter) "
"Input velocity (corresponding to the parameter) "
"that has to be updated"
)
"that has to be updated"
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"LearningRate"
,
AddInput
(
"LearningRate"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input learning rate"
)
"Input learning rate"
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"MasterParam"
,
"FP32 master weight for AMP."
)
AddInput
(
"MasterParam"
,
"FP32 master weight for AMP."
)
.
AsDuplicable
()
.
AsDuplicable
()
.
AsDispensable
();
.
AsDispensable
();
AddOutput
(
"ParamOut"
,
AddOutput
(
"ParamOut"
,
"(
LoD
Tensor) This output is updated parameter. "
"(
phi::Dense
Tensor) This output is updated parameter. "
"It shared memory with Input(Param)."
)
"It shared memory with Input(Param)."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
"VelocityOut"
,
AddOutput
(
"VelocityOut"
,
"(
LoD
Tensor) This output is updated velocity. "
"(
phi::Dense
Tensor) This output is updated velocity. "
"It shared memory with Input(Velocity)."
)
"It shared memory with Input(Velocity)."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
"MasterParamOut"
,
AddOutput
(
"MasterParamOut"
,
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
30a31a53
...
@@ -54,12 +54,12 @@ class MomentumOp : public framework::OperatorWithKernel {
...
@@ -54,12 +54,12 @@ class MomentumOp : public framework::OperatorWithKernel {
true
,
true
,
platform
::
errors
::
NotFound
(
platform
::
errors
::
NotFound
(
"Input(LearningRate) of Momentum should not be null."
));
"Input(LearningRate) of Momentum should not be null."
));
PADDLE_ENFORCE_EQ
(
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"The input var's type should be LoDTensor,
but the received is %s"
,
"
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
true
,
...
...
paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
浏览文件 @
30a31a53
...
@@ -16,16 +16,15 @@ namespace paddle {
...
@@ -16,16 +16,15 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
class
RMSPROPNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
class
RMSPROPNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
auto
*
moment_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"MomentOut"
);
auto
*
moment_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MomentOut"
);
auto
*
mean_square_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"MeanSquareOut"
);
auto
*
mean_square_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MeanSquareOut"
);
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
@@ -34,17 +33,17 @@ class RMSPROPNPUKernel : public framework::OpKernel<T> {
...
@@ -34,17 +33,17 @@ class RMSPROPNPUKernel : public framework::OpKernel<T> {
auto
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
auto
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
auto
rho
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"decay"
));
auto
rho
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"decay"
));
auto
momentum
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"momentum"
));
auto
momentum
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"momentum"
));
auto
*
p_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
p_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
auto
*
ms_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"MeanSquare"
);
auto
*
ms_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"MeanSquare"
);
auto
*
lr_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
lr_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
auto
*
mom_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment"
);
auto
*
mom_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment"
);
bool
centered
=
ctx
.
Attr
<
bool
>
(
"centered"
);
bool
centered
=
ctx
.
Attr
<
bool
>
(
"centered"
);
auto
stream
=
auto
stream
=
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
.
stream
();
.
stream
();
if
(
grad_var
->
IsType
<
LoD
Tensor
>
())
{
if
(
grad_var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
auto
*
grad_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"Grad"
);
auto
*
grad_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Grad"
);
if
(
centered
)
{
if
(
centered
)
{
framework
::
NPUAttributeMap
attr_input
=
{{
"use_locking"
,
false
}};
framework
::
NPUAttributeMap
attr_input
=
{{
"use_locking"
,
false
}};
const
Tensor
*
rho_tensor
=
nullptr
;
const
Tensor
*
rho_tensor
=
nullptr
;
...
...
paddle/fluid/operators/optimizers/sgd_op.cu
浏览文件 @
30a31a53
...
@@ -72,7 +72,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
...
@@ -72,7 +72,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
true
,
platform
::
errors
::
InvalidArgument
(
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
ctx
.
InputNames
(
"Param"
).
front
(),
paddle
::
framework
::
ToTypeName
(
param_var
->
Type
())));
paddle
::
framework
::
ToTypeName
(
param_var
->
Type
())));
...
@@ -107,7 +107,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
...
@@ -107,7 +107,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
?
master_param_out
->
mutable_data
<
MPDType
>
(
ctx
.
GetPlace
())
?
master_param_out
->
mutable_data
<
MPDType
>
(
ctx
.
GetPlace
())
:
nullptr
;
:
nullptr
;
// Actually, all tensors are
LoD
Tensor except SelectedRows.
// Actually, all tensors are
phi::Dense
Tensor except SelectedRows.
if
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
())
{
if
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
())
{
auto
*
grad
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Grad"
);
auto
*
grad
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Grad"
);
...
...
paddle/fluid/operators/optimizers/sgd_op.h
浏览文件 @
30a31a53
...
@@ -36,7 +36,7 @@ template <typename T>
...
@@ -36,7 +36,7 @@ template <typename T>
struct
sgd_dense_param_kernel
<
T
,
struct
sgd_dense_param_kernel
<
T
,
framework
::
VarTypeTrait
<
phi
::
DenseTensor
>::
kId
>
{
framework
::
VarTypeTrait
<
phi
::
DenseTensor
>::
kId
>
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
)
const
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
)
const
{
VLOG
(
4
)
<<
"[CPU]: sgd_dense_param_kernel<T,
LoD
Tensor>"
;
VLOG
(
4
)
<<
"[CPU]: sgd_dense_param_kernel<T,
phi::Dense
Tensor>"
;
const
auto
*
learning_rate
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"LearningRate"
);
const
auto
*
learning_rate
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"LearningRate"
);
const
auto
*
param
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Param"
);
const
auto
*
param
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ParamOut"
);
...
@@ -95,7 +95,7 @@ template <>
...
@@ -95,7 +95,7 @@ template <>
struct
sgd_dense_param_kernel
<
platform
::
bfloat16
,
struct
sgd_dense_param_kernel
<
platform
::
bfloat16
,
framework
::
VarTypeTrait
<
phi
::
DenseTensor
>::
kId
>
{
framework
::
VarTypeTrait
<
phi
::
DenseTensor
>::
kId
>
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
)
const
{
void
operator
()(
const
framework
::
ExecutionContext
&
ctx
)
const
{
VLOG
(
4
)
<<
"[CPU]: sgd_dense_param_kernel<bfloat16,
LoD
Tensor>"
;
VLOG
(
4
)
<<
"[CPU]: sgd_dense_param_kernel<bfloat16,
phi::Dense
Tensor>"
;
const
auto
*
learning_rate
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"LearningRate"
);
const
auto
*
learning_rate
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"LearningRate"
);
const
auto
*
param
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Param"
);
const
auto
*
param
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Param"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ParamOut"
);
...
...
paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
浏览文件 @
30a31a53
...
@@ -90,7 +90,7 @@ class DistributedLookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -90,7 +90,7 @@ class DistributedLookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Ids"
,
AddInput
(
"Ids"
,
"(
LoDTensor) Ids's type should be LoD
Tensor"
"(
phi::DenseTensor) Ids's type should be phi::Dense
Tensor"
"THe ids to be looked up in W."
)
"THe ids to be looked up in W."
)
.
AsDuplicable
();
.
AsDuplicable
();
...
@@ -98,8 +98,9 @@ class DistributedLookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -98,8 +98,9 @@ class DistributedLookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) The input represents embedding tensors, "
"(Tensor) The input represents embedding tensors, "
"which is a learnable parameter."
);
"which is a learnable parameter."
);
AddOutput
(
"Outputs"
,
AddOutput
(
"(LoDTensor) The lookup results, which have the same type as W."
)
"Outputs"
,
"(phi::DenseTensor) The lookup results, which have the same type as W."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddAttr
<
int
>
(
"table_id"
,
"sparse table id"
).
SetDefault
(
0
);
AddAttr
<
int
>
(
"table_id"
,
"sparse table id"
).
SetDefault
(
0
);
...
...
paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
浏览文件 @
30a31a53
...
@@ -63,22 +63,23 @@ class DistributedPushSparseOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -63,22 +63,23 @@ class DistributedPushSparseOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Ids"
,
AddInput
(
"Ids"
,
"(
LoDTensor) Ids's type should be LoD
Tensor"
"(
phi::DenseTensor) Ids's type should be phi::Dense
Tensor"
"THe ids to be looked up in W."
)
"THe ids to be looked up in W."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"Shows"
,
AddInput
(
"Shows"
,
"(
LoDTensor) Shows's type should be LoD
Tensor"
"(
phi::DenseTensor) Shows's type should be phi::Dense
Tensor"
"THe shows default to be 1."
)
"THe shows default to be 1."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddInput
(
"Clicks"
,
AddInput
(
"Clicks"
,
"(
LoDTensor) Clicks's type should be LoD
Tensor"
"(
phi::DenseTensor) Clicks's type should be phi::Dense
Tensor"
"THe clicks usually equal to label."
)
"THe clicks usually equal to label."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
"Outputs"
,
AddOutput
(
"(LoDTensor) The lookup results, which have the same type as W."
)
"Outputs"
,
"(phi::DenseTensor) The lookup results, which have the same type as W."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddAttr
<
int
>
(
"table_id"
,
"sparse table id"
).
SetDefault
(
0
);
AddAttr
<
int
>
(
"table_id"
,
"sparse table id"
).
SetDefault
(
0
);
...
...
paddle/fluid/operators/pscore/fake_init_op.cc
浏览文件 @
30a31a53
...
@@ -45,7 +45,7 @@ class FakeInitOp : public framework::OperatorBase {
...
@@ -45,7 +45,7 @@ class FakeInitOp : public framework::OperatorBase {
}
else
{
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"fake init op's output only"
"fake init op's output only"
"supports SelectedRows and
LoD
Tensor"
));
"supports SelectedRows and
phi::Dense
Tensor"
));
}
}
}
}
};
};
...
...
paddle/fluid/operators/reader/create_py_reader_op.cc
浏览文件 @
30a31a53
...
@@ -109,7 +109,7 @@ class CreatePyReaderOpMaker : public FileReaderMakerBase {
...
@@ -109,7 +109,7 @@ class CreatePyReaderOpMaker : public FileReaderMakerBase {
.
SetDefault
(
1
);
.
SetDefault
(
1
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Create PyReader to support
LoD
Tensor data feeding in Python side.
Create PyReader to support
phi::Dense
Tensor data feeding in Python side.
)DOC"
);
)DOC"
);
}
}
};
};
...
...
paddle/fluid/operators/reader/read_op.cc
浏览文件 @
30a31a53
...
@@ -171,7 +171,7 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -171,7 +171,7 @@ class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddInput
(
"Reader"
,
"(ReaderHolder) The executed reader."
);
AddInput
(
"Reader"
,
"(ReaderHolder) The executed reader."
);
AddOutput
(
"Out"
,
"(
LoD
Tensor) The output data."
).
AsDuplicable
();
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) The output data."
).
AsDuplicable
();
AddAttr
<
bool
>
(
AddAttr
<
bool
>
(
"throw_eof_exp"
,
"throw_eof_exp"
,
"If set true, an exception will be thrown when the Reader "
"If set true, an exception will be thrown when the Reader "
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录