Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
30a31a53
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
30a31a53
编写于
11月 28, 2022
作者:
张
张春乔
提交者:
GitHub
11月 28, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
replace LoDTensor with phi::DenseTensor in fluid\operators\*\ except sequence_ops (#48418)
上级
8424cf28
变更
106
展开全部
隐藏空白更改
内联
并排
Showing
106 changed file
with
683 addition
and
638 deletion
+683
-638
paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
+2
-2
paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
+4
-4
paddle/fluid/operators/cinn/cinn_launch_context.cc
paddle/fluid/operators/cinn/cinn_launch_context.cc
+7
-7
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
+3
-4
paddle/fluid/operators/cinn/cinn_launch_op.cc
paddle/fluid/operators/cinn/cinn_launch_op.cc
+3
-3
paddle/fluid/operators/cinn/cinn_launch_op.h
paddle/fluid/operators/cinn/cinn_launch_op.h
+7
-7
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
+2
-2
paddle/fluid/operators/cinn/test_helper.h
paddle/fluid/operators/cinn/test_helper.h
+7
-7
paddle/fluid/operators/collective/c_embedding_op.cc
paddle/fluid/operators/collective/c_embedding_op.cc
+1
-1
paddle/fluid/operators/collective/c_embedding_op.cu
paddle/fluid/operators/collective/c_embedding_op.cu
+8
-6
paddle/fluid/operators/collective/c_embedding_op.h
paddle/fluid/operators/collective/c_embedding_op.h
+9
-9
paddle/fluid/operators/collective/c_embedding_op_npu.cc
paddle/fluid/operators/collective/c_embedding_op_npu.cc
+10
-8
paddle/fluid/operators/collective/c_embedding_op_xpu.cc
paddle/fluid/operators/collective/c_embedding_op_xpu.cc
+3
-5
paddle/fluid/operators/controlflow/conditional_block_op.cc
paddle/fluid/operators/controlflow/conditional_block_op.cc
+13
-11
paddle/fluid/operators/controlflow/conditional_block_op_test.cc
.../fluid/operators/controlflow/conditional_block_op_test.cc
+2
-3
paddle/fluid/operators/controlflow/feed_op.cc
paddle/fluid/operators/controlflow/feed_op.cc
+4
-4
paddle/fluid/operators/controlflow/fetch_op.cc
paddle/fluid/operators/controlflow/fetch_op.cc
+5
-3
paddle/fluid/operators/controlflow/fetch_v2_op.cc
paddle/fluid/operators/controlflow/fetch_v2_op.cc
+4
-2
paddle/fluid/operators/controlflow/logical_op.cc
paddle/fluid/operators/controlflow/logical_op.cc
+8
-7
paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
...fluid/operators/controlflow/tensor_array_read_write_op.cc
+7
-6
paddle/fluid/operators/controlflow/while_op.cc
paddle/fluid/operators/controlflow/while_op.cc
+13
-12
paddle/fluid/operators/detection/bbox_util.cu.h
paddle/fluid/operators/detection/bbox_util.cu.h
+0
-1
paddle/fluid/operators/detection/bipartite_match_op.cc
paddle/fluid/operators/detection/bipartite_match_op.cc
+5
-5
paddle/fluid/operators/detection/box_clip_op.cc
paddle/fluid/operators/detection/box_clip_op.cc
+4
-4
paddle/fluid/operators/detection/box_clip_op.cu
paddle/fluid/operators/detection/box_clip_op.cu
+2
-2
paddle/fluid/operators/detection/box_clip_op.h
paddle/fluid/operators/detection/box_clip_op.h
+3
-4
paddle/fluid/operators/detection/box_coder_op.cc
paddle/fluid/operators/detection/box_coder_op.cc
+3
-2
paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
...le/fluid/operators/detection/box_decoder_and_assign_op.cc
+6
-8
paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
+8
-6
paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
+3
-4
paddle/fluid/operators/detection/collect_fpn_proposals_op.h
paddle/fluid/operators/detection/collect_fpn_proposals_op.h
+1
-1
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
.../fluid/operators/detection/distribute_fpn_proposals_op.cc
+4
-2
paddle/fluid/operators/detection/generate_mask_labels_op.cc
paddle/fluid/operators/detection/generate_mask_labels_op.cc
+28
-19
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
.../fluid/operators/detection/generate_proposal_labels_op.cc
+39
-25
paddle/fluid/operators/detection/generate_proposals_op.cc
paddle/fluid/operators/detection/generate_proposals_op.cc
+6
-6
paddle/fluid/operators/detection/generate_proposals_op.cu
paddle/fluid/operators/detection/generate_proposals_op.cu
+2
-3
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
+4
-4
paddle/fluid/operators/detection/iou_similarity_op.cc
paddle/fluid/operators/detection/iou_similarity_op.cc
+14
-13
paddle/fluid/operators/detection/locality_aware_nms_op.cc
paddle/fluid/operators/detection/locality_aware_nms_op.cc
+10
-9
paddle/fluid/operators/detection/matrix_nms_op.cc
paddle/fluid/operators/detection/matrix_nms_op.cc
+5
-4
paddle/fluid/operators/detection/mine_hard_examples_op.cc
paddle/fluid/operators/detection/mine_hard_examples_op.cc
+9
-9
paddle/fluid/operators/detection/multiclass_nms_op.cc
paddle/fluid/operators/detection/multiclass_nms_op.cc
+13
-11
paddle/fluid/operators/detection/retinanet_detection_output_op.cc
...luid/operators/detection/retinanet_detection_output_op.cc
+7
-6
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
...fluid/operators/detection/roi_perspective_transform_op.cc
+4
-5
paddle/fluid/operators/detection/rpn_target_assign_op.cc
paddle/fluid/operators/detection/rpn_target_assign_op.cc
+31
-28
paddle/fluid/operators/detection/target_assign_op.cc
paddle/fluid/operators/detection/target_assign_op.cc
+4
-2
paddle/fluid/operators/elementwise/elementwise_add_op.cc
paddle/fluid/operators/elementwise/elementwise_add_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_div_op.cc
paddle/fluid/operators/elementwise/elementwise_div_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
...le/fluid/operators/elementwise/elementwise_floordiv_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_mul_op.cc
paddle/fluid/operators/elementwise/elementwise_mul_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+8
-7
paddle/fluid/operators/elementwise/elementwise_op_function.h
paddle/fluid/operators/elementwise/elementwise_op_function.h
+3
-3
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
+8
-6
paddle/fluid/operators/elementwise/elementwise_xpu.h
paddle/fluid/operators/elementwise/elementwise_xpu.h
+2
-2
paddle/fluid/operators/fused/fused_bn_activation_op.cc
paddle/fluid/operators/fused/fused_bn_activation_op.cc
+2
-4
paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
+2
-4
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
+21
-19
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
+0
-1
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
+1
-1
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
+19
-15
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
+2
-2
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
+7
-5
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
+0
-2
paddle/fluid/operators/fused/fusion_group_op.cc
paddle/fluid/operators/fused/fusion_group_op.cc
+2
-2
paddle/fluid/operators/fused/fusion_gru_op.cc
paddle/fluid/operators/fused/fusion_gru_op.cc
+15
-14
paddle/fluid/operators/fused/fusion_gru_op.h
paddle/fluid/operators/fused/fusion_gru_op.h
+0
-1
paddle/fluid/operators/fused/fusion_lstm_op.cc
paddle/fluid/operators/fused/fusion_lstm_op.cc
+40
-37
paddle/fluid/operators/fused/fusion_lstm_op.h
paddle/fluid/operators/fused/fusion_lstm_op.h
+0
-1
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
+2
-2
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
+0
-1
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
...le/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
+10
-9
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
+0
-1
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
...le/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
+5
-4
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
+0
-1
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
+5
-4
paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
+0
-1
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
+5
-4
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
+0
-1
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
+0
-1
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
+6
-5
paddle/fluid/operators/fused/multi_gru_op.cc
paddle/fluid/operators/fused/multi_gru_op.cc
+7
-6
paddle/fluid/operators/fused/multi_gru_op.h
paddle/fluid/operators/fused/multi_gru_op.h
+0
-1
paddle/fluid/operators/math/context_project.h
paddle/fluid/operators/math/context_project.h
+4
-6
paddle/fluid/operators/math/sequence_padding.h
paddle/fluid/operators/math/sequence_padding.h
+7
-9
paddle/fluid/operators/math/sequence_pooling.cc
paddle/fluid/operators/math/sequence_pooling.cc
+0
-1
paddle/fluid/operators/math/sequence_scale.h
paddle/fluid/operators/math/sequence_scale.h
+2
-1
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
+5
-6
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
+0
-1
paddle/fluid/operators/nccl/nccl_op.cu.cc
paddle/fluid/operators/nccl/nccl_op.cu.cc
+6
-8
paddle/fluid/operators/optimizers/adam_op_mlu.cc
paddle/fluid/operators/optimizers/adam_op_mlu.cc
+22
-23
paddle/fluid/operators/optimizers/adam_op_npu.cc
paddle/fluid/operators/optimizers/adam_op_npu.cc
+18
-19
paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
+14
-14
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
+2
-2
paddle/fluid/operators/optimizers/dpsgd_op.cc
paddle/fluid/operators/optimizers/dpsgd_op.cc
+12
-12
paddle/fluid/operators/optimizers/dpsgd_op.h
paddle/fluid/operators/optimizers/dpsgd_op.h
+2
-2
paddle/fluid/operators/optimizers/lamb_op.cc
paddle/fluid/operators/optimizers/lamb_op.cc
+3
-3
paddle/fluid/operators/optimizers/lars_momentum_op.cc
paddle/fluid/operators/optimizers/lars_momentum_op.cc
+13
-13
paddle/fluid/operators/optimizers/momentum_op.h
paddle/fluid/operators/optimizers/momentum_op.h
+6
-6
paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
+9
-10
paddle/fluid/operators/optimizers/sgd_op.cu
paddle/fluid/operators/optimizers/sgd_op.cu
+2
-2
paddle/fluid/operators/optimizers/sgd_op.h
paddle/fluid/operators/optimizers/sgd_op.h
+2
-2
paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
+4
-3
paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
+6
-5
paddle/fluid/operators/pscore/fake_init_op.cc
paddle/fluid/operators/pscore/fake_init_op.cc
+1
-1
paddle/fluid/operators/reader/create_py_reader_op.cc
paddle/fluid/operators/reader/create_py_reader_op.cc
+1
-1
paddle/fluid/operators/reader/read_op.cc
paddle/fluid/operators/reader/read_op.cc
+1
-1
未找到文件。
paddle/fluid/operators/cinn/cinn_instruction_run_op.cc
浏览文件 @
30a31a53
...
...
@@ -68,11 +68,11 @@ class CinnInstructionRunOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
kX
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the input arguments of this cinn instruction"
)
.
AsDuplicable
();
AddOutput
(
kOutputs
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the output arguments of this cinn instruction"
)
.
AsDuplicable
();
AddAttr
<
int64_t
>
(
...
...
paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc
浏览文件 @
30a31a53
...
...
@@ -74,7 +74,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
// so a cinn_instruction_run_op will throw an error
framework
::
Scope
scope
;
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
ASSERT_THROW
(
cinn_instruction_run_op
->
Run
(
scope
,
place
),
paddle
::
platform
::
EnforceNotMet
);
...
...
@@ -83,7 +83,7 @@ class TestCinnInstructionRunOp : public ::testing::Test {
// of both type float and int
cinn_launch_op
->
Run
(
scope
,
place
);
scope
.
EraseVars
({
"x"
,
"y"
,
test_op_out_name
});
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
InitVariablesWithRandomValue
<
int
>
({
"x"
,
"y"
},
{
30
,
40
},
place
,
&
scope
);
cinn_launch_op
->
Run
(
scope
,
place
);
}
...
...
@@ -92,8 +92,8 @@ class TestCinnInstructionRunOp : public ::testing::Test {
// Run ops and check the computation results
framework
::
Scope
scope
;
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
add_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
scope
.
Var
(
add_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
elementwise_add_op
->
Run
(
scope
,
place
);
cinn_launch_op
->
Run
(
scope
,
place
);
CompareOpResult
<
float
>
(
scope
.
GetVar
(
test_op_out_name
),
...
...
paddle/fluid/operators/cinn/cinn_launch_context.cc
浏览文件 @
30a31a53
...
...
@@ -45,7 +45,6 @@
namespace
paddle
{
namespace
operators
::
details
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
framework
::
ParallelExecutor
;
using
framework
::
Scope
;
using
CinnInstruction
=
::
cinn
::
hlir
::
framework
::
Instruction
;
...
...
@@ -268,7 +267,8 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) {
// assign external malloc/free callbacks of cinn_buffer_t
cinn_buffer
->
external_malloc
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
cached_scope_
->
GetVar
(
var_name
)
->
GetMutable
<
LoDTensor
>
();
auto
*
tensor
=
cached_scope_
->
GetVar
(
var_name
)
->
GetMutable
<
phi
::
DenseTensor
>
();
tensor
->
Resize
(
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
));
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
(
*
cached_place_
,
...
...
@@ -294,7 +294,7 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
cinn_buffer
->
external_malloc
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
cached_temp_scope_
->
Var
(
var_name
)
->
GetMutable
<
LoD
Tensor
>
();
cached_temp_scope_
->
Var
(
var_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
tensor
->
Resize
(
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
));
buffer
->
memory
=
reinterpret_cast
<
uint8_t
*>
(
tensor
->
mutable_data
(
*
cached_place_
,
...
...
@@ -306,8 +306,8 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) {
// if no instruction use it
cinn_buffer
->
external_free
=
new
std
::
function
<
int
(
void
*
,
cinn_buffer_t
*
)
>
(
[
this
,
var_name
](
void
*
ctx
,
cinn_buffer_t
*
buffer
)
{
auto
*
tensor
=
cached_temp_scope_
->
GetVar
(
var_name
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor
=
cached_temp_scope_
->
GetVar
(
var_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
tensor
->
clear
();
return
0
;
});
...
...
@@ -438,8 +438,8 @@ ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place,
auto
*
var
=
scope
->
GetVar
(
var_name
);
auto
*
buffer
=
GetCinnBufferOfVar
(
var_name
);
auto
dim
=
framework
::
DDim
(
buffer
->
dims
,
buffer
->
dimensions
);
var
->
GetMutable
<
LoD
Tensor
>
()
->
Resize
(
dim
);
var
->
GetMutable
<
LoD
Tensor
>
()
->
mutable_data
(
var
->
GetMutable
<
phi
::
Dense
Tensor
>
()
->
Resize
(
dim
);
var
->
GetMutable
<
phi
::
Dense
Tensor
>
()
->
mutable_data
(
place
,
framework
::
paddle2cinn
::
TransToPaddleDataType
(
buffer
->
type
));
}
return
parallel_executor_
.
get
();
...
...
paddle/fluid/operators/cinn/cinn_launch_context_test.cc
浏览文件 @
30a31a53
...
...
@@ -40,7 +40,6 @@ USE_OP(cinn_instruction_run);
namespace
paddle
{
namespace
operators
::
details
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
framework
::
OpDesc
;
using
framework
::
ParallelExecutor
;
using
framework
::
ProgramDesc
;
...
...
@@ -203,8 +202,8 @@ TEST_F(CinnLaunchContextTest, TestConstructResult) {
TEST_F
(
CinnLaunchContextTest
,
TestCheckTensorEquivalent
)
{
platform
::
CPUPlace
place
;
framework
::
Scope
scope
;
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor2
=
scope
.
Var
(
"var2"
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
auto
*
tensor2
=
scope
.
Var
(
"var2"
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
// dimension not equivalent
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
3
,
5
}),
place
);
...
...
@@ -264,7 +263,7 @@ TEST_F(CinnLaunchContextTest, TestCallbackAssignment) {
launch_context
->
UpdateCapturedEnv
(
scope
,
place
);
// assign external variables
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor1
=
scope
.
Var
(
"var1"
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
float
*
data1
=
tensor1
->
mutable_data
<
float
>
(
phi
::
make_ddim
({
3
,
4
}),
place
);
data1
[
0
]
=
9.99
f
;
data1
[
10
]
=
19.99
f
;
...
...
paddle/fluid/operators/cinn/cinn_launch_op.cc
浏览文件 @
30a31a53
...
...
@@ -128,18 +128,18 @@ class CinnLaunchOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
kX
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the input of graph inside the CinnLaunchOp"
"excluding kNoNeedBufferX."
)
.
AsDuplicable
();
AddInput
(
kNoNeedBufferX
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the input of graph inside the CinnLaunchOp but"
"their buffer are not needed."
)
.
AsDuplicable
()
.
AsDispensable
();
AddOutput
(
kOutputs
,
"(vector<
LoD
Tensor>)"
"(vector<
phi::Dense
Tensor>)"
"which are the output of graph inside the CinnLaunchOp."
)
.
AsDuplicable
();
AddAttr
<
int64_t
>
(
...
...
paddle/fluid/operators/cinn/cinn_launch_op.h
浏览文件 @
30a31a53
...
...
@@ -34,7 +34,6 @@ DECLARE_bool(enable_pe_launch_cinn);
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
CinnCompiler
=
framework
::
paddle2cinn
::
CinnCompiler
;
using
CinnCompiledObject
=
framework
::
paddle2cinn
::
CinnCompiledObject
;
...
...
@@ -76,29 +75,30 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> {
<<
"value:
\n
"
<<
CinnCompiler
::
GetInstance
()
->
ReadableKey
(
compilation_key
);
std
::
map
<
std
::
string
,
const
LoD
Tensor
*>
inputs_name2tensor
;
std
::
map
<
std
::
string
,
const
phi
::
Dense
Tensor
*>
inputs_name2tensor
;
std
::
vector
<
std
::
string
>
input_x_variable_names
;
std
::
vector
<
std
::
string
>
input_no_need_buffer_variable_names
;
auto
add_name2tensor_fn
=
[
&
inputs_name2tensor
](
const
std
::
vector
<
std
::
string
>&
variable_names
,
const
std
::
vector
<
const
LoDTensor
*>&
tensors
)
{
[
&
inputs_name2tensor
](
const
std
::
vector
<
std
::
string
>&
variable_names
,
const
std
::
vector
<
const
phi
::
DenseTensor
*>&
tensors
)
{
std
::
transform
(
variable_names
.
begin
(),
variable_names
.
end
(),
tensors
.
begin
(),
std
::
inserter
(
inputs_name2tensor
,
inputs_name2tensor
.
end
()),
[](
const
std
::
string
&
name
,
const
LoD
Tensor
*
tensor
)
{
[](
const
std
::
string
&
name
,
const
phi
::
Dense
Tensor
*
tensor
)
{
return
std
::
make_pair
(
name
,
tensor
);
});
};
auto
input_x_tensors
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
kX
);
auto
input_x_tensors
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
kX
);
if
(
!
input_x_tensors
.
empty
())
{
input_x_variable_names
=
std
::
move
(
ctx
.
InputNames
(
kX
));
add_name2tensor_fn
(
input_x_variable_names
,
input_x_tensors
);
}
auto
input_no_need_buffer_tensors
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
kNoNeedBufferX
);
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
kNoNeedBufferX
);
if
(
!
input_no_need_buffer_tensors
.
empty
())
{
input_no_need_buffer_variable_names
=
std
::
move
(
ctx
.
InputNames
(
kNoNeedBufferX
));
...
...
paddle/fluid/operators/cinn/cinn_launch_op_test.cc
浏览文件 @
30a31a53
...
...
@@ -78,8 +78,8 @@ class TestCinnLaunchOp : public ::testing::Test {
// Run ops and check the computation results
framework
::
Scope
scope
;
InitVariablesWithRandomValue
<
float
>
({
"x"
,
"y"
},
{
10
,
20
},
place
,
&
scope
);
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
add_op_out_name
)
->
GetMutable
<
LoD
Tensor
>
();
scope
.
Var
(
test_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
scope
.
Var
(
add_op_out_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
elementwise_add_op
->
Run
(
scope
,
place
);
cinn_launch_op
->
Run
(
scope
,
place
);
CompareOpResult
<
float
>
(
scope
.
GetVar
(
test_op_out_name
),
...
...
paddle/fluid/operators/cinn/test_helper.h
浏览文件 @
30a31a53
...
...
@@ -29,7 +29,6 @@ limitations under the License. */
namespace
paddle
::
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Variable
=
framework
::
Variable
;
using
Graph
=
framework
::
ir
::
Graph
;
using
Node
=
framework
::
ir
::
Node
;
...
...
@@ -97,11 +96,11 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
std
::
default_random_engine
engine
(
seed
());
std
::
uniform_real_distribution
<
float
>
dist
(
0
,
100
);
LoD
Tensor
tmp_tensor
;
phi
::
Dense
Tensor
tmp_tensor
;
auto
*
tmp_data
=
tmp_tensor
.
mutable_data
<
DataType
>
(
common_ddim
,
platform
::
CPUPlace
());
for
(
const
auto
&
var_name
:
var_names
)
{
auto
*
tensor
=
scope
->
Var
(
var_name
)
->
GetMutable
<
LoD
Tensor
>
();
auto
*
tensor
=
scope
->
Var
(
var_name
)
->
GetMutable
<
phi
::
Dense
Tensor
>
();
tensor
->
mutable_data
<
DataType
>
(
common_ddim
,
place
);
for
(
auto
i
=
0
;
i
<
tensor
->
numel
();
++
i
)
{
tmp_data
[
i
]
=
static_cast
<
DataType
>
(
dist
(
engine
));
...
...
@@ -112,11 +111,12 @@ void InitVariablesWithRandomValue(const std::vector<std::string>& var_names,
template
<
typename
DataType
>
void
CompareOpResult
(
Variable
*
test_out
,
Variable
*
expected_out
)
{
LoD
Tensor
test_tensor
,
expected_tensor
;
phi
::
Dense
Tensor
test_tensor
,
expected_tensor
;
paddle
::
framework
::
TensorCopySync
(
test_out
->
Get
<
LoDTensor
>
(),
platform
::
CPUPlace
(),
&
test_tensor
);
paddle
::
framework
::
TensorCopySync
(
expected_out
->
Get
<
LoDTensor
>
(),
platform
::
CPUPlace
(),
&
expected_tensor
);
test_out
->
Get
<
phi
::
DenseTensor
>
(),
platform
::
CPUPlace
(),
&
test_tensor
);
paddle
::
framework
::
TensorCopySync
(
expected_out
->
Get
<
phi
::
DenseTensor
>
(),
platform
::
CPUPlace
(),
&
expected_tensor
);
ASSERT_TRUE
(
test_tensor
.
IsInitialized
());
ASSERT_TRUE
(
expected_tensor
.
IsInitialized
());
...
...
paddle/fluid/operators/collective/c_embedding_op.cc
浏览文件 @
30a31a53
...
...
@@ -162,7 +162,7 @@ class CEmbeddingOpGradVarTypeInference : public framework::VarTypeInference {
void
operator
()(
framework
::
InferVarTypeContext
*
ctx
)
const
override
{
auto
out_var_name
=
framework
::
GradVarName
(
"W"
);
VLOG
(
3
)
<<
"c_embedding_grad op "
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to
LoD
Tensor"
;
<<
" is set to
phi::Dense
Tensor"
;
ctx
->
SetOutputType
(
out_var_name
,
framework
::
proto
::
VarType
::
LOD_TENSOR
);
ctx
->
SetOutputDataType
(
out_var_name
,
ctx
->
GetInputDataType
(
"W"
));
}
...
...
paddle/fluid/operators/collective/c_embedding_op.cu
浏览文件 @
30a31a53
...
...
@@ -86,9 +86,9 @@ template <typename T>
class
CEmbeddingCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
table_t
=
context
.
Input
<
LoD
Tensor
>
(
"W"
);
auto
*
ids_t
=
context
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
output_t
=
context
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
table_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
);
auto
*
ids_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
auto
*
output_t
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
GPUContext
>();
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
...
...
@@ -142,9 +142,11 @@ class CEmbeddingGradCUDAKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
GPUContext
>();
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
auto
ids_t
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
d_table_t
=
context
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
auto
ids_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
d_table_t
=
context
.
Output
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"W"
));
int
N
=
d_table_t
->
dims
()[
0
];
int
D
=
d_table_t
->
dims
()[
1
];
...
...
paddle/fluid/operators/collective/c_embedding_op.h
浏览文件 @
30a31a53
...
...
@@ -25,8 +25,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
inline
void
CheckTableValid
()
{}
template
<
typename
TIds
,
typename
TData
>
...
...
@@ -57,9 +55,9 @@ template <typename T>
class
CEmbeddingOpCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
table_t
=
ctx
.
Input
<
LoD
Tensor
>
(
"W"
);
auto
*
ids_t
=
ctx
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
output_t
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
table_t
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
);
auto
*
ids_t
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
auto
*
output_t
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
int64_t
start_idx
=
ctx
.
Attr
<
int64_t
>
(
"start_index"
);
VLOG
(
10
)
<<
"table_dims:"
<<
table_t
->
dims
();
...
...
@@ -119,10 +117,12 @@ class CEmbeddingGradOpCPUKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
auto
ids_t
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
table_t
=
context
.
Input
<
LoDTensor
>
(
"W"
);
auto
table_grad_t
=
context
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
auto
ids_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
table_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
auto
table_grad_t
=
context
.
Output
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"W"
));
T
*
table_grad_data
=
table_grad_t
->
mutable_data
<
T
>
(
table_t
->
dims
(),
context
.
GetPlace
());
...
...
paddle/fluid/operators/collective/c_embedding_op_npu.cc
浏览文件 @
30a31a53
...
...
@@ -111,9 +111,9 @@ void shard_index(const Tensor &table_t,
template
<
typename
TIds
,
typename
T
>
void
NPUGetIdsEmbedding
(
const
framework
::
ExecutionContext
&
context
)
{
auto
*
table_t
=
context
.
Input
<
LoD
Tensor
>
(
"W"
);
auto
*
ids_t
=
context
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
output_t
=
context
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
table_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
);
auto
*
ids_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
auto
*
output_t
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
auto
stream
=
...
...
@@ -165,7 +165,7 @@ template <typename T>
class
CEmbeddingNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
ids_t
=
context
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
ids_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
const
auto
&
index_type
=
framework
::
TransToProtoVarType
(
ids_t
->
dtype
());
if
(
index_type
==
framework
::
proto
::
VarType
::
INT32
)
{
...
...
@@ -181,10 +181,12 @@ template <typename TIds, typename T>
void
NPUUpdateEmbedding
(
const
framework
::
ExecutionContext
&
context
)
{
// get inputs
const
int64_t
start_idx
=
context
.
Attr
<
int64_t
>
(
"start_index"
);
auto
ids_t
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
ids_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
d_output_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
table_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
auto
table_grad_t
=
context
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
auto
table_grad_t
=
context
.
Output
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"W"
));
VLOG
(
10
)
<<
"ids_t:"
<<
ids_t
<<
", d_output_t:"
<<
d_output_t
<<
", table_t:"
<<
table_t
<<
", table_grad_t"
<<
table_grad_t
;
...
...
@@ -243,7 +245,7 @@ template <typename T>
class
CEmbeddingGradNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
ids_t
=
context
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
ids_t
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
const
auto
&
index_type
=
framework
::
TransToProtoVarType
(
ids_t
->
dtype
());
if
(
index_type
==
framework
::
proto
::
VarType
::
INT32
)
{
...
...
paddle/fluid/operators/collective/c_embedding_op_xpu.cc
浏览文件 @
30a31a53
...
...
@@ -18,15 +18,13 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
DeviceContext
,
typename
T
>
class
CEmbeddingOpXPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
table_t
=
ctx
.
Input
<
LoD
Tensor
>
(
"W"
);
auto
*
ids_t
=
ctx
.
Input
<
LoD
Tensor
>
(
"Ids"
);
auto
*
output_t
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
table_t
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
);
auto
*
ids_t
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Ids"
);
auto
*
output_t
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
int64_t
start_index
=
ctx
.
Attr
<
int64_t
>
(
"start_index"
);
const
T
*
table_data
=
table_t
->
data
<
T
>
();
T
*
output_data
=
output_t
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/controlflow/conditional_block_op.cc
浏览文件 @
30a31a53
...
...
@@ -363,13 +363,14 @@ class ConditionalBlockGradOp : public ConditionalOp {
}
if
(
input_var
->
IsType
<
phi
::
DenseTensor
>
())
{
PADDLE_ENFORCE_EQ
(
outside_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Type of outside_var %s is NOT LoDTensor, which "
"doesn't match input_var %s."
,
outside_grad_name
,
input_name
));
PADDLE_ENFORCE_EQ
(
outside_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Type of outside_var %s is NOT phi::DenseTensor, which "
"doesn't match input_var %s."
,
outside_grad_name
,
input_name
));
AssignZeroToOutsideTensor
(
place
,
scope
,
input_var
->
Get
<
phi
::
DenseTensor
>
(),
...
...
@@ -402,7 +403,8 @@ class ConditionalBlockGradOp : public ConditionalOp {
}
else
{
// TODO(huihuangzheng): add support for SelectedRows
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"Conditional block grad op doesn't support non-LoDTensor output "
"Conditional block grad op doesn't support non-phi::DenseTensor "
"output "
"now."
));
}
}
...
...
@@ -475,9 +477,9 @@ class ConditionalBlockGradInferShape : public framework::InferShapeBase {
class
ConditionalBlockGradInferVarType
:
public
framework
::
VarTypeInference
{
public:
void
operator
()(
framework
::
InferVarTypeContext
*
ctx
)
const
override
{
// NOTE(Aurelius84): VarType of Output is
LoDTensor by default. In case of
//
Input is {Tensor, LoDTensorArray}, we need synchronous the Input's
// VarType into Input@GRAD to avoid generating {Tensor, Tensor} as
// NOTE(Aurelius84): VarType of Output is
phi::DenseTensor by default. In
//
case of Input is {Tensor, LoDTensorArray}, we need synchronous the
//
Input's
VarType into Input@GRAD to avoid generating {Tensor, Tensor} as
// Input@GRAD.
auto
input_size
=
ctx
->
InputSize
(
ConditionalOp
::
kInputs
);
auto
output_size
=
...
...
paddle/fluid/operators/controlflow/conditional_block_op_test.cc
浏览文件 @
30a31a53
...
...
@@ -21,7 +21,6 @@ limitations under the License. */
USE_NO_KERNEL_OP
(
conditional_block
);
USE_NO_KERNEL_OP
(
conditional_block_grad
);
using
LoDTensor
=
phi
::
DenseTensor
;
using
LoDTensorArray
=
paddle
::
framework
::
LoDTensorArray
;
using
Scope
=
paddle
::
framework
::
Scope
;
using
Variable
=
paddle
::
framework
::
Variable
;
...
...
@@ -32,7 +31,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
Scope
scope
;
Variable
*
cond_var
=
scope
.
Var
(
"condition"
);
LoDTensor
*
cond_tensor
=
cond_var
->
GetMutable
<
LoD
Tensor
>
();
phi
::
DenseTensor
*
cond_tensor
=
cond_var
->
GetMutable
<
phi
::
Dense
Tensor
>
();
paddle
::
framework
::
DDim
cond_dims
=
phi
::
make_ddim
({
1
});
bool
*
cond_data
=
cond_tensor
->
mutable_data
<
bool
>
(
cond_dims
,
place
);
cond_data
[
0
]
=
false
;
...
...
@@ -41,7 +40,7 @@ TEST(ConditionalBlockGrad, NoNeedRunLoDTensorArray) {
LoDTensorArray
*
input_tensors
=
input_var
->
GetMutable
<
LoDTensorArray
>
();
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
paddle
::
framework
::
DDim
in_dims
=
phi
::
make_ddim
({
i
+
1
,
i
+
2
});
LoD
Tensor
lod_tensor
;
phi
::
Dense
Tensor
lod_tensor
;
float
*
in_data
=
lod_tensor
.
mutable_data
<
float
>
(
in_dims
,
place
);
for
(
int
j
=
0
;
j
<
(
i
+
1
)
*
(
i
+
2
);
++
j
)
{
in_data
[
j
]
=
static_cast
<
float
>
(
j
);
...
...
paddle/fluid/operators/controlflow/feed_op.cc
浏览文件 @
30a31a53
...
...
@@ -29,7 +29,7 @@ namespace paddle {
namespace
operators
{
// FeedVariableVisitor is to feed the variable data
// according to data type (
LoD
Tensor or Strings).
// according to data type (
phi::Dense
Tensor or Strings).
class
FeedVariableVisitor
{
public:
explicit
FeedVariableVisitor
(
framework
::
Variable
*
out_var
,
...
...
@@ -146,11 +146,11 @@ class FeedOpInfoMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(vector<
LoD
Tensor>) "
"A feeding list of
LoD
Tensor, which may have "
"(vector<
phi::Dense
Tensor>) "
"A feeding list of
phi::Dense
Tensor, which may have "
"different dimension and data type."
);
AddOutput
(
"Out"
,
"(
LoDTensor) The LoD
Tensor which is a copy "
"(
phi::DenseTensor) The phi::Dense
Tensor which is a copy "
"of the col-th feeding "
"object."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of current feeding object."
);
...
...
paddle/fluid/operators/controlflow/fetch_op.cc
浏览文件 @
30a31a53
...
...
@@ -143,12 +143,14 @@ class FetchOpInfoMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(LoDTensor) The resulted LoDTensor which is expected to return "
"(phi::DenseTensor) The resulted phi::DenseTensor which is "
"expected to return "
"to users."
);
AddOutput
(
"Out"
,
"(vector<LoDTensor>|unordered_map<string, int32_t>) A fetching list"
" of LoDTensor|unordered_map<string, int32_t> which may have "
"(vector<phi::DenseTensor>|unordered_map<string, int32_t>) A fetching "
"list"
" of phi::DenseTensor|unordered_map<string, int32_t> which may have "
"different dimension, shape and data type."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of fetching object."
);
AddComment
(
R"DOC(
...
...
paddle/fluid/operators/controlflow/fetch_v2_op.cc
浏览文件 @
30a31a53
...
...
@@ -201,10 +201,12 @@ class FetchV2OpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(LoDTensor) The resulted LoDTensor which is expected to return "
"(phi::DenseTensor) The resulted phi::DenseTensor which is "
"expected to return "
"to users."
);
AddOutput
(
"Out"
,
"(vector<LoDTensor>) A fetching list of LoDTensor which may have "
"(vector<phi::DenseTensor>) A fetching list of phi::DenseTensor "
"which may have "
"different dimension, shape and data type."
);
AddAttr
<
int
>
(
"col"
,
"(int) The column index of fetching object."
);
AddAttr
<
bool
>
(
"deepcopy"
,
"(bool) Whether deep copy is required."
)
...
...
paddle/fluid/operators/controlflow/logical_op.cc
浏览文件 @
30a31a53
...
...
@@ -35,7 +35,7 @@ class BinaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
comment
.
type
));
AddOutput
(
"Out"
,
string
::
Sprintf
(
"n-dim bool Variable"
));
AddComment
(
string
::
Sprintf
(
R"DOC(%s Operator
It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim
LoD
Tensor or Tensor.
It operates element-wise on X and Y, and returns the Out. X, Y and Out are N-dim
phi::Dense
Tensor or Tensor.
Each element of Out is calculated by %s
)DOC"
,
comment
.
type
,
...
...
@@ -49,13 +49,14 @@ class UnaryLogicalOpProtoMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
OpComment
comment
;
AddInput
(
"X"
,
string
::
Sprintf
(
"Operand of %s operator. Must be "
"a LoDTensor or Tensor of type being one of bool, "
"int8, int16, int32, int64, float32, float64."
,
comment
.
type
));
AddOutput
(
"Out"
,
string
::
Sprintf
(
"n-dim bool LoDTensor or Tensor."
));
string
::
Sprintf
(
"Operand of %s operator. Must be "
"a phi::DenseTensor or Tensor of type being one of bool, "
"int8, int16, int32, int64, float32, float64."
,
comment
.
type
));
AddOutput
(
"Out"
,
string
::
Sprintf
(
"n-dim bool phi::DenseTensor or Tensor."
));
AddComment
(
string
::
Sprintf
(
R"DOC(%s Operator
It operates element-wise on X, and returns the Out. X and Out are N-dim
LoD
Tensor or Tensor.
It operates element-wise on X, and returns the Out. X and Out are N-dim
phi::Dense
Tensor or Tensor.
Each element of Out is calculated by %s
)DOC"
,
comment
.
type
,
...
...
paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc
浏览文件 @
30a31a53
...
...
@@ -67,7 +67,8 @@ class WriteToArrayOp : public ArrayOp {
class
WriteToArrayOpProtoMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(LoDTensor) the tensor will be written to tensor array"
);
AddInput
(
"X"
,
"(phi::DenseTensor) the tensor will be written to tensor array"
);
AddInput
(
"I"
,
"(Tensor) the subscript index in tensor array. The number of element "
...
...
@@ -76,9 +77,9 @@ class WriteToArrayOpProtoMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
WriteToArray Operator.
This operator writes a
LoDTensor to a LoD
Tensor array.
This operator writes a
phi::DenseTensor to a phi::Dense
Tensor array.
Assume $T$ is
LoD
Tensor, $i$ is the subscript of the array, and $A$ is the array. The
Assume $T$ is
phi::Dense
Tensor, $i$ is the subscript of the array, and $A$ is the array. The
equation is
$$A[i] = T$$
...
...
@@ -196,13 +197,13 @@ class ReadFromArrayProtoMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) the writed tensor when used as the grad op of "
"write_to_array. We use this to fill zero gradient."
)
.
AsDispensable
();
AddOutput
(
"Out"
,
"(
LoD
Tensor) the tensor will be read from."
);
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) the tensor will be read from."
);
AddComment
(
R"DOC(
ReadFromArray Operator.
Read a
LoDTensor from a LoD
Tensor Array.
Read a
phi::DenseTensor from a phi::Dense
Tensor Array.
Assume $T$ is
LoD
Tensor, $i$ is the subscript of the array, and $A$ is the array. The
Assume $T$ is
phi::Dense
Tensor, $i$ is the subscript of the array, and $A$ is the array. The
equation is
$$T = A[i]$$
...
...
paddle/fluid/operators/controlflow/while_op.cc
浏览文件 @
30a31a53
...
...
@@ -32,7 +32,6 @@ namespace paddle {
namespace
operators
{
using
StepScopeVar
=
std
::
vector
<
framework
::
Scope
*>
;
using
LoDTensor
=
phi
::
DenseTensor
;
namespace
{
// NOLINT
static
std
::
string
GetSkipEagerDeletionVarsDebugString
(
...
...
@@ -62,7 +61,7 @@ class WhileOp : public framework::OperatorBase {
platform
::
errors
::
NotFound
(
"Input(Condition) of WhileOp is not found."
));
auto
&
cond
=
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
LoD
Tensor
>
();
auto
&
cond
=
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
phi
::
Dense
Tensor
>
();
PADDLE_ENFORCE_EQ
(
cond
.
dims
(),
phi
::
make_ddim
({
1
}),
...
...
@@ -149,9 +148,10 @@ class WhileOp : public framework::OperatorBase {
framework
::
Variable
*
input_var
=
scope
.
FindVar
(
input_var_name
);
if
(
input_var
->
IsType
<
phi
::
DenseTensor
>
())
{
rename_vars
.
push_back
(
input_var_rename
);
auto
input_var_tensor
=
input_var
->
Get
<
LoD
Tensor
>
();
auto
input_var_tensor
=
input_var
->
Get
<
phi
::
Dense
Tensor
>
();
auto
*
rename_input_var_tensor
=
current_scope
.
Var
(
input_var_rename
)
->
GetMutable
<
LoDTensor
>
();
current_scope
.
Var
(
input_var_rename
)
->
GetMutable
<
phi
::
DenseTensor
>
();
framework
::
TensorCopy
(
input_var_tensor
,
dev_place
,
rename_input_var_tensor
);
rename_input_var_tensor
->
set_lod
(
input_var_tensor
.
lod
());
...
...
@@ -166,8 +166,8 @@ class WhileOp : public framework::OperatorBase {
var_rename
.
substr
(
0
,
var_rename
.
size
()
-
strlen
(
kSuffix
));
current_scope
.
Rename
(
var_rename
,
input_var_name
);
}
cond_data
=
GetCondData
(
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
LoD
Tensor
>
());
cond_data
=
GetCondData
(
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
phi
::
Dense
Tensor
>
());
}
}
else
{
auto
&
current_scope
=
scope
.
NewScope
();
...
...
@@ -188,8 +188,8 @@ class WhileOp : public framework::OperatorBase {
}
executor
.
RunPreparedContext
(
ctx
.
get
(),
&
current_scope
,
false
,
false
,
false
);
cond_data
=
GetCondData
(
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
LoD
Tensor
>
());
cond_data
=
GetCondData
(
scope
.
FindVar
(
Input
(
kCondition
))
->
Get
<
phi
::
Dense
Tensor
>
());
}
scope
.
DeleteScope
(
&
current_scope
);
}
...
...
@@ -325,7 +325,8 @@ class WhileGradOp : public framework::OperatorBase {
}
}
else
{
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Currently only support LoDTensor and LoDTensorArray in "
"Currently only support phi::DenseTensor and "
"phi::DenseTensorArray in "
"WhileGradOp."
));
}
}
...
...
@@ -398,16 +399,16 @@ class WhileGradOp : public framework::OperatorBase {
inside_grad_name
));
PADDLE_ENFORCE_EQ
(
var
->
IsType
<
framework
::
LoDTensorArray
>
()
||
var
->
IsType
<
LoD
Tensor
>
(),
var
->
IsType
<
phi
::
Dense
Tensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"Currently the type of var only can be LoDTensorArray, "
"or
LoD
Tensor, but the received var[%s] is %s."
,
"or
phi::Dense
Tensor, but the received var[%s] is %s."
,
inside_grad_name
,
framework
::
ToTypeName
(
var
->
Type
())));
if
((
var_iter
==
outside_og_names
.
end
())
&&
var
->
IsType
<
LoD
Tensor
>
())
{
var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
auto
&
inside_tensor
=
var
->
Get
<
phi
::
DenseTensor
>
();
framework
::
AttributeMap
attrs
;
attrs
[
"dtype"
]
=
...
...
paddle/fluid/operators/detection/bbox_util.cu.h
浏览文件 @
30a31a53
...
...
@@ -31,7 +31,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
...
...
paddle/fluid/operators/detection/bipartite_match_op.cc
浏览文件 @
30a31a53
...
...
@@ -19,7 +19,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
BipartiteMatchOp
:
public
framework
::
OperatorWithKernel
{
public:
...
...
@@ -196,7 +195,7 @@ class BipartiteMatchKernel : public framework::OpKernel<T> {
}
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
dist_mat
=
context
.
Input
<
LoD
Tensor
>
(
"DistMat"
);
auto
*
dist_mat
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"DistMat"
);
auto
*
match_indices
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ColToRowMatchIndices"
);
auto
*
match_dist
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ColToRowMatchDist"
);
...
...
@@ -251,7 +250,8 @@ class BipartiteMatchOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"DistMat"
,
"(LoDTensor or Tensor) this input is a 2-D LoDTensor with shape "
"(phi::DenseTensor or Tensor) this input is a 2-D phi::DenseTensor "
"with shape "
"[K, M]. It is pair-wise distance matrix between the entities "
"represented by each row and each column. For example, assumed one "
"entity is A with shape [K], another entity is B with shape [M]. The "
...
...
@@ -302,8 +302,8 @@ row entity to the column entity and the matched indices are not duplicated
in each row of ColToRowMatchIndices. If the column entity is not matched
any row entity, set -1 in ColToRowMatchIndices.
Please note that the input DistMat can be
LoD
Tensor (with LoD) or Tensor.
If
LoD
Tensor with LoD, the height of ColToRowMatchIndices is batch size.
Please note that the input DistMat can be
phi::Dense
Tensor (with LoD) or Tensor.
If
phi::Dense
Tensor with LoD, the height of ColToRowMatchIndices is batch size.
If Tensor, the height of ColToRowMatchIndices is 1.
)DOC"
);
...
...
paddle/fluid/operators/detection/box_clip_op.cc
浏览文件 @
30a31a53
...
...
@@ -66,15 +66,15 @@ class BoxClipOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"Input"
,
"(
LoD
Tensor) "
"Input is a
LoD
Tensor with shape [..., 4] holds 4 points"
"(
phi::Dense
Tensor) "
"Input is a
phi::Dense
Tensor with shape [..., 4] holds 4 points"
"in last dimension in format [xmin, ymin, xmax, ymax]"
);
AddInput
(
"ImInfo"
,
"(Tensor) Information for image reshape is in shape (N, 3), "
"in format (height, width, im_scale)"
);
AddOutput
(
"Output"
,
"(
LoD
Tensor) "
"Output is a
LoD
Tensor with the same shape as Input"
"(
phi::Dense
Tensor) "
"Output is a
phi::Dense
Tensor with the same shape as Input"
"and it is the result after clip"
);
AddComment
(
R"DOC(
This operator clips input boxes to original input images.
...
...
paddle/fluid/operators/detection/box_clip_op.cu
浏览文件 @
30a31a53
...
...
@@ -49,9 +49,9 @@ template <typename DeviceContext, typename T>
class
GPUBoxClipKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input
=
context
.
Input
<
LoD
Tensor
>
(
"Input"
);
auto
*
input
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Input"
);
auto
*
im_info
=
context
.
Input
<
phi
::
DenseTensor
>
(
"ImInfo"
);
auto
*
output
=
context
.
Output
<
LoD
Tensor
>
(
"Output"
);
auto
*
output
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"Output"
);
const
int64_t
num
=
input
->
dims
()[
0
];
const
int64_t
bbox_width
=
input
->
numel
()
/
num
;
auto
lod
=
input
->
lod
();
...
...
paddle/fluid/operators/detection/box_clip_op.h
浏览文件 @
30a31a53
...
...
@@ -20,15 +20,14 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
DeviceContext
,
typename
T
>
class
BoxClipKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
input_box
=
context
.
Input
<
LoD
Tensor
>
(
"Input"
);
auto
*
im_info
=
context
.
Input
<
LoD
Tensor
>
(
"ImInfo"
);
auto
*
output_box
=
context
.
Output
<
LoD
Tensor
>
(
"Output"
);
auto
*
input_box
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"Input"
);
auto
*
im_info
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"ImInfo"
);
auto
*
output_box
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"Output"
);
auto
&
dev_ctx
=
context
.
template
device_context
<
phi
::
CPUContext
>();
output_box
->
mutable_data
<
T
>
(
context
.
GetPlace
());
if
(
input_box
->
lod
().
size
())
{
...
...
paddle/fluid/operators/detection/box_coder_op.cc
浏览文件 @
30a31a53
...
...
@@ -44,7 +44,8 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
.
AsDispensable
();
AddInput
(
"TargetBox"
,
"(LoDTensor or Tensor) This input can be a 2-D LoDTensor with shape "
"(phi::DenseTensor or Tensor) This input can be a 2-D phi::DenseTensor "
"with shape "
"[N, 4] when code_type is 'encode_center_size'. This input also can "
"be a 3-D Tensor with shape [N, M, 4] when code_type is "
"'decode_center_size'. [N, 4], each box is represented as "
...
...
@@ -79,7 +80,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
"not be provided at the same time."
)
.
SetDefault
(
std
::
vector
<
float
>
{});
AddOutput
(
"OutputBox"
,
"(
LoD
Tensor or Tensor) "
"(
phi::Dense
Tensor or Tensor) "
"When code_type is 'encode_center_size', the output tensor of "
"box_coder_op with shape [N, M, 4] representing the result of N "
"target boxes encoded with M Prior boxes and variances. When "
...
...
paddle/fluid/operators/detection/box_decoder_and_assign_op.cc
浏览文件 @
30a31a53
...
...
@@ -14,8 +14,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
class
BoxDecoderAndAssignOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
...
@@ -157,12 +155,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"default."
)
.
AsDispensable
();
AddInput
(
"TargetBox"
,
"(
LoD
Tensor or Tensor) "
"This input can be a 2-D
LoD
Tensor with shape "
"(
phi::Dense
Tensor or Tensor) "
"This input can be a 2-D
phi::Dense
Tensor with shape "
"[N, classnum*4]. It holds N targets for N boxes."
);
AddInput
(
"BoxScore"
,
"(
LoD
Tensor or Tensor) "
"This input can be a 2-D
LoD
Tensor with shape "
"(
phi::Dense
Tensor or Tensor) "
"This input can be a 2-D
phi::Dense
Tensor with shape "
"[N, classnum], each box is represented as [classnum] which is "
"the classification probabilities."
);
AddAttr
<
float
>
(
"box_clip"
,
...
...
@@ -170,12 +168,12 @@ class BoxDecoderAndAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"clip box to prevent overflowing"
)
.
SetDefault
(
4.135
f
);
AddOutput
(
"DecodeBox"
,
"(
LoD
Tensor or Tensor) "
"(
phi::Dense
Tensor or Tensor) "
"the output tensor of op with shape [N, classnum * 4] "
"representing the result of N target boxes decoded with "
"M Prior boxes and variances for each class."
);
AddOutput
(
"OutputAssignBox"
,
"(
LoD
Tensor or Tensor) "
"(
phi::Dense
Tensor or Tensor) "
"the output tensor of op with shape [N, 4] "
"representing the result of N target boxes decoded with "
"M Prior boxes and variances with the best non-background class "
...
...
paddle/fluid/operators/detection/collect_fpn_proposals_op.cc
浏览文件 @
30a31a53
...
...
@@ -17,7 +17,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
CollectFpnProposalsOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
...
@@ -76,8 +75,8 @@ class CollectFpnProposalsOp : public framework::OperatorWithKernel {
PADDLE_GET
(
framework
::
Variable
*
,
roi_inputs
[
i
]);
framework
::
Variable
*
score_var
=
PADDLE_GET
(
framework
::
Variable
*
,
score_inputs
[
i
]);
auto
&
roi_lod
=
roi_var
->
Get
<
LoD
Tensor
>
().
lod
();
auto
&
score_lod
=
score_var
->
Get
<
LoD
Tensor
>
().
lod
();
auto
&
roi_lod
=
roi_var
->
Get
<
phi
::
Dense
Tensor
>
().
lod
();
auto
&
score_lod
=
score_var
->
Get
<
phi
::
Dense
Tensor
>
().
lod
();
PADDLE_ENFORCE_EQ
(
roi_lod
,
score_lod
,
...
...
@@ -101,11 +100,13 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"MultiLevelRois"
,
"(LoDTensor) Multiple roi LoDTensors from each level in shape "
"(phi::DenseTensor) Multiple roi phi::DenseTensors from each "
"level in shape "
"(N, 4), N is the number of RoIs"
)
.
AsDuplicable
();
AddInput
(
"MultiLevelScores"
,
"(LoDTensor) Multiple score LoDTensors from each level in shape"
"(phi::DenseTensor) Multiple score phi::DenseTensors from each "
"level in shape"
" (N, 1), N is the number of RoIs."
)
.
AsDuplicable
();
AddInput
(
...
...
@@ -115,7 +116,8 @@ class CollectFpnProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
"images."
)
.
AsDuplicable
()
.
AsDispensable
();
AddOutput
(
"FpnRois"
,
"(LoDTensor) All selected RoIs with highest scores"
);
AddOutput
(
"FpnRois"
,
"(phi::DenseTensor) All selected RoIs with highest scores"
);
AddOutput
(
"RoisNum"
,
"(Tensor), Number of RoIs in each images."
)
.
AsDispensable
();
AddAttr
<
int
>
(
"post_nms_topN"
,
...
...
paddle/fluid/operators/detection/collect_fpn_proposals_op.cu
浏览文件 @
30a31a53
...
...
@@ -34,7 +34,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
static
constexpr
int
kNumCUDAThreads
=
64
;
static
constexpr
int
kNumMaxinumNumBlocks
=
4096
;
...
...
@@ -58,9 +57,9 @@ template <typename DeviceContext, typename T>
class
GPUCollectFpnProposalsOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
auto
roi_ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"MultiLevelRois"
);
const
auto
score_ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"MultiLevelScores"
);
auto
fpn_rois
=
ctx
.
Output
<
LoD
Tensor
>
(
"FpnRois"
);
const
auto
roi_ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"MultiLevelRois"
);
const
auto
score_ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"MultiLevelScores"
);
auto
fpn_rois
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"FpnRois"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
const
int
post_nms_topN
=
ctx
.
Attr
<
int
>
(
"post_nms_topN"
);
...
...
paddle/fluid/operators/detection/collect_fpn_proposals_op.h
浏览文件 @
30a31a53
...
...
@@ -91,7 +91,7 @@ class CollectFpnProposalsOpKernel : public framework::OpKernel<T> {
"is %d"
,
multi_layer_rois
.
size
(),
multi_layer_scores
.
size
()));
// Check if the lod information of two
LoD
Tensor is same
// Check if the lod information of two
phi::Dense
Tensor is same
const
int
num_fpn_level
=
multi_layer_rois
.
size
();
std
::
vector
<
int
>
integral_of_all_rois
(
num_fpn_level
+
1
,
0
);
for
(
int
i
=
0
;
i
<
num_fpn_level
;
++
i
)
{
...
...
paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc
浏览文件 @
30a31a53
...
...
@@ -37,12 +37,14 @@ class DistributeFpnProposalsOp : public framework::OperatorWithKernel {
class
DistributeFpnProposalsOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"FpnRois"
,
"(LoDTensor) The RoIs at all levels in shape (-1, 4)"
);
AddInput
(
"FpnRois"
,
"(phi::DenseTensor) The RoIs at all levels in shape (-1, 4)"
);
AddInput
(
"RoisNum"
,
"(Tensor) The number of RoIs in shape (B),"
"B is the number of images"
)
.
AsDispensable
();
AddOutput
(
"MultiFpnRois"
,
"(LoDTensor) Output with distribute operator"
)
AddOutput
(
"MultiFpnRois"
,
"(phi::DenseTensor) Output with distribute operator"
)
.
AsDuplicable
();
AddOutput
(
"RestoreIndex"
,
"(Tensor) An array of positive number which is "
...
...
paddle/fluid/operators/detection/generate_mask_labels_op.cc
浏览文件 @
30a31a53
...
...
@@ -26,11 +26,12 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
const
int
kBoxDim
=
4
;
template
<
typename
T
>
void
AppendMask
(
LoDTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
void
AppendMask
(
phi
::
DenseTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
...
...
@@ -331,16 +332,16 @@ template <typename T>
class
GenerateMaskLabelsKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
im_info
=
ctx
.
Input
<
LoD
Tensor
>
(
"ImInfo"
);
auto
*
gt_classes
=
ctx
.
Input
<
LoD
Tensor
>
(
"GtClasses"
);
auto
*
is_crowd
=
ctx
.
Input
<
LoD
Tensor
>
(
"IsCrowd"
);
auto
*
gt_segms
=
ctx
.
Input
<
LoD
Tensor
>
(
"GtSegms"
);
auto
*
rois
=
ctx
.
Input
<
LoD
Tensor
>
(
"Rois"
);
auto
*
label_int32
=
ctx
.
Input
<
LoD
Tensor
>
(
"LabelsInt32"
);
auto
*
im_info
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"ImInfo"
);
auto
*
gt_classes
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"GtClasses"
);
auto
*
is_crowd
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"IsCrowd"
);
auto
*
gt_segms
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"GtSegms"
);
auto
*
rois
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Rois"
);
auto
*
label_int32
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LabelsInt32"
);
auto
*
mask_rois
=
ctx
.
Output
<
LoD
Tensor
>
(
"MaskRois"
);
auto
*
roi_has_mask_int32
=
ctx
.
Output
<
LoD
Tensor
>
(
"RoiHasMaskInt32"
);
auto
*
mask_int32
=
ctx
.
Output
<
LoD
Tensor
>
(
"MaskInt32"
);
auto
*
mask_rois
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MaskRois"
);
auto
*
roi_has_mask_int32
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"RoiHasMaskInt32"
);
auto
*
mask_int32
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MaskInt32"
);
int
num_classes
=
ctx
.
Attr
<
int
>
(
"num_classes"
);
int
resolution
=
ctx
.
Attr
<
int
>
(
"resolution"
);
...
...
@@ -463,17 +464,20 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"B is the number of input images, "
"each element consists of im_height, im_width, im_scale."
);
AddInput
(
"GtClasses"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
"shape [M, 1]. "
"M is the number of groundtruth, "
"each element is a class label of groundtruth."
);
AddInput
(
"IsCrowd"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[M, 1]. "
"M is the number of groundtruth, "
"each element is a flag indicates whether a groundtruth is crowd."
);
AddInput
(
"GtSegms"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [S, 2], it's LoD "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[S, 2], it's LoD "
"level is 3. The LoD[0] represents the gt objects number of each "
"instance. LoD[1] represents the segmentation counts of each objects. "
"LoD[2] represents the polygons number of each segmentation. S the "
...
...
@@ -481,24 +485,29 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"coordinate points."
);
AddInput
(
"Rois"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [R, 4]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[R, 4]. "
"R is the number of rois which is the output of "
"generate_proposal_labels, "
"each element is a bounding box with (xmin, ymin, xmax, ymax) format."
);
AddInput
(
"LabelsInt32"
,
"(LoDTensor), This intput is a 2D LoDTensor with shape [R, 1], "
"(phi::DenseTensor), This intput is a 2D phi::DenseTensor with "
"shape [R, 1], "
"each element represents a class label of a roi"
);
AddOutput
(
"MaskRois"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
"[P, 4]. "
"P is the number of mask, "
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
AddOutput
(
"RoiHasMaskInt32"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
"shape [P, 1], "
"each element represents the output mask rois index with regard "
"to input rois"
);
AddOutput
(
"MaskInt32"
,
"(LoDTensor), This output is a 4D LoDTensor with shape [P, Q], "
"(phi::DenseTensor), This output is a 4D phi::DenseTensor with "
"shape [P, Q], "
"Q equal to num_classes * resolution * resolution"
);
AddAttr
<
int
>
(
"num_classes"
,
"Class number."
);
...
...
paddle/fluid/operators/detection/generate_proposal_labels_op.cc
浏览文件 @
30a31a53
...
...
@@ -26,11 +26,12 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
const
int
kBoxDim
=
4
;
template
<
typename
T
>
void
AppendRois
(
LoDTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
void
AppendRois
(
phi
::
DenseTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
...
...
@@ -513,19 +514,21 @@ template <typename T>
class
GenerateProposalLabelsKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
rpn_rois
=
context
.
Input
<
LoDTensor
>
(
"RpnRois"
);
auto
*
gt_classes
=
context
.
Input
<
LoDTensor
>
(
"GtClasses"
);
auto
*
is_crowd
=
context
.
Input
<
LoDTensor
>
(
"IsCrowd"
);
auto
*
gt_boxes
=
context
.
Input
<
LoDTensor
>
(
"GtBoxes"
);
auto
*
im_info
=
context
.
Input
<
LoDTensor
>
(
"ImInfo"
);
auto
*
rois
=
context
.
Output
<
LoDTensor
>
(
"Rois"
);
auto
*
labels_int32
=
context
.
Output
<
LoDTensor
>
(
"LabelsInt32"
);
auto
*
bbox_targets
=
context
.
Output
<
LoDTensor
>
(
"BboxTargets"
);
auto
*
bbox_inside_weights
=
context
.
Output
<
LoDTensor
>
(
"BboxInsideWeights"
);
auto
*
rpn_rois
=
context
.
Input
<
phi
::
DenseTensor
>
(
"RpnRois"
);
auto
*
gt_classes
=
context
.
Input
<
phi
::
DenseTensor
>
(
"GtClasses"
);
auto
*
is_crowd
=
context
.
Input
<
phi
::
DenseTensor
>
(
"IsCrowd"
);
auto
*
gt_boxes
=
context
.
Input
<
phi
::
DenseTensor
>
(
"GtBoxes"
);
auto
*
im_info
=
context
.
Input
<
phi
::
DenseTensor
>
(
"ImInfo"
);
auto
*
rois
=
context
.
Output
<
phi
::
DenseTensor
>
(
"Rois"
);
auto
*
labels_int32
=
context
.
Output
<
phi
::
DenseTensor
>
(
"LabelsInt32"
);
auto
*
bbox_targets
=
context
.
Output
<
phi
::
DenseTensor
>
(
"BboxTargets"
);
auto
*
bbox_inside_weights
=
context
.
Output
<
phi
::
DenseTensor
>
(
"BboxInsideWeights"
);
auto
*
bbox_outside_weights
=
context
.
Output
<
LoDTensor
>
(
"BboxOutsideWeights"
);
auto
*
max_overlap_with_gt
=
context
.
Output
<
LoDTensor
>
(
"MaxOverlapWithGT"
);
context
.
Output
<
phi
::
DenseTensor
>
(
"BboxOutsideWeights"
);
auto
*
max_overlap_with_gt
=
context
.
Output
<
phi
::
DenseTensor
>
(
"MaxOverlapWithGT"
);
int
batch_size_per_im
=
context
.
Attr
<
int
>
(
"batch_size_per_im"
);
float
fg_fraction
=
context
.
Attr
<
float
>
(
"fg_fraction"
);
...
...
@@ -685,21 +688,25 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"RpnRois"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [N, 4]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[N, 4]. "
"N is the number of the GenerateProposalOp's output, "
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
AddInput
(
"GtClasses"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with "
"shape [M, 1]. "
"M is the number of groundtruth, "
"each element is a class label of groundtruth."
);
AddInput
(
"IsCrowd"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 1]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[M, 1]. "
"M is the number of groundtruth, "
"each element is a flag indicates whether a groundtruth is crowd."
);
AddInput
(
"GtBoxes"
,
"(LoDTensor), This input is a 2D LoDTensor with shape [M, 4]. "
"(phi::DenseTensor), This input is a 2D phi::DenseTensor with shape "
"[M, 4]. "
"M is the number of groundtruth, "
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
AddInput
(
"ImInfo"
,
...
...
@@ -707,7 +714,8 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"B is the number of input images, "
"each element consists of im_height, im_width, im_scale."
);
AddInput
(
"MaxOverlap"
,
"(LoDTensor), This input is a 1D LoDTensor with shape [N]."
"(phi::DenseTensor), This input is a 1D phi::DenseTensor with "
"shape [N]."
"N is the number of Input(RpnRois), "
"each element is the maximum overlap between "
"the proposal RoI and ground-truth."
)
...
...
@@ -715,28 +723,34 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput
(
"Rois"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
"[P, 4]. "
"P usuall equal to batch_size_per_im * batch_size, "
"each element is a bounding box with [xmin, ymin, xmax, ymax] format."
);
AddOutput
(
"LabelsInt32"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
"shape [P, 1], "
"each element represents a class label of a roi"
);
AddOutput
(
"BboxTargets"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with "
"shape [P, 4 * "
"class_nums], "
"each element represents a box label of a roi"
);
AddOutput
(
"BboxInsideWeights"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
"[P, 4 * "
"class_nums], "
"each element indicates whether a box should contribute to loss."
);
AddOutput
(
"BboxOutsideWeights"
,
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
"(phi::DenseTensor), This output is a 2D phi::DenseTensor with shape "
"[P, 4 * "
"class_nums], "
"each element indicates whether a box should contribute to loss."
);
AddOutput
(
"MaxOverlapWithGT"
,
"(LoDTensor), This output is a 1D LoDTensor with shape [P], "
"(phi::DenseTensor), This output is a 1D phi::DenseTensor with "
"shape [P], "
"each element indicates the maxoverlap "
"between output RoIs and ground-truth. "
"The output RoIs may include ground-truth "
...
...
paddle/fluid/operators/detection/generate_proposals_op.cc
浏览文件 @
30a31a53
...
...
@@ -28,7 +28,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
GenerateProposalsOp
:
public
framework
::
OperatorWithKernel
{
public:
...
...
@@ -90,8 +89,8 @@ class GenerateProposalsKernel : public framework::OpKernel<T> {
"Variances"
,
"GenerateProposals"
);
auto
*
rpn_rois
=
context
.
Output
<
LoD
Tensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoD
Tensor
>
(
"RpnRoiProbs"
);
auto
*
rpn_rois
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"RpnRoiProbs"
);
int
pre_nms_top_n
=
context
.
Attr
<
int
>
(
"pre_nms_topN"
);
int
post_nms_top_n
=
context
.
Attr
<
int
>
(
"post_nms_topN"
);
...
...
@@ -288,9 +287,10 @@ class GenerateProposalsOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) Bounding box variances with same shape as `Anchors`."
);
AddOutput
(
"RpnRois"
,
"(LoDTensor), Output proposals with shape (rois_num, 4)."
);
AddOutput
(
"RpnRoiProbs"
,
"(LoDTensor) Scores of proposals with shape (rois_num, 1)."
);
"(phi::DenseTensor), Output proposals with shape (rois_num, 4)."
);
AddOutput
(
"RpnRoiProbs"
,
"(phi::DenseTensor) Scores of proposals with shape (rois_num, 1)."
);
AddOutput
(
"RpnRoisNum"
,
"(Tensor), The number of Rpn RoIs in each image"
)
.
AsDispensable
();
AddAttr
<
int
>
(
"pre_nms_topN"
,
...
...
paddle/fluid/operators/detection/generate_proposals_op.cu
浏览文件 @
30a31a53
...
...
@@ -29,7 +29,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
namespace
{
template
<
typename
T
>
...
...
@@ -144,8 +143,8 @@ class CUDAGenerateProposalsKernel : public framework::OpKernel<T> {
"Variances"
,
"GenerateProposals"
);
auto
*
rpn_rois
=
context
.
Output
<
LoD
Tensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
LoD
Tensor
>
(
"RpnRoiProbs"
);
auto
*
rpn_rois
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"RpnRois"
);
auto
*
rpn_roi_probs
=
context
.
Output
<
phi
::
Dense
Tensor
>
(
"RpnRoiProbs"
);
int
pre_nms_top_n
=
context
.
Attr
<
int
>
(
"pre_nms_topN"
);
int
post_nms_top_n
=
context
.
Attr
<
int
>
(
"post_nms_topN"
);
...
...
paddle/fluid/operators/detection/generate_proposals_v2_op.cc
浏览文件 @
30a31a53
...
...
@@ -30,7 +30,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
GenerateProposalsV2Op
:
public
framework
::
OperatorWithKernel
{
public:
...
...
@@ -65,9 +64,10 @@ class GenerateProposalsV2OpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor) Bounding box variances with same shape as `Anchors`."
);
AddOutput
(
"RpnRois"
,
"(LoDTensor), Output proposals with shape (rois_num, 4)."
);
AddOutput
(
"RpnRoiProbs"
,
"(LoDTensor) Scores of proposals with shape (rois_num, 1)."
);
"(phi::DenseTensor), Output proposals with shape (rois_num, 4)."
);
AddOutput
(
"RpnRoiProbs"
,
"(phi::DenseTensor) Scores of proposals with shape (rois_num, 1)."
);
AddOutput
(
"RpnRoisNum"
,
"(Tensor), The number of Rpn RoIs in each image"
)
.
AsDispensable
();
AddAttr
<
int
>
(
"pre_nms_topN"
,
...
...
paddle/fluid/operators/detection/iou_similarity_op.cc
浏览文件 @
30a31a53
...
...
@@ -59,17 +59,18 @@ class IOUSimilarityOp : public framework::OperatorWithKernel {
class
IOUSimilarityOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(LoDTensor, default LoDTensor<float>) "
"Box list X is a 2-D LoDTensor with shape [N, 4] holds N boxes, "
"each box is represented as [xmin, ymin, xmax, ymax], "
"the shape of X is [N, 4]. [xmin, ymin] is the left top "
"coordinate of the box if the input is image feature map, they "
"are close to the origin of the coordinate system. "
"[xmax, ymax] is the right bottom coordinate of the box. "
"This tensor can contain LoD information to represent a batch "
"of inputs. One instance of this batch can contain different "
"numbers of entities."
);
AddInput
(
"X"
,
"(phi::DenseTensor, default phi::DenseTensor<float>) "
"Box list X is a 2-D phi::DenseTensor with shape [N, 4] holds N boxes, "
"each box is represented as [xmin, ymin, xmax, ymax], "
"the shape of X is [N, 4]. [xmin, ymin] is the left top "
"coordinate of the box if the input is image feature map, they "
"are close to the origin of the coordinate system. "
"[xmax, ymax] is the right bottom coordinate of the box. "
"This tensor can contain LoD information to represent a batch "
"of inputs. One instance of this batch can contain different "
"numbers of entities."
);
AddInput
(
"Y"
,
"(Tensor, default Tensor<float>) "
"Box list Y holds M boxes, each box is represented as "
...
...
@@ -82,7 +83,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
"whether treat the priorbox as a normalized box"
)
.
SetDefault
(
true
);
AddOutput
(
"Out"
,
"(
LoD
Tensor, the lod is same as input X) The output of "
"(
phi::Dense
Tensor, the lod is same as input X) The output of "
"iou_similarity op, a tensor with shape [N, M] "
"representing pairwise iou scores."
);
...
...
@@ -90,7 +91,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
**IOU Similarity Operator**
Computes intersection-over-union (IOU) between two box lists.
Box list 'X' should be a
LoD
Tensor and 'Y' is a common Tensor,
Box list 'X' should be a
phi::Dense
Tensor and 'Y' is a common Tensor,
boxes in 'Y' are shared by all instance of the batched inputs of X.
Given two boxes A and B, the calculation of IOU is as follows:
...
...
paddle/fluid/operators/detection/locality_aware_nms_op.cc
浏览文件 @
30a31a53
...
...
@@ -20,7 +20,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
LocalityAwareNMSOp
:
public
framework
::
OperatorWithKernel
{
public:
...
...
@@ -352,15 +351,15 @@ class LocalityAwareNMSKernel : public framework::OpKernel<T> {
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
boxes_input
=
ctx
.
Input
<
LoD
Tensor
>
(
"BBoxes"
);
auto
*
scores_input
=
ctx
.
Input
<
LoD
Tensor
>
(
"Scores"
);
auto
*
outs
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
boxes_input
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"BBoxes"
);
auto
*
scores_input
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Scores"
);
auto
*
outs
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
&
score_dims
=
scores_input
->
dims
();
auto
score_size
=
score_dims
.
size
();
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
CPUContext
>();
LoD
Tensor
scores
;
LoD
Tensor
boxes
;
phi
::
Dense
Tensor
scores
;
phi
::
Dense
Tensor
boxes
;
paddle
::
framework
::
TensorCopySync
(
*
scores_input
,
platform
::
CPUPlace
(),
&
scores
);
paddle
::
framework
::
TensorCopySync
(
...
...
@@ -476,10 +475,12 @@ class LocalityAwareNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"Whether detections are normalized."
)
.
SetDefault
(
true
);
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
"represents the "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax] or "
"(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] "
"represents the "
"detections. Each row has 10 values: "
"[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
"total number of detections in this mini-batch."
...
...
@@ -501,7 +502,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of
LoD
Tensor are called LoD, the number
image, the offsets in first dimension of
phi::Dense
Tensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
means there is no detected bbox for this image.
...
...
paddle/fluid/operators/detection/matrix_nms_op.cc
浏览文件 @
30a31a53
...
...
@@ -21,7 +21,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
MatrixNMSOp
:
public
framework
::
OperatorWithKernel
{
public:
...
...
@@ -89,14 +88,16 @@ class MatrixNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"when 'use_gaussian' is enabled."
)
.
SetDefault
(
2.
);
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
"represents the "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax]. "
"the offsets in first dimension are called LoD, the number of "
"offset is N + 1, if LoD[i + 1] - LoD[i] == 0, means there is "
"no detected bbox."
);
AddOutput
(
"Index"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] "
"represents the "
"index of selected bbox. The index is the absolute index cross "
"batches."
);
AddOutput
(
"RoisNum"
,
"(Tensor), Number of RoIs in each images."
)
...
...
@@ -113,7 +114,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of
LoD
Tensor are called LoD, the number
image, the offsets in first dimension of
phi::Dense
Tensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
means there is no detected bbox for this image. Now this operator has one more
output, which is RoisNum. The size of RoisNum is N, RoisNum[i] means the number of
...
...
paddle/fluid/operators/detection/mine_hard_examples_op.cc
浏览文件 @
30a31a53
...
...
@@ -363,15 +363,15 @@ class MineHardExamplesOpMaker : public framework::OpProtoAndCheckerMaker {
.
SetDefault
(
"max_negative"
)
.
InEnum
({
"hard_example"
,
"max_negative"
});
AddOutput
(
"NegIndices"
,
"(LoDTensor<int>) The output of negative example indices. a LoD
Tensor "
"with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
"and each element is the prior box index. "
"For example, the batch size is 2, the lod is [[0, 1, 2]], "
"the sample 0's box 1(MatchIndices[0][1]) is selected, "
"and sample 1's box 0 is selected. The output NegIndices is "
"[[1], [0]]."
);
AddOutput
(
"NegIndices"
,
"(phi::DenseTensor<int>) The output of negative example indices. "
"a phi::Dense
Tensor "
"with shape [Neg, 1]. The size of lod[0] minus 1 is batch size, "
"and each element is the prior box index. "
"For example, the batch size is 2, the lod is [[0, 1, 2]], "
"the sample 0's box 1(MatchIndices[0][1]) is selected, "
"and sample 1's box 0 is selected. The output NegIndices is "
"[[1], [0]]."
);
AddOutput
(
"UpdatedMatchIndices"
,
"(Tensor<int>) The output of updated MatchIndices, a tensor with "
...
...
paddle/fluid/operators/detection/multiclass_nms_op.cc
浏览文件 @
30a31a53
...
...
@@ -22,7 +22,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
inline
std
::
vector
<
size_t
>
GetNmsLodFromRoisNum
(
const
phi
::
DenseTensor
*
rois_num
)
{
...
...
@@ -357,11 +356,11 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
}
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
boxes
=
ctx
.
Input
<
LoD
Tensor
>
(
"BBoxes"
);
auto
*
scores
=
ctx
.
Input
<
LoD
Tensor
>
(
"Scores"
);
auto
*
outs
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
boxes
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"BBoxes"
);
auto
*
scores
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Scores"
);
auto
*
outs
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
bool
return_index
=
ctx
.
HasOutput
(
"Index"
)
?
true
:
false
;
auto
index
=
ctx
.
Output
<
LoD
Tensor
>
(
"Index"
);
auto
index
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Index"
);
bool
has_roisnum
=
ctx
.
HasInput
(
"RoisNum"
)
?
true
:
false
;
auto
rois_num
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"RoisNum"
);
auto
score_dims
=
scores
->
dims
();
...
...
@@ -496,7 +495,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"predicted locations of M bounding bboxes, N is the batch size. "
"Each bounding box has four coordinate values and the layout is "
"[xmin, ymin, xmax, ymax], when box size equals to 4."
"2. (
LoD
Tensor) A 3-D Tensor with shape [M, C, 4]"
"2. (
phi::Dense
Tensor) A 3-D Tensor with shape [M, C, 4]"
"M is the number of bounding boxes, C is the class number"
);
AddInput
(
"Scores"
,
"Two types of scores are supported:"
...
...
@@ -505,7 +504,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"class number, M is number of bounding boxes. For each category "
"there are total M scores which corresponding M bounding boxes. "
" Please note, M is equal to the 2nd dimension of BBoxes. "
"2. (
LoDTensor) A 2-D LoD
Tensor with shape [M, C]. "
"2. (
phi::DenseTensor) A 2-D phi::Dense
Tensor with shape [M, C]. "
"M is the number of bbox, C is the class number. In this case, "
"Input BBoxes should be the second case with shape [M, C, 4]."
);
AddAttr
<
int
>
(
...
...
@@ -540,10 +539,12 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
"Whether detections are normalized."
)
.
SetDefault
(
true
);
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
"represents the "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax] or "
"(LoDTensor) A 2-D LoDTensor with shape [No, 10] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 10] "
"represents the "
"detections. Each row has 10 values: "
"[label, confidence, x1, y1, x2, y2, x3, y3, x4, y4]. No is the "
"total number of detections in this mini-batch."
...
...
@@ -564,7 +565,7 @@ Aftern NMS step, at most keep_top_k number of total bboxes are to be kept
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of
LoD
Tensor are called LoD, the number
image, the offsets in first dimension of
phi::Dense
Tensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
means there is no detected bbox for this image.
)DOC"
);
...
...
@@ -600,7 +601,8 @@ class MultiClassNMS2OpMaker : public MultiClassNMSOpMaker {
void
Make
()
override
{
MultiClassNMSOpMaker
::
Make
();
AddOutput
(
"Index"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 1] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 1] "
"represents the "
"index of selected bbox. The index is the absolute index cross "
"batches."
)
.
AsIntermediate
();
...
...
paddle/fluid/operators/detection/retinanet_detection_output_op.cc
浏览文件 @
30a31a53
...
...
@@ -19,7 +19,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
RetinanetDetectionOutputOp
:
public
framework
::
OperatorWithKernel
{
public:
...
...
@@ -490,8 +489,8 @@ class RetinanetDetectionOutputKernel : public framework::OpKernel<T> {
auto
boxes
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"BBoxes"
);
auto
scores
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Scores"
);
auto
anchors
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Anchors"
);
auto
*
im_info
=
ctx
.
Input
<
LoD
Tensor
>
(
"ImInfo"
);
auto
*
outs
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
im_info
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"ImInfo"
);
auto
*
outs
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
std
::
vector
<
Tensor
>
boxes_list
(
boxes
.
size
());
std
::
vector
<
Tensor
>
scores_list
(
scores
.
size
());
...
...
@@ -586,7 +585,8 @@ class RetinanetDetectionOutputOpMaker
"[xmin, ymin, xmax, ymax]."
)
.
AsDuplicable
();
AddInput
(
"ImInfo"
,
"(LoDTensor) A 2-D LoDTensor with shape [N, 3] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [N, 3] "
"represents the "
"image information. N is the batch size, each image information "
"includes height, width and scale."
);
AddAttr
<
float
>
(
"score_threshold"
,
...
...
@@ -609,7 +609,8 @@ class RetinanetDetectionOutputOpMaker
"Number of total bounding boxes to be kept per image after NMS "
"step."
);
AddOutput
(
"Out"
,
"(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the "
"(phi::DenseTensor) A 2-D phi::DenseTensor with shape [No, 6] "
"represents the "
"detections. Each row has 6 values: "
"[label, confidence, xmin, ymin, xmax, ymax]"
"No is the total number of detections in this mini-batch."
...
...
@@ -650,7 +651,7 @@ After NMS step, at most keep_top_k number of total bounding boxes are to be kept
per image if keep_top_k is larger than -1.
This operator support multi-class and batched inputs. It applying NMS
independently for each class. The outputs is a 2-D LoDTenosr, for each
image, the offsets in first dimension of
LoD
Tensor are called LoD, the number
image, the offsets in first dimension of
phi::Dense
Tensor are called LoD, the number
of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0,
means there is no detected bounding box for this image. If there is no detected boxes
for all images, all the elements in LoD are set to 0, and the output tensor is
...
...
paddle/fluid/operators/detection/roi_perspective_transform_op.cc
浏览文件 @
30a31a53
...
...
@@ -23,7 +23,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
>
bool
GT_E
(
T
a
,
T
b
)
{
...
...
@@ -504,7 +503,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
rois_dims
.
size
(),
2
,
platform
::
errors
::
InvalidArgument
(
"ROIs should be a 2-D
LoD
Tensor of shape (num_rois, 8)"
"ROIs should be a 2-D
phi::Dense
Tensor of shape (num_rois, 8)"
"given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received "
"rois dims is %d"
,
rois_dims
.
size
()));
...
...
@@ -512,7 +511,7 @@ class ROIPerspectiveTransformOp : public framework::OperatorWithKernel {
rois_dims
[
1
],
8
,
platform
::
errors
::
InvalidArgument
(
"ROIs should be a 2-D
LoD
Tensor of shape (num_rois, 8)"
"ROIs should be a 2-D
phi::Dense
Tensor of shape (num_rois, 8)"
"given as [[x0, y0, x1, y1, x2, y2, x3, y3], ...]. But received %d"
,
rois_dims
[
1
]));
...
...
@@ -608,9 +607,9 @@ class ROIPerspectiveTransformOpMaker
"H is the height of the feature, and "
"W is the width of the feature."
);
AddInput
(
"ROIs"
,
"(
LoD
Tensor), "
"(
phi::Dense
Tensor), "
"ROIs (Regions of Interest) to be transformed. "
"should be a 2-D
LoD
Tensor of shape (num_rois, 8)"
"should be a 2-D
phi::Dense
Tensor of shape (num_rois, 8)"
"given as [[x1, y1, x2, y2, x3, y3, x4, y4], ...]."
"(x1, y1) is the top left coordinates, and "
"(x2, y2) is the top right coordinates, and"
...
...
paddle/fluid/operators/detection/rpn_target_assign_op.cc
浏览文件 @
30a31a53
...
...
@@ -22,7 +22,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
...
...
@@ -105,7 +104,9 @@ class RpnTargetAssignOp : public framework::OperatorWithKernel {
};
template
<
typename
T
>
void
AppendRpns
(
LoDTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
void
AppendRpns
(
phi
::
DenseTensor
*
out
,
int64_t
offset
,
phi
::
DenseTensor
*
to_add
)
{
auto
*
out_data
=
out
->
data
<
T
>
();
auto
*
to_add_data
=
to_add
->
data
<
T
>
();
memcpy
(
out_data
+
offset
,
to_add_data
,
to_add
->
numel
()
*
sizeof
(
T
));
...
...
@@ -395,15 +396,16 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
anchor
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
gt_boxes
=
context
.
Input
<
LoD
Tensor
>
(
"GtBoxes"
);
auto
*
is_crowd
=
context
.
Input
<
LoD
Tensor
>
(
"IsCrowd"
);
auto
*
im_info
=
context
.
Input
<
LoD
Tensor
>
(
"ImInfo"
);
auto
*
gt_boxes
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"GtBoxes"
);
auto
*
is_crowd
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"IsCrowd"
);
auto
*
im_info
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"ImInfo"
);
auto
*
loc_index
=
context
.
Output
<
LoDTensor
>
(
"LocationIndex"
);
auto
*
score_index
=
context
.
Output
<
LoDTensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox
=
context
.
Output
<
LoDTensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl
=
context
.
Output
<
LoDTensor
>
(
"TargetLabel"
);
auto
*
bbox_inside_weight
=
context
.
Output
<
LoDTensor
>
(
"BBoxInsideWeight"
);
auto
*
loc_index
=
context
.
Output
<
phi
::
DenseTensor
>
(
"LocationIndex"
);
auto
*
score_index
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox
=
context
.
Output
<
phi
::
DenseTensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl
=
context
.
Output
<
phi
::
DenseTensor
>
(
"TargetLabel"
);
auto
*
bbox_inside_weight
=
context
.
Output
<
phi
::
DenseTensor
>
(
"BBoxInsideWeight"
);
PADDLE_ENFORCE_EQ
(
gt_boxes
->
lod
().
size
(),
1UL
,
...
...
@@ -598,11 +600,11 @@ class RpnTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Anchor"
,
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
AddInput
(
"GtBoxes"
,
"(
LoD
Tensor) input ground-truth bbox with shape [K, 4]."
);
"(
phi::Dense
Tensor) input ground-truth bbox with shape [K, 4]."
);
AddInput
(
"IsCrowd"
,
"(
LoD
Tensor) input which indicates ground-truth is crowd."
);
"(
phi::Dense
Tensor) input which indicates ground-truth is crowd."
);
AddInput
(
"ImInfo"
,
"(
LoD
Tensor) input image information with shape [N, 3]. "
"(
phi::Dense
Tensor) input image information with shape [N, 3]. "
"N is the batch size, each image information includes height, "
"width and scale."
);
AddAttr
<
int
>
(
"rpn_batch_size_per_im"
,
...
...
@@ -685,13 +687,13 @@ class RetinanetTargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Anchor"
,
"(Tensor) input anchor is a 2-D Tensor with shape [H*W*A, 4]."
);
AddInput
(
"GtBoxes"
,
"(
LoD
Tensor) input ground-truth bbox with shape [K, 4]."
);
"(
phi::Dense
Tensor) input ground-truth bbox with shape [K, 4]."
);
AddInput
(
"GtLabels"
,
"(
LoD
Tensor) input ground-truth label with shape [K, 1]."
);
"(
phi::Dense
Tensor) input ground-truth label with shape [K, 1]."
);
AddInput
(
"IsCrowd"
,
"(
LoD
Tensor) input which indicates ground-truth is crowd."
);
"(
phi::Dense
Tensor) input which indicates ground-truth is crowd."
);
AddInput
(
"ImInfo"
,
"(
LoD
Tensor) input image information with shape [N, 3]. "
"(
phi::Dense
Tensor) input image information with shape [N, 3]. "
"N is the batch size, each image information includes height, "
"width and scale."
);
AddAttr
<
float
>
(
...
...
@@ -994,17 +996,18 @@ class RetinanetTargetAssignKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
anchor
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Anchor"
);
// (H*W*A) * 4
auto
*
gt_boxes
=
context
.
Input
<
LoDTensor
>
(
"GtBoxes"
);
auto
*
gt_labels
=
context
.
Input
<
LoDTensor
>
(
"GtLabels"
);
auto
*
is_crowd
=
context
.
Input
<
LoDTensor
>
(
"IsCrowd"
);
auto
*
im_info
=
context
.
Input
<
LoDTensor
>
(
"ImInfo"
);
auto
*
loc_index
=
context
.
Output
<
LoDTensor
>
(
"LocationIndex"
);
auto
*
score_index
=
context
.
Output
<
LoDTensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox
=
context
.
Output
<
LoDTensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl
=
context
.
Output
<
LoDTensor
>
(
"TargetLabel"
);
auto
*
bbox_inside_weight
=
context
.
Output
<
LoDTensor
>
(
"BBoxInsideWeight"
);
auto
*
fg_num
=
context
.
Output
<
LoDTensor
>
(
"ForegroundNumber"
);
auto
*
gt_boxes
=
context
.
Input
<
phi
::
DenseTensor
>
(
"GtBoxes"
);
auto
*
gt_labels
=
context
.
Input
<
phi
::
DenseTensor
>
(
"GtLabels"
);
auto
*
is_crowd
=
context
.
Input
<
phi
::
DenseTensor
>
(
"IsCrowd"
);
auto
*
im_info
=
context
.
Input
<
phi
::
DenseTensor
>
(
"ImInfo"
);
auto
*
loc_index
=
context
.
Output
<
phi
::
DenseTensor
>
(
"LocationIndex"
);
auto
*
score_index
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ScoreIndex"
);
auto
*
tgt_bbox
=
context
.
Output
<
phi
::
DenseTensor
>
(
"TargetBBox"
);
auto
*
tgt_lbl
=
context
.
Output
<
phi
::
DenseTensor
>
(
"TargetLabel"
);
auto
*
bbox_inside_weight
=
context
.
Output
<
phi
::
DenseTensor
>
(
"BBoxInsideWeight"
);
auto
*
fg_num
=
context
.
Output
<
phi
::
DenseTensor
>
(
"ForegroundNumber"
);
PADDLE_ENFORCE_EQ
(
gt_boxes
->
lod
().
size
(),
...
...
paddle/fluid/operators/detection/target_assign_op.cc
浏览文件 @
30a31a53
...
...
@@ -89,7 +89,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(LoDTensor), This input is a 3D LoDTensor with shape [M, P, K]. "
"(phi::DenseTensor), This input is a 3D phi::DenseTensor with "
"shape [M, P, K]. "
"Some elements in X will be assigned to Out based on the "
"MatchIndices and NegIndices."
);
AddInput
(
"MatchIndices"
,
...
...
@@ -97,7 +98,8 @@ class TargetAssignOpMaker : public framework::OpProtoAndCheckerMaker {
"with shape [N, P], If MatchIndices[i][j] is -1, the j-th entity "
"of column is not matched to any entity of row in i-th instance."
);
AddInput
(
"NegIndices"
,
"(LoDTensor, default LoDTensor<int>), The input negative example "
"(phi::DenseTensor, default phi::DenseTensor<int>), The input "
"negative example "
"indices are an optional input with shape [Neg, 1], where Neg is "
"the total number of negative example indices."
)
.
AsDispensable
();
...
...
paddle/fluid/operators/elementwise/elementwise_add_op.cc
浏览文件 @
30a31a53
...
...
@@ -31,15 +31,17 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X + Y"
;
}
void
AddInputX
()
override
{
AddInput
(
"X"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
AddInput
(
"X"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
void
AddInputY
()
override
{
AddInput
(
"Y"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
AddInput
(
"Y"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.cc
浏览文件 @
30a31a53
...
...
@@ -29,15 +29,17 @@ class ElementwiseDivOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X / Y"
;
}
void
AddInputX
()
override
{
AddInput
(
"X"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
AddInput
(
"X"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
void
AddInputY
()
override
{
AddInput
(
"Y"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
AddInput
(
"Y"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc
浏览文件 @
30a31a53
...
...
@@ -35,15 +35,17 @@ class ElementwiseFloorDivOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X // Y"
;
}
void
AddInputX
()
override
{
AddInput
(
"X"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64."
);
AddInput
(
"X"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64."
);
}
void
AddInputY
()
override
{
AddInput
(
"Y"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64."
);
AddInput
(
"Y"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64."
);
}
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.cc
浏览文件 @
30a31a53
...
...
@@ -28,15 +28,17 @@ class ElementwiseMulOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X
\\\\
odot Y"
;
}
void
AddInputX
()
override
{
AddInput
(
"X"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
AddInput
(
"X"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
void
AddInputY
()
override
{
AddInput
(
"Y"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
AddInput
(
"Y"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
30a31a53
...
...
@@ -43,13 +43,14 @@ class ElementwiseOp : public framework::OperatorWithKernel {
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"Y"
),
"Input"
,
"Y"
,
"ElementwiseOp"
);
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"Out"
),
"Output"
,
"Out"
,
"ElementwiseOp"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be LoDTensor, but the "
"received is %s [%s]."
,
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
ctx
->
Inputs
(
"Y"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, but the "
"received is %s [%s]."
,
ctx
->
GetInputsVarType
(
"Y"
).
front
(),
ctx
->
Inputs
(
"Y"
).
front
()));
if
(
ctx
->
GetInputsVarType
(
"X"
).
front
()
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
...
...
paddle/fluid/operators/elementwise/elementwise_op_function.h
浏览文件 @
30a31a53
...
...
@@ -61,11 +61,11 @@ namespace operators {
/*
* Pack input and output tensors into respective vectors with
* consideration of variable X's class type.
* Input variable X is supported to be whether
LoD
Tensor or
* Input variable X is supported to be whether
phi::Dense
Tensor or
* SelectedRows class type in this package function, once X
* was SelectedRows type, a valid pointer x_for_selectedrows
* is expected to be passed in from op kernel for acquisition
* of the valid address of
LoD
Tensor created ahead in the function.
* of the valid address of
phi::Dense
Tensor created ahead in the function.
*/
template
<
typename
OutT
>
int
PackTensorsIntoVector
(
const
framework
::
ExecutionContext
&
ctx
,
...
...
@@ -112,7 +112,7 @@ int PackTensorsIntoVector(const framework::ExecutionContext &ctx,
}
else
{
PADDLE_THROW
(
platform
::
errors
::
InvalidArgument
(
"X's type[%s] is not supported by elementwise_op. X's type should be "
"
LoD
Tensor or SelectedRows."
,
"
phi::Dense
Tensor or SelectedRows."
,
framework
::
ToTypeName
(
x_var
->
Type
())));
}
z
->
mutable_data
<
OutT
>
(
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/elementwise/elementwise_sub_op.cc
浏览文件 @
30a31a53
...
...
@@ -34,15 +34,17 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker {
std
::
string
GetEquation
()
const
override
{
return
"Out = X - Y"
;
}
void
AddInputX
()
override
{
AddInput
(
"X"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
AddInput
(
"X"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
void
AddInputY
()
override
{
AddInput
(
"Y"
,
"(Variable), Tensor or LoDTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
AddInput
(
"Y"
,
"(Variable), Tensor or phi::DenseTensor of any dimensions. Its dtype "
"should be int32, int64, float32, float64."
);
}
std
::
string
GetOpFuntionality
()
const
override
{
...
...
paddle/fluid/operators/elementwise/elementwise_xpu.h
浏览文件 @
30a31a53
...
...
@@ -43,8 +43,8 @@ void XPUElementwise(const framework::ExecutionContext& ctx,
PADDLE_ENFORCE_EQ
(
x_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"XPU only support LoDTensor, Input(X) is not LoD
Tensor"
));
platform
::
errors
::
InvalidArgument
(
"XPU only support phi::DenseTensor, "
"Input(X) is not phi::Dense
Tensor"
));
auto
x
=
x_var
->
Get
<
phi
::
DenseTensor
>
();
auto
*
y
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Y"
);
...
...
paddle/fluid/operators/fused/fused_bn_activation_op.cc
浏览文件 @
30a31a53
...
...
@@ -24,8 +24,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
void
FusedBatchNormActOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
PADDLE_ENFORCE_EQ
(
ctx
->
HasInput
(
"X"
),
true
,
...
...
@@ -309,8 +307,8 @@ framework::OpKernelType FusedBatchNormActGradOp::GetExpectedKernelType(
const
Tensor
*
t
=
nullptr
;
if
(
var
->
IsType
<
Tensor
>
())
{
t
=
&
var
->
Get
<
Tensor
>
();
}
else
if
(
var
->
IsType
<
LoD
Tensor
>
())
{
t
=
&
var
->
Get
<
LoD
Tensor
>
();
}
else
if
(
var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
t
=
&
var
->
Get
<
phi
::
Dense
Tensor
>
();
}
if
(
t
==
nullptr
)
{
PADDLE_THROW
(
...
...
paddle/fluid/operators/fused/fused_bn_add_activation_op.cc
浏览文件 @
30a31a53
...
...
@@ -23,8 +23,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
void
FusedBatchNormAddActOp
::
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
{
OP_INOUT_CHECK
(
ctx
->
HasInput
(
"X"
),
"Input"
,
"X"
,
"FusedBatchNormAddActOp"
);
...
...
@@ -267,8 +265,8 @@ framework::OpKernelType FusedBatchNormAddActGradOp::GetExpectedKernelType(
const
Tensor
*
t
=
nullptr
;
if
(
var
->
IsType
<
Tensor
>
())
{
t
=
&
var
->
Get
<
Tensor
>
();
}
else
if
(
var
->
IsType
<
LoD
Tensor
>
())
{
t
=
&
var
->
Get
<
LoD
Tensor
>
();
}
else
if
(
var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
t
=
&
var
->
Get
<
phi
::
Dense
Tensor
>
();
}
if
(
t
==
nullptr
)
{
PADDLE_THROW
(
...
...
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
浏览文件 @
30a31a53
...
...
@@ -213,23 +213,25 @@ void FusedEmbeddingFCLSTMOpMaker::Make() {
"input. This is a tensor with shape (N x D), where N is the "
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
.
AsDispensable
();
AddOutput
(
"Hidden"
,
"(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"Cell"
,
"(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"Hidden"
,
"(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"Cell"
,
"(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"XX"
,
"(
LoD
Tensor) the result after X * WeightX (size is T x 4D)"
"(
phi::Dense
Tensor) the result after X * WeightX (size is T x 4D)"
" or batched_X (size is T x M), this will be automatically chosen,"
" where T is the total time steps in this mini-batch,"
" D is the hidden size, M is the dim size of x input."
)
.
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
LoD
Tensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(
LoD
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(
LoD
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(
LoD
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(
LoD
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
phi::Dense
Tensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(
phi::Dense
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(
phi::Dense
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(
phi::Dense
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(
phi::Dense
Tensor) (N x D)."
).
AsIntermediate
();
AddAttr
<
bool
>
(
"use_peepholes"
,
"(bool, default: True) "
"whether to enable diagonal/peephole connections."
)
...
...
@@ -286,15 +288,15 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
}
#define INIT_BASE_INPUT_OUTPUT \
auto* ids = ctx.Input<
LoDTensor>("Ids");
\
auto* ids = ctx.Input<
phi::DenseTensor>("Ids");
\
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* c0 = ctx.Input<phi::DenseTensor>("C0"); \
auto* embeddings = ctx.Input<phi::DenseTensor>("Embeddings"); \
auto* wh = ctx.Input<phi::DenseTensor>("WeightH"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* xx = ctx.Output<
LoDTensor>("XX");
\
auto* hidden_out = ctx.Output<
LoDTensor>("Hidden");
\
auto* cell_out = ctx.Output<
LoDTensor>("Cell");
\
auto* xx = ctx.Output<
phi::DenseTensor>("XX");
\
auto* hidden_out = ctx.Output<
phi::DenseTensor>("Hidden");
\
auto* cell_out = ctx.Output<
phi::DenseTensor>("Cell");
\
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
bool use_peepholes = ctx.Attr<bool>("use_peepholes");
...
...
@@ -508,9 +510,9 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
reordered_c0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedC0"
);
auto
*
batched_input
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedInput"
);
auto
*
batched_c_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedCell"
);
auto
*
batched_h_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedHidden"
);
auto
*
batched_input
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedInput"
);
auto
*
batched_c_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedCell"
);
auto
*
batched_h_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedHidden"
);
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_c_out_data
=
batched_c_out
->
mutable_data
<
T
>
(
place
);
...
...
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusedEmbeddingFCLSTMOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc
浏览文件 @
30a31a53
...
...
@@ -162,7 +162,7 @@ class FusedEmbeddingSeqPoolOpGradVarTypeInference
framework
::
proto
::
VarType
::
SELECTED_ROWS
);
}
else
{
VLOG
(
3
)
<<
"fused_embedding_seq_pool_grad op "
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to
LoD
Tensor"
;
<<
framework
::
GradVarName
(
"W"
)
<<
" is set to
phi::Dense
Tensor"
;
ctx
->
SetOutputType
(
out_var_name
,
framework
::
proto
::
VarType
::
LOD_TENSOR
);
}
ctx
->
SetOutputDataType
(
out_var_name
,
ctx
->
GetInputDataType
(
"W"
));
...
...
paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h
浏览文件 @
30a31a53
...
...
@@ -29,7 +29,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
using
SelectedRows
=
phi
::
SelectedRows
;
using
DDim
=
framework
::
DDim
;
...
...
@@ -83,9 +82,9 @@ void prepare_csr_data(const std::vector<uint64_t> &offset,
template
<
typename
T
>
struct
EmbeddingVSumFunctor
{
void
operator
()(
const
framework
::
ExecutionContext
&
context
,
const
LoD
Tensor
*
table_t
,
const
LoD
Tensor
*
ids_t
,
LoD
Tensor
*
output_t
)
{
const
phi
::
Dense
Tensor
*
table_t
,
const
phi
::
Dense
Tensor
*
ids_t
,
phi
::
Dense
Tensor
*
output_t
)
{
auto
*
table
=
table_t
->
data
<
T
>
();
int64_t
table_height
=
table_t
->
dims
()[
0
];
int64_t
table_width
=
table_t
->
dims
()[
1
];
...
...
@@ -141,9 +140,11 @@ template <typename T>
class
FusedEmbeddingSeqPoolKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
const
LoDTensor
*
ids_t
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
// int tensor
LoDTensor
*
output_t
=
context
.
Output
<
LoDTensor
>
(
"Out"
);
// float tensor
const
LoDTensor
*
table_var
=
context
.
Input
<
LoDTensor
>
(
"W"
);
const
phi
::
DenseTensor
*
ids_t
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
// int tensor
phi
::
DenseTensor
*
output_t
=
context
.
Output
<
phi
::
DenseTensor
>
(
"Out"
);
// float tensor
const
phi
::
DenseTensor
*
table_var
=
context
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
const
std
::
string
&
combiner_type
=
context
.
Attr
<
std
::
string
>
(
"combiner"
);
int64_t
last_dim
=
...
...
@@ -228,23 +229,24 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
override
{
auto
*
table_var
=
context
.
InputVar
(
"W"
);
DDim
table_dim
;
if
(
table_var
->
IsType
<
LoD
Tensor
>
())
{
table_dim
=
context
.
Input
<
LoD
Tensor
>
(
"W"
)
->
dims
();
if
(
table_var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
table_dim
=
context
.
Input
<
phi
::
Dense
Tensor
>
(
"W"
)
->
dims
();
}
else
if
(
table_var
->
IsType
<
phi
::
SelectedRows
>
())
{
auto
*
table_t
=
context
.
Input
<
phi
::
SelectedRows
>
(
"W"
);
table_dim
=
table_t
->
value
().
dims
();
}
else
{
PADDLE_THROW
(
platform
::
errors
::
PermissionDenied
(
"The parameter W of a LookupTable "
"must be either
LoD
Tensor or SelectedRows."
));
"must be either
phi::Dense
Tensor or SelectedRows."
));
}
bool
is_sparse
=
context
.
Attr
<
bool
>
(
"is_sparse"
);
// Since paddings are not trainable and fixed in forward, the gradient of
// paddings makes no sense and we don't deal with it in backward.
if
(
is_sparse
)
{
auto
*
ids
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
*
d_output
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
ids
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
*
d_output
=
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
d_table
=
context
.
Output
<
phi
::
SelectedRows
>
(
framework
::
GradVarName
(
"W"
));
// runtime shape
...
...
@@ -276,9 +278,11 @@ class FusedEmbeddingSeqPoolGradKernel : public framework::OpKernel<T> {
}
else
{
#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
!defined(__OSX__)
auto
*
ids
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
*
d_output
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
d_table
=
context
.
Output
<
LoDTensor
>
(
framework
::
GradVarName
(
"W"
));
auto
*
ids
=
context
.
Input
<
phi
::
DenseTensor
>
(
"Ids"
);
auto
*
d_output
=
context
.
Input
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
d_table
=
context
.
Output
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"W"
));
int64_t
padding_idx
=
context
.
Attr
<
int64_t
>
(
"padding_idx"
);
d_table
->
Resize
(
table_dim
);
...
...
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc
浏览文件 @
30a31a53
...
...
@@ -95,7 +95,7 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel {
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
inputs
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
inputs
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
input_data_type
=
framework
::
proto
::
VarType
::
Type
(
0
);
bool
flag
=
0
;
for
(
auto
*
input
:
inputs
)
{
...
...
@@ -121,7 +121,7 @@ class FusedSeqpoolCVMOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(vector<
LoD
Tensor>) The input tensors of"
"(vector<
phi::Dense
Tensor>) The input tensors of"
" operator."
)
.
AsDuplicable
();
AddInput
(
"CVM"
,
...
...
paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu
浏览文件 @
30a31a53
...
...
@@ -424,7 +424,7 @@ template <typename T>
class
FusedSeqpoolCVMCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
inputs
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
inputs
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
outputs
=
ctx
.
MultiOutput
<
phi
::
DenseTensor
>
(
"Out"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
const
auto
slot_size
=
inputs
.
size
();
...
...
@@ -432,7 +432,7 @@ class FusedSeqpoolCVMCUDAKernel : public framework::OpKernel<T> {
std
::
vector
<
const
size_t
*>
lods_data
(
slot_size
);
std
::
vector
<
T
*>
output_data
(
slot_size
);
std
::
vector
<
LoD
Tensor
>
seqpool_outputs
(
slot_size
);
std
::
vector
<
phi
::
Dense
Tensor
>
seqpool_outputs
(
slot_size
);
std
::
vector
<
T
*>
seqpool_output_data
(
slot_size
);
auto
padding_value
=
ctx
.
Attr
<
float
>
(
"pad_value"
);
...
...
@@ -509,9 +509,11 @@ template <typename T>
class
FusedSeqpoolCVMGradCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
out_grads
=
ctx
.
MultiInput
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
in_grads
=
ctx
.
MultiOutput
<
LoDTensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
cvm
=
ctx
.
Input
<
LoDTensor
>
(
"CVM"
);
auto
out_grads
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
in_grads
=
ctx
.
MultiOutput
<
phi
::
DenseTensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
cvm
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"CVM"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
phi
::
GPUContext
>();
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
auto
use_cvm
=
ctx
.
Attr
<
bool
>
(
"use_cvm"
);
...
...
paddle/fluid/operators/fused/fused_seqpool_cvm_op.h
浏览文件 @
30a31a53
...
...
@@ -23,8 +23,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
>
class
FusedSeqpoolCVMOpCPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
paddle/fluid/operators/fused/fusion_group_op.cc
浏览文件 @
30a31a53
...
...
@@ -87,10 +87,10 @@ class FusionGroupOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"Inputs"
,
"(std::vector<
LoD
Tensor>) The inputs of fusion_group op."
)
"(std::vector<
phi::Dense
Tensor>) The inputs of fusion_group op."
)
.
AsDuplicable
();
AddOutput
(
"Outs"
,
"(std::vector<
LoD
Tensor>) The outputs of fusion_group op."
)
"(std::vector<
phi::Dense
Tensor>) The outputs of fusion_group op."
)
.
AsDuplicable
();
AddAttr
<
std
::
vector
<
int
>>
(
"outs_dtype"
,
"The data type of Outputs in fusion_group op."
)
...
...
paddle/fluid/operators/fused/fusion_gru_op.cc
浏览文件 @
30a31a53
...
...
@@ -154,11 +154,12 @@ framework::OpKernelType FusionGRUOp::GetExpectedKernelType(
}
void
FusionGRUOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) the input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"X"
,
"(phi::DenseTensor) the input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"H0"
,
"(Tensor, optional) The initial hidden state is an optional "
"input. This is a tensor with shape (N x D), where N is the "
...
...
@@ -181,18 +182,18 @@ void FusionGRUOpMaker::Make() {
AddOutput
(
"ReorderedH0"
,
"(Tensor) (N x D), which N is the min-batch size."
)
.
AsIntermediate
();
AddOutput
(
"XX"
,
"(
LoD
Tensor) the result after X * WeightX (size is T x 3D)"
"(
phi::Dense
Tensor) the result after X * WeightX (size is T x 3D)"
" or batched_X (size is T x M), this will be automatically chosen,"
" where T is the total time steps in this mini-batch,"
" D is the hidden size, M is the dim size of x input."
)
.
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
LoD
Tensor) This is the batched result of input X"
"(
phi::Dense
Tensor) This is the batched result of input X"
"or the batched result after fc, shape (T x 3D)"
)
.
AsIntermediate
();
AddOutput
(
"BatchedOut"
,
"(
LoD
Tensor) (T X D) save batched hidden."
)
AddOutput
(
"BatchedOut"
,
"(
phi::Dense
Tensor) (T X D) save batched hidden."
)
.
AsIntermediate
();
AddOutput
(
"Hidden"
,
"(
LoD
Tensor) (T x D) Same as GRUOp"
);
AddOutput
(
"Hidden"
,
"(
phi::Dense
Tensor) (T x D) Same as GRUOp"
);
AddAttr
<
std
::
string
>
(
"activation"
,
"(string, default tanh) "
"The activation type used for output candidate {h}_t."
)
...
...
@@ -257,9 +258,9 @@ class FusionGRUKernel : public framework::OpKernel<T> {
}
#define INIT_BASE_DEFINES \
auto* x = ctx.Input<
LoDTensor>("X");
\
auto* x = ctx.Input<
phi::DenseTensor>("X");
\
auto* wh = ctx.Input<phi::DenseTensor>("WeightH"); \
auto* xx = ctx.Output<
LoDTensor>("XX");
\
auto* xx = ctx.Output<
phi::DenseTensor>("XX");
\
auto x_lod = x->lod(); \
auto x_dims = x->dims();
/* T x M*/
\
auto x_mat_dims = (x_dims.size() == 3 && x_dims[1] == 1) \
...
...
@@ -273,7 +274,7 @@ class FusionGRUKernel : public framework::OpKernel<T> {
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* wx = ctx.Input<phi::DenseTensor>("WeightX"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* hidden_out = ctx.Output<
LoDTensor>("Hidden");
\
auto* hidden_out = ctx.Output<
phi::DenseTensor>("Hidden");
\
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
const int M = x_mat_dims[1]; \
const int D = wh_dims[0]; \
...
...
@@ -398,8 +399,8 @@ class FusionGRUKernel : public framework::OpKernel<T> {
}
INIT_OTHER_DEFINES
;
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
batched_input
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedInput"
);
auto
*
batched_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedOut"
);
auto
*
batched_input
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedInput"
);
auto
*
batched_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedOut"
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_out_data
=
batched_out
->
mutable_data
<
T
>
(
place
);
hidden_out
->
mutable_data
<
T
>
(
place
);
...
...
paddle/fluid/operators/fused/fusion_gru_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionGRUOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_lstm_op.cc
浏览文件 @
30a31a53
...
...
@@ -177,11 +177,12 @@ framework::OpKernelType FusionLSTMOp::GetExpectedKernelType(
}
void
FusionLSTMOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) the input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"X"
,
"(phi::DenseTensor) the input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"WeightX"
,
"(Tensor) the learnable weights of X."
" - The shape is (M x 4D), where M is the dim size of x, D is the "
...
...
@@ -214,23 +215,25 @@ void FusionLSTMOpMaker::Make() {
"input. This is a tensor with shape (N x D), where N is the "
"batch size. `H0` and `C0` can be NULL but only at the same time."
)
.
AsDispensable
();
AddOutput
(
"Hidden"
,
"(LoDTensor) (same as LSTMOp) the hidden state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"Cell"
,
"(LoDTensor) (same as LSTMOp) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"Hidden"
,
"(phi::DenseTensor) (same as LSTMOp) the hidden state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"Cell"
,
"(phi::DenseTensor) (same as LSTMOp) the cell state of LSTM operator. "
"The shape is (T x D), and lod is the same with the `Input`."
);
AddOutput
(
"XX"
,
"(
LoD
Tensor) the result after X * WeightX (size is T x 4D)"
"(
phi::Dense
Tensor) the result after X * WeightX (size is T x 4D)"
" or batched_X (size is T x M), this will be automatically chosen,"
" where T is the total time steps in this mini-batch,"
" D is the hidden size, M is the dim size of x input."
)
.
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
LoD
Tensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(
LoD
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(
LoD
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(
LoD
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(
LoD
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedInput"
,
"(
phi::Dense
Tensor) (T x 4D)."
).
AsIntermediate
();
AddOutput
(
"BatchedHidden"
,
"(
phi::Dense
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"BatchedCell"
,
"(
phi::Dense
Tensor) (T x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedH0"
,
"(
phi::Dense
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"ReorderedC0"
,
"(
phi::Dense
Tensor) (N x D)."
).
AsIntermediate
();
AddOutput
(
"CheckedCell"
,
"(Tensor) (2 x D) only for peephole."
)
.
AsIntermediate
();
AddAttr
<
bool
>
(
"use_peepholes"
,
...
...
@@ -295,23 +298,23 @@ This operator fuse the X into LSTM, more details can refer to LSTM op.
template
<
typename
T
>
class
FuisonLSTMKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
#define INIT_BASE_DEFINES \
using DeviceContext = phi::CPUContext; \
auto* x = ctx.Input<
LoDTensor>("X");
\
auto* h0 = ctx.Input<phi::DenseTensor>("H0"); \
auto* c0 = ctx.Input<phi::DenseTensor>("C0"); \
auto* wx = ctx.Input<phi::DenseTensor>("WeightX"); \
auto* wh = ctx.Input<phi::DenseTensor>("WeightH"); \
auto* bias = ctx.Input<phi::DenseTensor>("Bias"); \
auto* xx = ctx.Output<
LoDTensor>("XX");
\
auto* hidden_out = ctx.Output<
LoDTensor>("Hidden");
\
auto* cell_out = ctx.Output<
LoDTensor>("Cell");
\
bool is_reverse = ctx.Attr<bool>("is_reverse"); \
bool use_peepholes = ctx.Attr<bool>("use_peepholes"); \
auto x_dims = x->dims();
/* T x M*/
\
auto wh_dims = wh->dims();
/* D x 4D*/
\
const int M = x_dims[1]; \
const int D = wh_dims[0]; \
#define INIT_BASE_DEFINES
\
using DeviceContext = phi::CPUContext;
\
auto* x = ctx.Input<
phi::DenseTensor>("X");
\
auto* h0 = ctx.Input<phi::DenseTensor>("H0");
\
auto* c0 = ctx.Input<phi::DenseTensor>("C0");
\
auto* wx = ctx.Input<phi::DenseTensor>("WeightX");
\
auto* wh = ctx.Input<phi::DenseTensor>("WeightH");
\
auto* bias = ctx.Input<phi::DenseTensor>("Bias");
\
auto* xx = ctx.Output<
phi::DenseTensor>("XX");
\
auto* hidden_out = ctx.Output<
phi::DenseTensor>("Hidden");
\
auto* cell_out = ctx.Output<
phi::DenseTensor>("Cell");
\
bool is_reverse = ctx.Attr<bool>("is_reverse");
\
bool use_peepholes = ctx.Attr<bool>("use_peepholes");
\
auto x_dims = x->dims();
/* T x M*/
\
auto wh_dims = wh->dims();
/* D x 4D*/
\
const int M = x_dims[1];
\
const int D = wh_dims[0];
\
const int D4 = wh_dims[1]
#define INIT_OTHER_DEFINES \
...
...
@@ -439,9 +442,9 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
auto
*
reordered_h0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedH0"
);
auto
*
reordered_c0
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ReorderedC0"
);
auto
*
batched_input
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedInput"
);
auto
*
batched_c_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedCell"
);
auto
*
batched_h_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"BatchedHidden"
);
auto
*
batched_input
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedInput"
);
auto
*
batched_c_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedCell"
);
auto
*
batched_h_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"BatchedHidden"
);
T
*
xx_data
=
xx
->
mutable_data
<
T
>
(
place
);
T
*
batched_input_data
=
batched_input
->
mutable_data
<
T
>
(
place
);
T
*
batched_c_out_data
=
batched_c_out
->
mutable_data
<
T
>
(
place
);
...
...
paddle/fluid/operators/fused/fusion_lstm_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionLSTMOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc
浏览文件 @
30a31a53
...
...
@@ -106,14 +106,14 @@ framework::OpKernelType FusionRepeatedFCReluOp::GetExpectedKernelType(
}
void
FusionRepeatedFCReluOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(
LoD
Tensor) Input tensors of this operator."
);
AddInput
(
"X"
,
"(
phi::Dense
Tensor) Input tensors of this operator."
);
AddInput
(
"W"
,
"(Tensor) The weight tensors of this operator."
).
AsDuplicable
();
AddInput
(
"Bias"
,
"(Tensor) The bias tensors of this operator."
)
.
AsDuplicable
();
AddOutput
(
"ReluOut"
,
"(Tensor) The output tensor of each relu operator."
)
.
AsDuplicable
()
.
AsIntermediate
();
AddOutput
(
"Out"
,
"(
LoD
Tensor) Output tensor of this operator."
);
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) Output tensor of this operator."
);
AddComment
(
R"DOC(
Fusion Repeated FC with Relu Operator.
)DOC"
);
...
...
paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionRepeatedFCReluOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc
浏览文件 @
30a31a53
...
...
@@ -95,11 +95,12 @@ framework::OpKernelType FusionSeqConvEltAddReluOp::GetExpectedKernelType(
}
void
FusionSeqConvEltAddReluOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) the input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"X"
,
"(phi::DenseTensor) the input is a LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
// PaddingData only support false yet, should be ensured at pass.
AddInput
(
"Filter"
,
"(Tensor) same as the input(Filter) of sequence conv op is an "
...
...
@@ -111,9 +112,9 @@ void FusionSeqConvEltAddReluOpMaker::Make() {
"output feature size"
);
AddOutput
(
"Out"
,
"(
LoD
Tensor) the output(Out) is a LodTensor, which support "
"(
phi::Dense
Tensor) the output(Out) is a LodTensor, which support "
"variable-time length output sequence. The underlying tensor in "
"this
LoD
Tensor is a matrix with shape (T, N), where, T is the "
"this
phi::Dense
Tensor is a matrix with shape (T, N), where, T is the "
"total time steps in this mini-batch, N is the output feature size."
);
AddOutput
(
"ColMat"
,
"(Tensor) (T, K), where T is where T is the "
...
...
@@ -150,10 +151,10 @@ class FusionSeqConvEltAddReluKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
DeviceContext
=
phi
::
CPUContext
;
auto
*
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Filter"
);
auto
*
b
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Bias"
);
auto
*
y
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
y
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
*
col
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"ColMat"
);
auto
x_lod
=
x
->
lod
();
...
...
paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionSeqConvEltAddReluOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
浏览文件 @
30a31a53
...
...
@@ -110,12 +110,13 @@ framework::OpKernelType FusionSeqExpandConcatFCOp::GetExpectedKernelType(
void
FusionSeqExpandConcatFCOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) input LodDTensors, the first one must be have ref lod "
"(phi::DenseTensor) input LodDTensors, the first one must be have "
"ref lod "
"for sequence expand, and the rest input should have same lod."
)
.
AsDuplicable
();
AddInput
(
"FCWeight"
,
"(Tensor) the weights of fc."
);
AddInput
(
"FCBias"
,
"(Tensor, optional) the bias of fc."
).
AsDispensable
();
AddOutput
(
"Out"
,
"(
LoD
Tensor) Output LodTensor."
);
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) Output LodTensor."
);
AddOutput
(
"FCOut"
,
"(Tensor) the intermediate tensor to keep the result of fc."
...
...
@@ -150,10 +151,10 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel<T> {
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
using
DeviceContext
=
phi
::
CPUContext
;
auto
ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
w
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"FCWeight"
);
auto
*
b
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"FCBias"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
*
fc_out
=
ctx
.
Output
<
phi
::
DenseTensor
>
(
"FCOut"
);
auto
*
ref_in
=
ins
[
0
];
...
...
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionSeqExpandConcatFCOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc
浏览文件 @
30a31a53
...
...
@@ -75,8 +75,9 @@ framework::OpKernelType FusionSeqPoolConcatOp::GetExpectedKernelType(
}
void
FusionSeqPoolConcatOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) Input tensors of this operator."
).
AsDuplicable
();
AddOutput
(
"Out"
,
"(LoDTensor) Output tensor of concat operator."
);
AddInput
(
"X"
,
"(phi::DenseTensor) Input tensors of this operator."
)
.
AsDuplicable
();
AddOutput
(
"Out"
,
"(phi::DenseTensor) Output tensor of concat operator."
);
AddAttr
<
std
::
string
>
(
"pooltype"
,
"(string, default 'SUM') some of the pooling "
"pooltype of SequencePoolOp."
)
...
...
@@ -95,8 +96,8 @@ template <typename T>
class
FusionSeqPoolConcatKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
auto
x0_lod
=
ins
[
0
]
->
lod
();
const
auto
&
x0_dims
=
ins
[
0
]
->
dims
();
...
...
paddle/fluid/operators/fused/fusion_seqpool_concat_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionSeqPoolConcatOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc
浏览文件 @
30a31a53
...
...
@@ -74,11 +74,12 @@ framework::OpKernelType FusionSeqPoolCVMConcatOp::GetExpectedKernelType(
}
void
FusionSeqPoolCVMConcatOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) Input tensors of this operator."
).
AsDuplicable
();
AddInput
(
"X"
,
"(phi::DenseTensor) Input tensors of this operator."
)
.
AsDuplicable
();
AddInput
(
"CVM"
,
"(Tensor), a 2-D Tensor with shape [N x 2], where N is the batch "
"size, 2 is show and click."
);
AddOutput
(
"Out"
,
"(
LoD
Tensor) Output tensor of concat operator."
);
AddOutput
(
"Out"
,
"(
phi::Dense
Tensor) Output tensor of concat operator."
);
AddAttr
<
std
::
string
>
(
"pooltype"
,
"(string, default 'SUM') some of the pooling "
"pooltype of SequencePoolOp."
)
...
...
@@ -98,8 +99,8 @@ template <typename T>
class
FusionSeqPoolCVMConcatKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
ins
=
ctx
.
MultiInput
<
LoD
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
ins
=
ctx
.
MultiInput
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
std
::
string
pooltype
=
ctx
.
Attr
<
std
::
string
>
(
"pooltype"
);
auto
x0_lod
=
ins
[
0
]
->
lod
();
const
auto
&
x0_dims
=
ins
[
0
]
->
dims
();
...
...
paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
class
FusionSeqPoolCVMConcatOp
:
public
framework
::
OperatorWithKernel
{
...
...
paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h
浏览文件 @
30a31a53
...
...
@@ -18,7 +18,6 @@
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
Tensor
=
phi
::
DenseTensor
;
// ( (A.^2 * B.^2) - (A * B).^2 ) .* scalar
...
...
paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc
浏览文件 @
30a31a53
...
...
@@ -59,11 +59,11 @@ class MultiGRUHandler {
origin_mode_
(
ctx
.
Attr
<
bool
>
(
"origin_mode"
)),
layers_
(
ctx
.
Attr
<
int
>
(
"layers"
)),
concat_pds_
(
layers_
,
std
::
shared_ptr
<
dnnl
::
concat
::
primitive_desc
>
()),
x_
(
ctx
.
Input
<
LoD
Tensor
>
(
"X"
)),
x_
(
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
)),
weights_x_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"WeightX"
)),
weights_h_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"WeightH"
)),
biases_
(
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Bias"
)),
hidden_
(
ctx
.
Output
<
LoD
Tensor
>
(
"Hidden"
)),
hidden_
(
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Hidden"
)),
x_lod_
(
x_
->
lod
()[
0
])
{
PADDLE_ENFORCE_EQ
(
weights_x_
.
size
(),
...
...
@@ -127,7 +127,8 @@ class MultiGRUHandler {
if
(
is_int8
)
{
// Add int8 attributes
const
auto
scale_weights
=
ctx
.
MultiInput
<
LoDTensor
>
(
"Scale_weights"
);
const
auto
scale_weights
=
ctx
.
MultiInput
<
phi
::
DenseTensor
>
(
"Scale_weights"
);
PADDLE_ENFORCE_EQ
(
scale_weights
.
size
(),
layers_
*
2
,
...
...
@@ -669,11 +670,11 @@ class MultiGRUHandler {
// on Ti size, thus we need another key to cache them
std
::
string
memory_key_
;
const
LoD
Tensor
*
x_
;
const
phi
::
Dense
Tensor
*
x_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
weights_x_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
weights_h_
;
const
std
::
vector
<
const
phi
::
DenseTensor
*>
biases_
;
LoD
Tensor
*
hidden_
;
phi
::
Dense
Tensor
*
hidden_
;
std
::
vector
<
dnnl
::
primitive_attr
>
attrs_
;
const
paddle
::
framework
::
Vector
<
size_t
>&
x_lod_
;
};
...
...
paddle/fluid/operators/fused/multi_gru_op.cc
浏览文件 @
30a31a53
...
...
@@ -148,11 +148,12 @@ framework::OpKernelType MultiGRUOp::GetExpectedKernelType(
}
void
MultiGRUOpMaker
::
Make
()
{
AddInput
(
"X"
,
"(LoDTensor) the input is an LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this LoDTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"X"
,
"(phi::DenseTensor) the input is an LodTensor, which support "
"variable-time length input sequence. The underlying tensor in "
"this phi::DenseTensor is a matrix with shape (T X M), where T is the "
"total time steps in this mini-batch, M is the dim size of x."
);
AddInput
(
"WeightX"
,
"(MultiTensor) The FC weight with shape (M x 3D),"
"where M is the dim size of x, D is the hidden size. "
)
...
...
@@ -176,7 +177,7 @@ void MultiGRUOpMaker::Make() {
"Only used with MKL-DNN INT8."
)
.
AsDuplicable
()
.
AsDispensable
();
AddOutput
(
"Hidden"
,
"(
LoD
Tensor) (T x D) Same as GRUOp"
);
AddOutput
(
"Hidden"
,
"(
phi::Dense
Tensor) (T x D) Same as GRUOp"
);
AddAttr
<
std
::
string
>
(
"activation"
,
"(string, default tanh) "
"The activation type used for output candidate {h}_t."
)
...
...
paddle/fluid/operators/fused/multi_gru_op.h
浏览文件 @
30a31a53
...
...
@@ -20,7 +20,6 @@ namespace paddle {
namespace
operators
{
using
framework
::
ExecutionContext
;
using
LoDTensor
=
phi
::
DenseTensor
;
class
MultiGRUOp
:
public
framework
::
OperatorWithKernel
{
public:
...
...
paddle/fluid/operators/math/context_project.h
浏览文件 @
30a31a53
...
...
@@ -27,7 +27,6 @@ namespace operators {
namespace
math
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
/*
* \brief Context projection concatenates features in adjacent time-steps in
...
...
@@ -51,9 +50,8 @@ using LoDTensor = phi::DenseTensor;
* For a mini-batch of 2 variable lengths sentences, containing 3, and 1
* time-steps:
*
* Assumed input (X) is a [4, M, N] float LoDTensor, and X->lod()[0] = [0, 3,
* 4].
* Besides, for the sake of simplicity, we assume M=1 and N=2.
* Assumed input (X) is a [4, M, N] float phi::DenseTensor, and X->lod()[0] =
* [0, 3, 4]. Besides, for the sake of simplicity, we assume M=1 and N=2.
*
* X = [[a1, a2;
* b1, b2;
...
...
@@ -89,7 +87,7 @@ template <typename DeviceContext, typename T>
class
ContextProjectFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
LoD
Tensor
&
in
,
const
phi
::
Dense
Tensor
&
in
,
const
phi
::
DenseTensor
*
padding_data
,
bool
padding_trainable
,
const
int
context_start
,
...
...
@@ -218,7 +216,7 @@ template <typename DeviceContext, typename T>
class
ContextProjectGradFunctor
{
public:
void
operator
()(
const
DeviceContext
&
context
,
const
LoD
Tensor
&
in
,
const
phi
::
Dense
Tensor
&
in
,
bool
padding_trainable
,
const
int
context_start
,
const
int
context_length
,
...
...
paddle/fluid/operators/math/sequence_padding.h
浏览文件 @
30a31a53
...
...
@@ -82,8 +82,8 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
}
/*
* \brief Padding/Unpadding
LoDTensor to/from normal Tensor of the shap
e
*
[max_sequence_length, num_sequences, sequence_width].
* \brief Padding/Unpadding
phi::DenseTensor to/from normal Tensor of th
e
*
shape
[max_sequence_length, num_sequences, sequence_width].
*
* Padding sequence:
* padding[i] = seq[lod[level][i]]
...
...
@@ -97,13 +97,11 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims,
* padding (s0, s1, s2, s3; s0, s1, s2, 0; s0, 0, s2, 0; s0, 0, 0, 0)
*
* \param context device context of this functor.
* \param seq LoDTensor which is stored in sequence format, the shape
* is [total_sequence_length, sequence_width] where
* total_sequence_length is the sum of all sequences'
* length.
* \param padding Tensor which is padded to the same length, the shape is
* [max_sequence_length, num_sequences, sequence_width].
* \param norm_by_times whether dividing sequence's length.
* \param seq phi::DenseTensor which is stored in sequence format, the
* shape is [total_sequence_length, sequence_width] where total_sequence_length
* is the sum of all sequences' length. \param padding Tensor which is
* padded to the same length, the shape is [max_sequence_length, num_sequences,
* sequence_width]. \param norm_by_times whether dividing sequence's length.
*
* \note transposition is also done in this functor.
*/
...
...
paddle/fluid/operators/math/sequence_pooling.cc
浏览文件 @
30a31a53
...
...
@@ -25,7 +25,6 @@ namespace operators {
namespace
math
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
,
int
MajorType
=
Eigen
::
RowMajor
,
typename
IndexType
=
Eigen
::
DenseIndex
>
...
...
paddle/fluid/operators/math/sequence_scale.h
浏览文件 @
30a31a53
...
...
@@ -35,7 +35,8 @@ namespace math {
*
* \param context Device context of this functor.
* \param seq LoDTensor which is stored in sequence format, the shape
* \param seq phi::DenseTensor which is stored in sequence format, the
shape
* is [total_sequence_length, sequence_width] where
* total_sequence_length is the sum of all sequences'
* length.
...
...
paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
浏览文件 @
30a31a53
...
...
@@ -28,7 +28,6 @@ using dnnl::prop_kind;
using
dnnl
::
stream
;
using
framework
::
DDim
;
using
framework
::
ExecutionContext
;
using
LoDTensor
=
phi
::
DenseTensor
;
using
phi
::
funcs
::
OneDNNGetDataType
;
using
phi
::
funcs
::
to_void_cast
;
using
platform
::
MKLDNNDeviceContext
;
...
...
@@ -382,7 +381,7 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
void
PrepareSrcMem
(
const
std
::
shared_ptr
<
inner_product_forward
>&
fc_p
,
const
std
::
shared_ptr
<
dnnl
::
memory
>&
src_mem
,
const
LoD
Tensor
*
x
,
const
phi
::
Dense
Tensor
*
x
,
const
dnnl
::
engine
&
engine
)
const
{
auto
x_md
=
x
->
mem_desc
().
reshape
(
src_mem
->
get_desc
().
dims
());
if
(
x_md
!=
src_mem
->
get_desc
())
{
...
...
@@ -403,10 +402,10 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
ctx
.
template
device_context
<
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
auto
*
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"Input"
);
const
auto
*
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Input"
);
const
auto
*
weights
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"W"
);
const
auto
*
bias
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Bias"
);
auto
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
const
auto
&
scale_weights
=
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"Scale_weights"
);
...
...
@@ -513,9 +512,9 @@ class FCMKLDNNKernel : public framework::OpKernel<T_in> {
}
void
RecomputeOutputDims
(
const
ExecutionContext
&
ctx
,
const
LoD
Tensor
*
x
,
const
phi
::
Dense
Tensor
*
x
,
const
phi
::
DenseTensor
*
weights
,
LoD
Tensor
*
out
)
const
{
phi
::
Dense
Tensor
*
out
)
const
{
int
in_num_col_dims
=
ctx
.
Attr
<
int
>
(
"in_num_col_dims"
);
bool
padding_weights
=
ctx
.
Attr
<
bool
>
(
"padding_weights"
);
PADDLE_ENFORCE_EQ
(
padding_weights
,
...
...
paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
浏览文件 @
30a31a53
...
...
@@ -26,7 +26,6 @@ namespace operators {
using
framework
::
DDim
;
using
framework
::
ExecutionContext
;
using
LoDTensor
=
phi
::
DenseTensor
;
using
platform
::
MatMulV2MKLDNNHandler
;
using
platform
::
MKLDNNDeviceContext
;
...
...
paddle/fluid/operators/nccl/nccl_op.cu.cc
浏览文件 @
30a31a53
...
...
@@ -19,8 +19,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
LoDTensor
=
phi
::
DenseTensor
;
using
platform
::
Communicator
;
template
<
typename
Type
>
...
...
@@ -62,8 +60,8 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
true
,
platform
::
errors
::
PreconditionNotMet
(
"This kernel only runs on GPU device."
));
auto
*
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
*
comm
=
ctx
.
Input
<
Communicator
>
(
"Communicator"
);
std
::
string
reduction
=
ctx
.
Attr
<
std
::
string
>
(
"reduction"
);
...
...
@@ -97,8 +95,8 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
true
,
platform
::
errors
::
InvalidArgument
(
"This kernel only runs on GPU device."
));
auto
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"X"
);
// x0, x1, x2
auto
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
);
// x0, x1, x2
auto
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
auto
*
comm
=
ctx
.
Input
<
Communicator
>
(
"Communicator"
);
int
root
=
ctx
.
Attr
<
int
>
(
"root"
);
std
::
string
reduction
=
ctx
.
Attr
<
std
::
string
>
(
"reduction"
);
...
...
@@ -144,7 +142,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
int
gpu_id
=
ctx
.
GetPlace
().
GetDeviceId
();
int
idx
=
comm
->
GetCommId
(
gpu_id
);
if
(
idx
==
root
)
{
auto
*
x
=
ctx
.
Input
<
LoD
Tensor
>
(
"X"
);
auto
*
x
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"X"
);
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke Bcast. send "
<<
x
->
numel
();
PADDLE_ENFORCE_GPU_SUCCESS
(
platform
::
dynload
::
ncclBcast
(
reinterpret_cast
<
void
*>
(
const_cast
<
T
*>
(
x
->
data
<
T
>
())),
...
...
@@ -155,7 +153,7 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
ctx
.
cuda_device_context
().
stream
()));
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" finished Bcast."
;
}
else
{
auto
*
out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Out"
);
auto
*
out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Out"
);
VLOG
(
3
)
<<
"gpu : "
<<
gpu_id
<<
" invoke Bcast. recv buffer "
<<
phi
::
product
(
out
->
dims
());
PADDLE_ENFORCE_GPU_SUCCESS
(
...
...
paddle/fluid/operators/optimizers/adam_op_mlu.cc
浏览文件 @
30a31a53
...
...
@@ -20,7 +20,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
T
>
class
AdamMLUKernel
:
public
framework
::
OpKernel
<
T
>
{
...
...
@@ -30,32 +29,32 @@ class AdamMLUKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
param
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Grad(%s)'s type should be
LoD
Tensor, "
"The Grad(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
grad
=
ctx
.
Input
<
LoD
Tensor
>
(
"Grad"
);
auto
*
mom1
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment1"
);
auto
*
mom2
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment2"
);
auto
*
lr
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
grad
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Grad"
);
auto
*
mom1
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment1"
);
auto
*
mom2
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment2"
);
auto
*
lr
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
auto
*
beta1_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta1Pow"
);
auto
*
beta2_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta2Pow"
);
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
mom1_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Moment1Out"
);
auto
*
mom2_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Moment2Out"
);
auto
*
beta1_pow_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Beta1PowOut"
);
auto
*
beta2_pow_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Beta2PowOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
auto
*
mom1_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Moment1Out"
);
auto
*
mom2_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Moment2Out"
);
auto
*
beta1_pow_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Beta1PowOut"
);
auto
*
beta2_pow_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Beta2PowOut"
);
bool
skip_update
=
false
;
if
(
ctx
.
HasInput
(
"SkipUpdate"
))
{
...
...
@@ -110,8 +109,8 @@ class AdamMLUKernel : public framework::OpKernel<T> {
mom1_out
->
ShareDataWith
(
*
mom1
);
mom2_out
->
ShareDataWith
(
*
mom2
);
LoD
Tensor
beta1_pow_tmp
;
LoD
Tensor
beta2_pow_tmp
;
phi
::
Dense
Tensor
beta1_pow_tmp
;
phi
::
Dense
Tensor
beta2_pow_tmp
;
if
(
beta1_pow
->
place
()
==
platform
::
CPUPlace
())
{
T
beta1
=
*
beta1_pow
->
data
<
T
>
();
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
...
...
@@ -292,13 +291,13 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
}
bool
with_decay
=
ctx
.
Attr
<
bool
>
(
"with_decay"
);
const
bool
multi_precision
=
ctx
.
Attr
<
bool
>
(
"multi_precision"
);
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
master_param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"MasterParamOut"
);
const
auto
*
master_param
=
ctx
.
Input
<
LoD
Tensor
>
(
"MasterParam"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
auto
*
master_param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MasterParamOut"
);
const
auto
*
master_param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"MasterParam"
);
VLOG
(
3
)
<<
"Skip update: "
<<
skip_update
<<
", With decay: "
<<
with_decay
;
if
(
!
skip_update
&&
with_decay
)
{
auto
*
param
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
MLUCnnlTensorDesc
param_desc
(
*
param
);
if
(
multi_precision
)
{
VLOG
(
3
)
<<
"[adamw] multi_precision, cast masterparam to param."
;
...
...
@@ -328,12 +327,12 @@ class AdamWMLUKernel : public AdamMLUKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
lr
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
lr
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
float
coeff
=
ctx
.
Attr
<
float
>
(
"coeff"
);
// update param with decay coeff: mul(-1 * lr, coeff * param) + param
...
...
@@ -502,8 +501,8 @@ class MergedAdamMLUKernel : public framework::OpKernel<T> {
mom1_outs
[
i
]
->
ShareDataWith
(
*
mom1s
[
i
]);
mom2_outs
[
i
]
->
ShareDataWith
(
*
mom2s
[
i
]);
LoD
Tensor
beta1_pow_tmp
;
LoD
Tensor
beta2_pow_tmp
;
phi
::
Dense
Tensor
beta1_pow_tmp
;
phi
::
Dense
Tensor
beta2_pow_tmp
;
if
(
beta1_pows
[
i
]
->
place
()
==
platform
::
CPUPlace
())
{
T
beta1
=
*
beta1_pows
[
i
]
->
data
<
T
>
();
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
...
...
paddle/fluid/operators/optimizers/adam_op_npu.cc
浏览文件 @
30a31a53
...
...
@@ -23,7 +23,6 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
DeviceContext
,
typename
T
>
class
AdamNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
...
...
@@ -33,32 +32,32 @@ class AdamNPUKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
param
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Grad(%s)'s type should be
LoD
Tensor, "
"The Grad(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
grad
=
ctx
.
Input
<
LoD
Tensor
>
(
"Grad"
);
auto
*
mom1
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment1"
);
auto
*
mom2
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment2"
);
auto
*
lr
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
grad
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Grad"
);
auto
*
mom1
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment1"
);
auto
*
mom2
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment2"
);
auto
*
lr
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
auto
*
beta1_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta1Pow"
);
auto
*
beta2_pow
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Beta2Pow"
);
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
mom1_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Moment1Out"
);
auto
*
mom2_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Moment2Out"
);
auto
*
beta1_pow_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Beta1PowOut"
);
auto
*
beta2_pow_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"Beta2PowOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
auto
*
mom1_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Moment1Out"
);
auto
*
mom2_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Moment2Out"
);
auto
*
beta1_pow_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Beta1PowOut"
);
auto
*
beta2_pow_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"Beta2PowOut"
);
bool
skip_update
=
false
;
if
(
ctx
.
HasInput
(
"SkipUpdate"
))
{
...
...
@@ -114,8 +113,8 @@ class AdamNPUKernel : public framework::OpKernel<T> {
// NOTE(zhiqiu): beta1_pow and beta2_pow may on CPU and not transform
// place.
LoD
Tensor
beta1_pow_tmp
;
LoD
Tensor
beta2_pow_tmp
;
phi
::
Dense
Tensor
beta1_pow_tmp
;
phi
::
Dense
Tensor
beta2_pow_tmp
;
if
(
beta1_pow
->
place
()
==
platform
::
CPUPlace
())
{
T
beta1
=
*
beta1_pow
->
data
<
T
>
();
beta1_pow_tmp
.
mutable_data
<
T
>
({
1
},
ctx
.
GetPlace
());
...
...
@@ -279,7 +278,7 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
bool
with_decay
=
ctx
.
Attr
<
bool
>
(
"with_decay"
);
if
(
!
skip_update
&&
with_decay
)
{
float
coeff
=
ctx
.
Attr
<
float
>
(
"coeff"
);
auto
*
lr
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
lr
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
auto
place
=
ctx
.
GetPlace
();
...
...
@@ -308,18 +307,18 @@ class AdamWNPUKernel : public AdamNPUKernel<platform::NPUDeviceContext, T> {
PADDLE_THROW
(
platform
::
errors
::
Unimplemented
(
"Master Parma is not supported on npu"
));
}
else
{
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
const
auto
*
param_var
=
ctx
.
InputVar
(
"Param"
);
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
auto
*
param
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
param
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
const
auto
&
runner
=
NpuOpRunner
(
"Mul"
,
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.cc
浏览文件 @
30a31a53
...
...
@@ -32,20 +32,20 @@ class DecayedAdagradOp : public framework::OperatorWithKernel {
"Input"
,
"LearningRate"
,
"DecayedAdagradOp"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be LoDTensor,
but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be LoDTensor,
but the received is %s"
,
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"
but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"
but the received is %s"
,
ctx
->
Inputs
(
"Grad"
).
front
(),
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
OP_INOUT_CHECK
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output"
,
"ParamOut"
,
"DecayedAdagradOp"
);
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
浏览文件 @
30a31a53
...
...
@@ -27,7 +27,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
...
...
@@ -35,7 +35,7 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
())));
...
...
paddle/fluid/operators/optimizers/dpsgd_op.cc
浏览文件 @
30a31a53
...
...
@@ -36,18 +36,18 @@ class DpsgdOp : public framework::OperatorWithKernel {
true
,
platform
::
errors
::
NotFound
(
"Input(LearningRate) of DpsgdOp should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be LoDTensor,
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be LoDTensor,
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Grad"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
...
...
paddle/fluid/operators/optimizers/dpsgd_op.h
浏览文件 @
30a31a53
...
...
@@ -32,7 +32,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
framework
::
ToTypeName
(
param_var
->
Type
())));
...
...
@@ -41,7 +41,7 @@ class DpsgdOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Grad"
).
front
(),
framework
::
ToTypeName
(
grad_var
->
Type
())));
...
...
paddle/fluid/operators/optimizers/lamb_op.cc
浏览文件 @
30a31a53
...
...
@@ -52,10 +52,10 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"Param"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input parameter that has to be updated."
);
AddInput
(
"Grad"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input gradient of the parameter."
);
AddInput
(
"LearningRate"
,
"(Tensor) Learning rate."
);
AddInput
(
"Moment1"
,
"(Tensor) Input first moment."
);
...
...
@@ -63,7 +63,7 @@ class LambOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"Beta1Pow"
,
"(Tensor) Input beta1 power accumulator."
);
AddInput
(
"Beta2Pow"
,
"(Tensor) Input beta2 power accumulator."
);
AddInput
(
"MasterParam"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input master parameter that has to be updated."
)
.
AsDispensable
();
AddInput
(
...
...
paddle/fluid/operators/optimizers/lars_momentum_op.cc
浏览文件 @
30a31a53
...
...
@@ -37,12 +37,12 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
"Output"
,
"VelocityOut"
,
"LarsMomentum"
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be LoDTensor,
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
auto
lr_dims
=
ctx
->
GetInputsDim
(
"LearningRate"
);
auto
grad_dim
=
ctx
->
GetInputsDim
(
"Grad"
);
...
...
@@ -102,7 +102,7 @@ class LarsMomentumOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Grad"
)[
i
],
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
->
Inputs
(
"Grad"
)[
i
].
front
(),
ctx
->
GetInputsVarType
(
"Grad"
)[
i
]));
...
...
@@ -145,31 +145,31 @@ class LarsMomentumOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void
Make
()
override
{
AddInput
(
"Param"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input parameter that has to be updated"
)
.
AsDuplicable
();
AddInput
(
"Grad"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input gradient of the parameter"
)
.
AsDuplicable
();
AddInput
(
"Velocity"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input velocity (corresponding to the parameter) "
"that has to be updated"
)
.
AsDuplicable
();
AddInput
(
"LearningRate"
,
"(
LoDTensor, default LoD
Tensor<float>) "
"(
phi::DenseTensor, default phi::Dense
Tensor<float>) "
"Input learning rate"
)
.
AsDuplicable
();
AddInput
(
"MasterParam"
,
"FP32 master weight for AMP."
)
.
AsDuplicable
()
.
AsDispensable
();
AddOutput
(
"ParamOut"
,
"(
LoD
Tensor) This output is updated parameter. "
"(
phi::Dense
Tensor) This output is updated parameter. "
"It shared memory with Input(Param)."
)
.
AsDuplicable
();
AddOutput
(
"VelocityOut"
,
"(
LoD
Tensor) This output is updated velocity. "
"(
phi::Dense
Tensor) This output is updated velocity. "
"It shared memory with Input(Velocity)."
)
.
AsDuplicable
();
AddOutput
(
"MasterParamOut"
,
...
...
paddle/fluid/operators/optimizers/momentum_op.h
浏览文件 @
30a31a53
...
...
@@ -54,12 +54,12 @@ class MomentumOp : public framework::OperatorWithKernel {
true
,
platform
::
errors
::
NotFound
(
"Input(LearningRate) of Momentum should not be null."
));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
,
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be LoDTensor,
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"Param"
).
front
(),
framework
::
proto
::
VarType
::
LOD_TENSOR
,
platform
::
errors
::
InvalidArgument
(
"The input var's type should be phi::DenseTensor, "
"
but the received is %s"
,
ctx
->
GetInputsVarType
(
"Param"
).
front
()));
PADDLE_ENFORCE_EQ
(
ctx
->
HasOutput
(
"ParamOut"
),
true
,
...
...
paddle/fluid/operators/optimizers/rmsprop_op_npu.cc
浏览文件 @
30a31a53
...
...
@@ -16,16 +16,15 @@ namespace paddle {
namespace
operators
{
using
Tensor
=
phi
::
DenseTensor
;
using
LoDTensor
=
phi
::
DenseTensor
;
template
<
typename
DeviceContext
,
typename
T
>
class
RMSPROPNPUKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
auto
*
param_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"ParamOut"
);
auto
*
moment_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"MomentOut"
);
auto
*
mean_square_out
=
ctx
.
Output
<
LoD
Tensor
>
(
"MeanSquareOut"
);
auto
*
param_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"ParamOut"
);
auto
*
moment_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MomentOut"
);
auto
*
mean_square_out
=
ctx
.
Output
<
phi
::
Dense
Tensor
>
(
"MeanSquareOut"
);
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
moment_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
...
...
@@ -34,17 +33,17 @@ class RMSPROPNPUKernel : public framework::OpKernel<T> {
auto
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
auto
rho
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"decay"
));
auto
momentum
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"momentum"
));
auto
*
p_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"Param"
);
auto
*
ms_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"MeanSquare"
);
auto
*
lr_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"LearningRate"
);
auto
*
mom_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"Moment"
);
auto
*
p_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Param"
);
auto
*
ms_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"MeanSquare"
);
auto
*
lr_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"LearningRate"
);
auto
*
mom_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Moment"
);
bool
centered
=
ctx
.
Attr
<
bool
>
(
"centered"
);
auto
stream
=
ctx
.
template
device_context
<
paddle
::
platform
::
NPUDeviceContext
>()
.
stream
();
if
(
grad_var
->
IsType
<
LoD
Tensor
>
())
{
auto
*
grad_tensor
=
ctx
.
Input
<
LoD
Tensor
>
(
"Grad"
);
if
(
grad_var
->
IsType
<
phi
::
Dense
Tensor
>
())
{
auto
*
grad_tensor
=
ctx
.
Input
<
phi
::
Dense
Tensor
>
(
"Grad"
);
if
(
centered
)
{
framework
::
NPUAttributeMap
attr_input
=
{{
"use_locking"
,
false
}};
const
Tensor
*
rho_tensor
=
nullptr
;
...
...
paddle/fluid/operators/optimizers/sgd_op.cu
浏览文件 @
30a31a53
...
...
@@ -72,7 +72,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
param_var
->
IsType
<
phi
::
DenseTensor
>
(),
true
,
platform
::
errors
::
InvalidArgument
(
"The Var(%s)'s type should be
LoD
Tensor, "
"The Var(%s)'s type should be
phi::Dense
Tensor, "
"but the received is %s"
,
ctx
.
InputNames
(
"Param"
).
front
(),
paddle
::
framework
::
ToTypeName
(
param_var
->
Type
())));
...
...
@@ -107,7 +107,7 @@ class SGDOpKernel<phi::GPUContext, T> : public framework::OpKernel<T> {
?
master_param_out
->
mutable_data
<
MPDType
>
(
ctx
.
GetPlace
())
:
nullptr
;
// Actually, all tensors are
LoD
Tensor except SelectedRows.
// Actually, all tensors are
phi::Dense
Tensor except SelectedRows.
if
(
grad_var
->
IsType
<
phi
::
DenseTensor
>
())
{
auto
*
grad
=
ctx
.
Input
<
phi
::
DenseTensor
>
(
"Grad"
);
...
...
paddle/fluid/operators/optimizers/sgd_op.h
浏览文件 @
30a31a53
此差异已折叠。
点击以展开。
paddle/fluid/operators/pscore/distributed_lookup_table_op.cc
浏览文件 @
30a31a53
此差异已折叠。
点击以展开。
paddle/fluid/operators/pscore/distributed_push_sparse_op.cc
浏览文件 @
30a31a53
此差异已折叠。
点击以展开。
paddle/fluid/operators/pscore/fake_init_op.cc
浏览文件 @
30a31a53
此差异已折叠。
点击以展开。
paddle/fluid/operators/reader/create_py_reader_op.cc
浏览文件 @
30a31a53
此差异已折叠。
点击以展开。
paddle/fluid/operators/reader/read_op.cc
浏览文件 @
30a31a53
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录