PaddlePaddle / Paddle-Lite
Commit 6641a314
Authored Feb 15, 2019 by hjchen2
Deliver input lod to output for elementwise_add/top_k/activation ops
Parent: 276ec6f9
Showing 12 changed files with 557 additions and 325 deletions (+557 −325)
src/operators/activation_op.cpp                         +6   -5
src/operators/elementwise_add_op.cpp                    +1   -0
src/operators/kernel/arm/beam_search_decode_kernel.cpp  +243 -10
src/operators/kernel/arm/conv_add_bn_relu_kernel.cpp    +2   -2
src/operators/kernel/arm/conv_bn_add_relu_kernel.cpp    +2   -2
src/operators/kernel/arm/conv_bn_relu_kernel.cpp        +2   -2
src/operators/kernel/arm/conv_kernel.cpp                +1   -1
src/operators/kernel/arm/dwconv_bn_relu_kernel.cpp      +2   -2
src/operators/kernel/arm/sequence_softmax_kernel.cpp    +4   -6
src/operators/op_param.h                                +291 -291
src/operators/softmax_op.cpp                            +1   -0
src/operators/top_k_op.cpp                              +2   -4
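Background for the diffs below (illustrative, not part of the commit): a LoDTensor pairs a flat data buffer with a LoD ("level of details"), a set of offset vectors marking where each variable-length sequence begins and ends. Assuming framework::LoD is essentially a std::vector<std::vector<size_t>>, this toy sketch shows why an op that only resizes its output would silently drop sequence structure, and what forwarding the lod preserves. ToyTensor and InferShapeLike are invented names for the example.

// Illustrative only: shape AND lod must follow the input.
#include <cstdio>
#include <vector>

using LoD = std::vector<std::vector<size_t>>;

struct ToyTensor {
  std::vector<float> data;
  LoD lod;
};

// Mimics InferShape after this commit.
void InferShapeLike(const ToyTensor &in, ToyTensor *out) {
  out->data.resize(in.data.size());  // stands in for Out()->Resize(dims)
  out->lod = in.lod;                 // stands in for Out()->set_lod(lod())
}

int main() {
  // One batch holding two sequences, of lengths 4 and 2.
  ToyTensor in{{0, 1, 2, 3, 4, 5}, {{0, 4, 6}}};
  ToyTensor out;
  InferShapeLike(in, &out);
  for (size_t s = 0; s + 1 < out.lod[0].size(); ++s) {
    printf("sequence %zu spans [%zu, %zu)\n", s, out.lod[0][s],
           out.lod[0][s + 1]);
  }
  return 0;
}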
src/operators/activation_op.cpp

@@ -22,6 +22,7 @@ namespace operators {
   void OpName##Op<Dtype, T>::InferShape() const {               \
     const auto &input_dims = this->param_.InputX()->dims();     \
     this->param_.Out()->Resize(input_dims);                     \
+    this->param_.Out()->set_lod(this->param_.InputX()->lod());  \
   }
 #ifdef RELU_OP
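The hunk above edits a macro that stamps out InferShape for every activation op; the class name is pasted in via OpName##Op, so one added line fixes lod propagation for all of them. Below is a self-contained analog of that pattern — FakeTensor, DEFINE_INFER_SHAPE, and the field names are invented for illustration, not paddle-mobile APIs.

// Self-contained sketch of the token-pasting macro pattern.
#include <iostream>
#include <string>

struct FakeTensor {
  std::string dims;
  std::string lod;
};

#define DEFINE_INFER_SHAPE(OpName)                     \
  struct OpName##Op {                                  \
    FakeTensor *in;                                    \
    FakeTensor *out;                                   \
    void InferShape() const {                          \
      out->dims = in->dims; /* Resize(input_dims) */   \
      out->lod = in->lod;   /* new: set_lod(lod()) */  \
    }                                                  \
  };

DEFINE_INFER_SHAPE(Relu)
DEFINE_INFER_SHAPE(Sigmoid)

int main() {
  FakeTensor in{"2x3", "{0,2,3}"};
  FakeTensor out;
  ReluOp relu{&in, &out};
  relu.InferShape();
  std::cout << "out dims=" << out.dims << " lod=" << out.lod << "\n";
  return 0;
}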
src/operators/elementwise_add_op.cpp

@@ -23,6 +23,7 @@ template <typename Dtype, typename T>
 void ElementwiseAddOp<Dtype, T>::InferShape() const {
   auto x_dim = this->param_.InputX()->dims();
   this->param_.Out()->Resize(x_dim);
+  this->param_.Out()->set_lod(this->param_.InputX()->lod());
 }

 }  // namespace operators
src/operators/kernel/arm/beam_search_decode_kernel.cpp

@@ -15,27 +15,260 @@ limitations under the License. */

#ifdef BEAM_SEARCH_DECODE_OP

#include "operators/kernel/beam_search_decode_kernel.h"
#include "framework/data_type.h"

namespace paddle_mobile {
namespace operators {

using LoDTensor = framework::LoDTensor;
using LoDTensorArray = framework::LoDTensorArray;

// All the lods have 2 levels.
// The first is the source level, the second is the sentence level.
// The source level describes how many prefixes (branches) there are for each
// source sentence (beam); the sentence level describes how these candidates
// belong to the prefixes.
const size_t kSourceLevel = 0;
const size_t kSentenceLevel = 1;

template <typename T>
struct Sentence {
  std::vector<int64_t> word_ids;
  std::vector<T> scores;
};

template <typename T>
using SentenceVector = std::vector<Sentence<T>>;

template <typename T>
struct BeamSearchDecoder {
  BeamSearchDecoder(size_t beam_size, int end_id)
      : beam_size_(beam_size), end_id_(end_id) {}

  /**
   * Convert the result sentence_vector for each source sentence into two
   * LoDTensors: one holds all candidate sentences as word ids, the other
   * holds the corresponding word scores.
   * Param:
   *   sentence_vector_list: sentence_vector for each source sentence.
   *   id_tensor: result LoDTensor for sentences of id.
   *   score_tensor: result LoDTensor for sentences of score.
   *   reverse: whether ids of sentences in sentence_vector_list are reversed.
   *   sort_by_score: whether to sort hypotheses of each sentence by scores.
   */
  void ConvertSentenceVectorToLodTensor(
      std::vector<SentenceVector<T>> sentence_vector_list,
      LoDTensor *id_tensor, LoDTensor *score_tensor, bool reverse = true,
      bool sort_by_score = true) const;

  /**
   * Gather the hypotheses for each source sentence by backtracing through the
   * LoDTensorArray step_ids, whose lods preserve the path in the tree.
   */
  void Backtrace(const LoDTensorArray &step_ids,
                 const LoDTensorArray &step_scores, LoDTensor *id_tensor,
                 LoDTensor *score_tensor) const;

  size_t beam_size_;
  int end_id_;
};

template <typename T>
void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
    std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor *id_tensor,
    LoDTensor *score_tensor, bool reverse, bool sort_by_score) const {
  size_t src_num = sentence_vector_list.size();
  PADDLE_MOBILE_ENFORCE(src_num > 0, "src_num should be larger than 0");

  std::vector<size_t> source_level_lod = {0};
  std::vector<size_t> sentence_level_lod = {0};
  std::vector<int64_t> id_data;
  std::vector<T> score_data;

  for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
    if (sort_by_score) {
      sort(sentence_vector_list[src_idx].begin(),
           sentence_vector_list[src_idx].end(),
           [reverse](const Sentence<T> &a, const Sentence<T> &b) {
             if (reverse)
               return a.scores.front() > b.scores.front();
             else
               return a.scores.back() > b.scores.back();
           });
    }
    for (Sentence<T> &sentence : sentence_vector_list[src_idx]) {
      if (reverse) {
        id_data.insert(id_data.end(), sentence.word_ids.rbegin(),
                       sentence.word_ids.rend());
        score_data.insert(score_data.end(), sentence.scores.rbegin(),
                          sentence.scores.rend());
      } else {
        id_data.insert(id_data.end(), sentence.word_ids.begin(),
                       sentence.word_ids.end());
        score_data.insert(score_data.end(), sentence.scores.begin(),
                          sentence.scores.end());
      }
      sentence_level_lod.push_back(sentence_level_lod.back() +
                                   sentence.word_ids.size());
    }
    source_level_lod.push_back(source_level_lod.back() +
                               sentence_vector_list[src_idx].size());
  }

  framework::LoD lod;
  lod.push_back(source_level_lod);
  lod.push_back(sentence_level_lod);

  id_tensor->set_lod(lod);
  id_tensor->Resize({static_cast<int64_t>(id_data.size())});
  id_tensor->mutable_data<int64_t>();
  // framework::TensorFromVector<int64_t>(id_data, cpu_ctx, id_tensor);

  score_tensor->set_lod(lod);
  score_tensor->Resize({static_cast<int64_t>(score_data.size())});
  score_tensor->mutable_data<T>();
  // framework::TensorFromVector<T>(score_data, cpu_ctx, score_tensor);
}

template <typename T>
void BeamSearchDecoder<T>::Backtrace(const LoDTensorArray &step_ids,
                                     const LoDTensorArray &step_scores,
                                     LoDTensor *id_tensor,
                                     LoDTensor *score_tensor) const {
  PADDLE_MOBILE_ENFORCE(!step_ids.empty(), "step num should be larger than 0");
  PADDLE_MOBILE_ENFORCE(step_ids.size() == step_scores.size(),
                        "step_ids and step_scores should be the same");
  const size_t step_num = step_ids.size();
  const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1;

  std::vector<SentenceVector<T>> sentence_vector_list(
      src_num, SentenceVector<T>(beam_size_));
  std::vector<std::vector<size_t>> prefix_idx_vector_list(src_num);

  for (int step_id = step_num - 1; step_id >= 0; --step_id) {
    auto &cur_ids = step_ids.at(step_id);
    auto &cur_scores = step_scores.at(step_id);
    for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
      // for each source sentence
      auto &sentence_vector = sentence_vector_list.at(src_idx);
      auto &prefix_idx_vector = prefix_idx_vector_list.at(src_idx);
      size_t src_prefix_start = cur_ids.lod().at(kSourceLevel)[src_idx];
      size_t src_prefix_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1];
      if (prefix_idx_vector.empty()) {
        // beams finished and pruned at this step,
        // or the last time step
        for (size_t prefix_idx = src_prefix_start;
             prefix_idx < src_prefix_end; ++prefix_idx) {
          size_t candidate_start =
              cur_ids.lod().at(kSentenceLevel)[prefix_idx];
          size_t candidate_end =
              cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1];
          for (size_t candidate_idx = candidate_start;
               candidate_idx < candidate_end; ++candidate_idx) {
            prefix_idx_vector.push_back(prefix_idx);
            size_t idx = prefix_idx_vector.size() - 1;
            auto cur_id = cur_ids.data<int64_t>()[candidate_idx];
            auto cur_score = cur_scores.data<T>()[candidate_idx];
            sentence_vector.at(idx).word_ids.push_back(cur_id);
            sentence_vector.at(idx).scores.push_back(cur_score);
          }
        }
      } else {
        // use prefix_idx_vector to backtrace
        size_t src_candidate_start =
            cur_ids.lod().at(kSentenceLevel)[src_prefix_start];
        size_t prefix_idx = src_prefix_start;
        size_t candidate_num =
            cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1] -
            cur_ids.lod().at(kSentenceLevel)[prefix_idx];
        for (size_t idx = 0; idx < prefix_idx_vector.size(); ++idx) {
          auto candidate_idx = prefix_idx_vector.at(idx);
          auto cur_id = cur_ids.data<int64_t>()[candidate_idx];
          auto cur_score = cur_scores.data<T>()[candidate_idx];
          if (cur_id != end_id_ || sentence_vector.at(idx).word_ids.empty()) {
            // to skip redundant end tokens
            sentence_vector.at(idx).word_ids.push_back(cur_id);
            sentence_vector.at(idx).scores.push_back(cur_score);
          }
          while (src_candidate_start + candidate_num <= candidate_idx) {
            // search the corresponding prefix
            prefix_idx++;
            candidate_num +=
                cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1] -
                cur_ids.lod().at(kSentenceLevel)[prefix_idx];
          }
          prefix_idx_vector.at(idx) = prefix_idx;
        }
      }
    }
  }

  ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor,
                                   score_tensor, true, true);
}

struct BeamSearchDecodeFunctor {
  BeamSearchDecodeFunctor(const LoDTensorArray &step_ids,
                          const LoDTensorArray &step_scores,
                          LoDTensor *id_tensor, LoDTensor *score_tensor,
                          size_t beam_size, int end_id)
      : beam_size_(beam_size),
        end_id_(end_id),
        step_ids_(step_ids),
        step_scores_(step_scores),
        id_tensor_(id_tensor),
        score_tensor_(score_tensor) {}

  template <typename T>
  void apply() const;

  size_t beam_size_;
  int end_id_;
  const LoDTensorArray &step_ids_;
  const LoDTensorArray &step_scores_;
  LoDTensor *id_tensor_;
  LoDTensor *score_tensor_;
};

template <typename T>
void BeamSearchDecodeFunctor::apply() const {
  BeamSearchDecoder<T> beam_search_decoder(beam_size_, end_id_);
  beam_search_decoder.Backtrace(step_ids_, step_scores_, id_tensor_,
                                score_tensor_);
}

template <>
void BeamSearchDecodeFunctor::apply<bool>() const {
  PADDLE_MOBILE_THROW_EXCEPTION(
      "beam search decode op does not support bool.");
}

template <>
bool BeamSearchDecodeKernel<CPU, float>::Init(
    BeamSearchDecodeParam<CPU> *param) {
  return true;
}

template <>
void BeamSearchDecodeKernel<CPU, float>::Compute(
    const BeamSearchDecodeParam<CPU> &param) {
-  // TODO(hjchen2)
-  DLOG << "BeamSearchDecodeKernel";
-  param.sentence_scores_->Resize(framework::make_ddim({10}));
-  param.sentence_scores_->mutable_data<float>();
-  DLOG << "BeamSearchDecodeKernel";
-  param.sentence_ids_->Resize(framework::make_ddim({10}));
-  param.sentence_ids_->mutable_data<int64_t>();
+  const LoDTensorArray *ids = param.ids_;
+  const LoDTensorArray *scores = param.scores_;
+  const size_t step_num = ids->size();
+  PADDLE_MOBILE_ENFORCE(step_num > 0,
+                        "beam search steps should be larger than 0");
+  for (size_t i = 0; i < step_num; ++i) {
+    PADDLE_MOBILE_ENFORCE(ids->at(i).lod().size() == 2,
+                          "Level of LodTensor should be 2");
+  }
+  const size_t source_num = ids->at(0).lod().at(0).size() - 1;
+  PADDLE_MOBILE_ENFORCE(source_num > 0, "source num should be larger than 0");
+
+  LoDTensor *sentence_ids = param.sentence_ids_;
+  LoDTensor *sentence_scores = param.sentence_scores_;
+
+  framework::VisitDataType(
+      framework::ToDataType(scores->at(0).type()),
+      BeamSearchDecodeFunctor(*ids, *scores, sentence_ids, sentence_scores,
+                              param.beam_size_, param.end_id_));
}

}  // namespace operators
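ConvertSentenceVectorToLodTensor is the bookkeeping half of the decoder: it flattens the hypotheses recovered by Backtrace into one contiguous id buffer plus the 2-level LoD described at the top of the file. Below is a minimal, self-contained sketch of that offset construction, using toy types (Sentence here has only word_ids) rather than paddle-mobile code.

// Toy version of the LoD/offset bookkeeping in ConvertSentenceVectorToLodTensor.
#include <cstdint>
#include <cstdio>
#include <vector>

struct Sentence {
  std::vector<int64_t> word_ids;
};
using SentenceVector = std::vector<Sentence>;

int main() {
  // Source 0 keeps two hypotheses, source 1 keeps one.
  std::vector<SentenceVector> sentence_vector_list = {
      {{{1, 2, 3}}, {{1, 4}}},
      {{{7, 8, 9, 5}}},
  };
  std::vector<size_t> source_level_lod = {0};
  std::vector<size_t> sentence_level_lod = {0};
  std::vector<int64_t> id_data;
  for (const SentenceVector &src : sentence_vector_list) {
    for (const Sentence &sentence : src) {
      id_data.insert(id_data.end(), sentence.word_ids.begin(),
                     sentence.word_ids.end());
      sentence_level_lod.push_back(sentence_level_lod.back() +
                                   sentence.word_ids.size());
    }
    source_level_lod.push_back(source_level_lod.back() + src.size());
  }
  // source_level_lod   == {0, 2, 3}    : hypotheses [0,2) belong to source 0
  // sentence_level_lod == {0, 3, 5, 9} : ids [0,3) form hypothesis 0, etc.
  // id_data            == {1,2,3,1,4,7,8,9,5}
  for (size_t h = 0; h + 1 < sentence_level_lod.size(); ++h) {
    printf("hypothesis %zu: ids [%zu, %zu)\n", h, sentence_level_lod[h],
           sentence_level_lod[h + 1]);
  }
  return 0;
}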
src/operators/kernel/arm/conv_add_bn_relu_kernel.cpp

@@ -41,8 +41,8 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
     inv_std_ptr[i] =
         1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
   }
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  LoDTensor *new_scale = new LoDTensor();
+  LoDTensor *new_bias = new LoDTensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({C});
   auto new_bias_ptr = new_bias->mutable_data<float>({C});
   for (int i = 0; i < C; i++) {
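The inv_std expression visible in this hunk is the first step of the usual batch-norm folding these Init() methods perform. The loop body that consumes it lies outside the quoted context, so the full fold below is an assumption (the standard formulation), shown as a self-contained numeric sketch rather than the kernel's actual code.

// Hedged sketch: standard BN folding into an affine (new_scale, new_bias).
//   inv_std   = 1 / sqrt(variance + epsilon)      (the line in the hunks)
//   new_scale = scale * inv_std                    (assumed fold)
//   new_bias  = bias - mean * scale * inv_std      (assumed fold)
#include <cmath>
#include <cstdio>

int main() {
  const float epsilon = 1e-5f;
  const float mean = 0.5f, variance = 4.0f, scale = 2.0f, bias = 1.0f;
  // Same expression as in the hunks above:
  float inv_std = 1 / static_cast<float>(std::pow(variance + epsilon, 0.5));
  float new_scale = scale * inv_std;
  float new_bias = bias - mean * new_scale;
  // Folded affine form vs. the full batch-norm expression:
  float x = 3.0f;
  std::printf("folded: %f  full: %f\n", new_scale * x + new_bias,
              scale * (x - mean) / std::sqrt(variance + epsilon) + bias);
  return 0;
}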
src/operators/kernel/arm/conv_bn_add_relu_kernel.cpp

@@ -41,8 +41,8 @@ bool ConvBNAddReluKernel<CPU, float>::Init(
     inv_std_ptr[i] =
         1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
   }
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  LoDTensor *new_scale = new LoDTensor();
+  LoDTensor *new_bias = new LoDTensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({C});
   auto new_bias_ptr = new_bias->mutable_data<float>({C});
   for (int i = 0; i < C; i++) {
src/operators/kernel/arm/conv_bn_relu_kernel.cpp

@@ -42,8 +42,8 @@ bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam<CPU> *param) {
     inv_std_ptr[i] =
         1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
   }
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  LoDTensor *new_scale = new LoDTensor();
+  LoDTensor *new_bias = new LoDTensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({C});
   auto new_bias_ptr = new_bias->mutable_data<float>({C});
   for (int i = 0; i < C; i++) {
src/operators/kernel/arm/conv_kernel.cpp

@@ -69,7 +69,7 @@ bool ConvKernel<CPU, float>::Init(ConvParam<CPU> *param) {
       param->Input()->dims()[2] <= 140 /* refered from ncnn */) {
     param->ExecMode() = ConvParam<CPU>::EXEC_WINOGRAD3X3_FLOAT;
     // transform weight
-    param->transformed_filter_ = new framework::Tensor;
+    param->transformed_filter_ = new framework::LoDTensor;
     operators::math::winograd_transform_weight<8, 3>(
         *param->Filter(), param->transformed_filter_);
 #endif
src/operators/kernel/arm/dwconv_bn_relu_kernel.cpp

@@ -40,8 +40,8 @@ bool DWConvBNReluKernel<CPU, float>::Init(FusionDWConvBNReluParam<CPU> *param) {
     inv_std_ptr[i] =
         1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
   }
-  Tensor *new_scale = new Tensor();
-  Tensor *new_bias = new Tensor();
+  LoDTensor *new_scale = new LoDTensor();
+  LoDTensor *new_bias = new LoDTensor();
   auto new_scale_ptr = new_scale->mutable_data<float>({C});
   auto new_bias_ptr = new_bias->mutable_data<float>({C});
   for (int i = 0; i < C; i++) {
src/operators/kernel/arm/sequence_softmax_kernel.cpp

@@ -29,12 +29,10 @@ class SequenceSoftmaxKernel<CPU, T>
   void Compute(const SoftmaxParam<CPU> &param) {
     param.Out()->mutable_data<float>();
     /*
     const framework::LoDTensor *input = param.InputX();
     framework::LoDTensor *output = param.Out();
     math::SequenceSoftmaxFuntor<CPU, T> sequence_softmax;
     sequence_softmax(input, output);
     */
   }
 };
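The functor call in this kernel is commented out, but for intuition: a sequence softmax normalizes within each lod-delimited segment independently, which is exactly why the lod must survive the preceding ops. A toy sketch of that computation (illustrative only, plain vectors in place of LoDTensor; not the math::SequenceSoftmaxFuntor implementation):

// Independent softmax over each lod-delimited segment of a flat buffer.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> x = {1.0f, 2.0f, 3.0f, 1.0f, 1.0f};
  std::vector<size_t> lod = {0, 3, 5};  // two sequences: [0,3) and [3,5)
  std::vector<float> y(x.size());
  for (size_t s = 0; s + 1 < lod.size(); ++s) {
    float max_v = x[lod[s]];
    for (size_t i = lod[s]; i < lod[s + 1]; ++i) max_v = std::max(max_v, x[i]);
    float sum = 0.0f;
    for (size_t i = lod[s]; i < lod[s + 1]; ++i) sum += std::exp(x[i] - max_v);
    for (size_t i = lod[s]; i < lod[s + 1]; ++i)
      y[i] = std::exp(x[i] - max_v) / sum;
  }
  for (float v : y) std::printf("%.3f ", v);  // each segment sums to 1
  std::printf("\n");
  return 0;
}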
src/operators/op_param.h

(Diff collapsed: +291 -291.)
src/operators/softmax_op.cpp

@@ -21,6 +21,7 @@ namespace operators {
 template <typename DeviceType, typename T>
 void SoftmaxOp<DeviceType, T>::InferShape() const {
   this->param_.Out()->Resize(this->param_.InputX()->dims());
+  this->param_.Out()->set_lod(this->param_.InputX()->lod());
 }

 }  // namespace operators
src/operators/top_k_op.cpp

@@ -26,11 +26,9 @@ void TopKOp<DeviceType, T>::InferShape() const {
   // should check k <= dims[-1] && k >= 1
   dims[dims.size() - 1] = k;
   this->param_.output_->Resize(dims);
-  // this->param_.output_->set_lod(this->param_.input_->lod());
-  this->param_.output_->set_lod({{0, 1}});
   this->param_.indices_->Resize(dims);
-  // this->param_.indices_->set_lod(this->param_.input_->lod());
-  this->param_.indices_->set_lod({{0, 1}});
+  this->param_.output_->set_lod(this->param_.input_->lod());
+  this->param_.indices_->set_lod(this->param_.input_->lod());
 }

 }  // namespace operators
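For intuition on why the hard-coded {{0, 1}} lod was worth replacing: it describes a single sequence of length one no matter what the input actually contained, whereas forwarding the input's lod keeps the batch's real sequence boundaries intact. A toy comparison (illustrative only, plain std::vector standing in for framework::LoD):

// Hard-coded lod vs. forwarded lod for a two-sequence batch.
#include <cstdio>
#include <vector>

using LoD = std::vector<std::vector<size_t>>;

int main() {
  LoD input_lod = {{0, 4, 9}};  // two sequences, lengths 4 and 5
  LoD hard_coded = {{0, 1}};    // claims one sequence of length 1
  LoD forwarded = input_lod;    // what set_lod(input_->lod()) now yields
  std::printf("input: %zu sequences, hard-coded: %zu, forwarded: %zu\n",
              input_lod[0].size() - 1, hard_coded[0].size() - 1,
              forwarded[0].size() - 1);
  return 0;
}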