提交 6641a314 编写于 作者: H hjchen2

Delivery input lod to output for elementwise_add/top_k/activation ops

上级 276ec6f9
......@@ -17,11 +17,12 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
#define DEFINE_ACTIVATION_INFERSHAPE(OpName) \
template <typename Dtype, typename T> \
void OpName##Op<Dtype, T>::InferShape() const { \
const auto &input_dims = this->param_.InputX()->dims(); \
this->param_.Out()->Resize(input_dims); \
#define DEFINE_ACTIVATION_INFERSHAPE(OpName) \
template <typename Dtype, typename T> \
void OpName##Op<Dtype, T>::InferShape() const { \
const auto &input_dims = this->param_.InputX()->dims(); \
this->param_.Out()->Resize(input_dims); \
this->param_.Out()->set_lod(this->param_.InputX()->lod()); \
}
#ifdef RELU_OP
......
......@@ -23,6 +23,7 @@ template <typename Dtype, typename T>
void ElementwiseAddOp<Dtype, T>::InferShape() const {
  // The output takes both its dimensions and its LoD from InputX.
  auto input_x = this->param_.InputX();
  auto output = this->param_.Out();
  auto x_dim = input_x->dims();
  output->Resize(x_dim);
  output->set_lod(input_x->lod());
}
} // namespace operators
......
......@@ -15,27 +15,260 @@ limitations under the License. */
#ifdef BEAM_SEARCH_DECODE_OP
#include "operators/kernel/beam_search_decode_kernel.h"
#include <algorithm>
#include <vector>
#include "framework/data_type.h"
namespace paddle_mobile {
namespace operators {
// Short aliases for the framework tensor types used throughout this file.
using LoDTensor = framework::LoDTensor;
using LoDTensorArray = framework::LoDTensorArray;
// Every LoD handled here has two levels.
// The first is the source level, the second is the sentence level.
// The source level describes how many prefixes (branches) belong to each
// source sentence (beam). The sentence level describes how the candidates
// are distributed among those prefixes.
// Index of the source level within a LoD.
const size_t kSourceLevel = 0;
// Index of the sentence level within a LoD.
const size_t kSentenceLevel = 1;
// One hypothesis: its word ids and, element for element, the score stored
// with each word. T is the element type of the scores.
template <typename T>
struct Sentence {
std::vector<int64_t> word_ids;
std::vector<T> scores;
};
// The collection of hypotheses gathered for one source sentence.
template <typename T>
using SentenceVector = std::vector<Sentence<T>>;
/**
 * Turns the per-step beam search results into complete sentences.
 * T is the element type of the scores.
 */
template <typename T>
struct BeamSearchDecoder {
// beam_size: number of hypothesis slots reserved per source sentence.
// end_id: word id that marks the end of a sentence.
BeamSearchDecoder(size_t beam_size, int end_id)
: beam_size_(beam_size), end_id_(end_id) {}
/**
 * Convert the sentence vectors of all source sentences into two LoDTensors:
 * one holding the word ids of every candidate sentence, the other holding
 * the matching word scores.
 * Param:
 *  sentence_vector_list: one sentence vector per source sentence
 *    (taken by value).
 *  id_tensor: result LoDTensor for the word ids of the sentences.
 *  score_tensor: result LoDTensor for the word scores of the sentences.
 *  reverse: whether the words of each sentence in sentence_vector_list are
 *    stored in reverse order.
 *  sort_by_score: whether to sort the hypotheses of each source sentence by
 *    score.
 */
void ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor, bool reverse = true,
bool sort_by_score = true) const;
/**
 * Gather the hypotheses of each source sentence by backtracing through the
 * LoDTensorArray step_ids, whose LoDs record the path in the search tree.
 * The result is written to id_tensor and score_tensor.
 */
void Backtrace(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores, LoDTensor* id_tensor,
LoDTensor* score_tensor) const;
// Number of hypothesis slots reserved per source sentence in Backtrace.
size_t beam_size_;
// Word id treated as the end-of-sentence token in Backtrace.
int end_id_;
};
/**
 * Flatten the hypotheses of every source sentence into two LoDTensors that
 * share one two-level LoD: `id_tensor` receives the word ids and
 * `score_tensor` the matching word scores.
 *
 * Param:
 *  sentence_vector_list: one sentence vector per source sentence; must not be
 *    empty. It is taken by value, so sorting does not touch the caller's copy.
 *  id_tensor: output, resized to the total number of words (int64_t).
 *  score_tensor: output, resized to the total number of scores (T).
 *  reverse: the words of each sentence are stored last-to-first; they are
 *    written out back-to-front to restore the natural order.
 *  sort_by_score: order the hypotheses of each source sentence by descending
 *    score before writing them out.
 *
 * The resulting LoD has a source level (hypotheses per source sentence) and a
 * sentence level (words per hypothesis).
 */
template <typename T>
void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
    std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
    LoDTensor* score_tensor, bool reverse, bool sort_by_score) const {
  const size_t src_num = sentence_vector_list.size();
  PADDLE_MOBILE_ENFORCE(src_num > 0, "src_num should be larger than 0");

  std::vector<size_t> source_level_lod = {0};
  std::vector<size_t> sentence_level_lod = {0};
  std::vector<int64_t> id_data;
  std::vector<T> score_data;

  for (SentenceVector<T>& sentences : sentence_vector_list) {
    if (sort_by_score) {
      std::sort(sentences.begin(), sentences.end(),
                [reverse](const Sentence<T>& a, const Sentence<T>& b) {
                  // Hypothesis slots may be empty; calling front()/back() on
                  // them is undefined, so rank empty slots after all others.
                  if (a.scores.empty() || b.scores.empty()) {
                    return !a.scores.empty() && b.scores.empty();
                  }
                  // In a reversed sentence the score of the final word sits
                  // at the front, otherwise at the back.
                  return reverse ? a.scores.front() > b.scores.front()
                                 : a.scores.back() > b.scores.back();
                });
    }
    for (const Sentence<T>& sentence : sentences) {
      if (reverse) {
        id_data.insert(id_data.end(), sentence.word_ids.rbegin(),
                       sentence.word_ids.rend());
        score_data.insert(score_data.end(), sentence.scores.rbegin(),
                          sentence.scores.rend());
      } else {
        id_data.insert(id_data.end(), sentence.word_ids.begin(),
                       sentence.word_ids.end());
        score_data.insert(score_data.end(), sentence.scores.begin(),
                          sentence.scores.end());
      }
      sentence_level_lod.push_back(sentence_level_lod.back() +
                                   sentence.word_ids.size());
    }
    source_level_lod.push_back(source_level_lod.back() + sentences.size());
  }

  framework::LoD lod;
  lod.push_back(source_level_lod);
  lod.push_back(sentence_level_lod);

  // Allocate each output and copy the flattened data into it; allocating
  // alone would leave the tensor contents uninitialised.
  id_tensor->set_lod(lod);
  id_tensor->Resize({static_cast<int64_t>(id_data.size())});
  std::copy(id_data.begin(), id_data.end(),
            id_tensor->mutable_data<int64_t>());

  score_tensor->set_lod(lod);
  score_tensor->Resize({static_cast<int64_t>(score_data.size())});
  std::copy(score_data.begin(), score_data.end(),
            score_tensor->mutable_data<T>());
}
/**
 * Rebuild the hypotheses of every source sentence by walking the decoding
 * steps from the last one back to the first, then hand them to
 * ConvertSentenceVectorToLodTensor to fill `id_tensor` and `score_tensor`.
 *
 * Param:
 *  step_ids: candidate word ids of every step; must not be empty.
 *  step_scores: candidate scores of every step; must have as many steps as
 *    step_ids.
 *  id_tensor: output tensor for the word ids.
 *  score_tensor: output tensor for the word scores.
 */
template <typename T>
void BeamSearchDecoder<T>::Backtrace(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores,
LoDTensor* id_tensor,
LoDTensor* score_tensor) const {
PADDLE_MOBILE_ENFORCE(!step_ids.empty(), "step num should be larger than 0");
PADDLE_MOBILE_ENFORCE(step_ids.size() == step_scores.size(),
"step_ids and step_scores should be the same");
const size_t step_num = step_ids.size();
// The source-level LoD of the first step has one more offset than there are
// source sentences.
const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1;
// beam_size_ initially empty hypotheses are reserved per source sentence.
std::vector<SentenceVector<T>> sentence_vector_list(
src_num, SentenceVector<T>(beam_size_));
// For each source sentence: one tracked index per hypothesis, used to follow
// that hypothesis into the previous step.
std::vector<std::vector<size_t>> prefix_idx_vector_list(src_num);
// Walk the steps backwards; words are therefore appended last-to-first.
for (int step_id = step_num - 1; step_id >= 0; --step_id) {
auto& cur_ids = step_ids.at(step_id);
auto& cur_scores = step_scores.at(step_id);
for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
// for each source sentence
auto& sentence_vector = sentence_vector_list.at(src_idx);
auto& prefix_idx_vector = prefix_idx_vector_list.at(src_idx);
// Range of prefixes that belong to this source sentence at this step.
size_t src_prefix_start = cur_ids.lod().at(kSourceLevel)[src_idx];
size_t src_prefix_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1];
if (prefix_idx_vector.empty()) {  // nothing is tracked yet: this is the
// last time step, or the source was finished and pruned at this step
for (size_t prefix_idx = src_prefix_start; prefix_idx < src_prefix_end;
++prefix_idx) {
// Range of candidates that extend this prefix.
size_t candidate_start = cur_ids.lod().at(kSentenceLevel)[prefix_idx];
size_t candidate_end =
cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1];
for (size_t candidate_idx = candidate_start;
candidate_idx < candidate_end; ++candidate_idx) {
// Every candidate starts a hypothesis; remember the prefix it came from
// so that the previous step can be looked up.
prefix_idx_vector.push_back(prefix_idx);
size_t idx = prefix_idx_vector.size() - 1;
auto cur_id = cur_ids.data<int64_t>()[candidate_idx];
auto cur_score = cur_scores.data<T>()[candidate_idx];
sentence_vector.at(idx).word_ids.push_back(cur_id);
sentence_vector.at(idx).scores.push_back(cur_score);
}
}
} else {  // follow the indices tracked in prefix_idx_vector one step back
size_t src_candidate_start =
cur_ids.lod().at(kSentenceLevel)[src_prefix_start];
size_t prefix_idx = src_prefix_start;
// Number of candidates covered by the prefixes visited so far; it starts
// with the candidates of the first prefix of this source sentence.
size_t candidate_num =
cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1] -
cur_ids.lod().at(kSentenceLevel)[prefix_idx];
for (size_t idx = 0; idx < prefix_idx_vector.size(); ++idx) {
// The prefix index stored while handling the later step is used as the
// candidate index into this step's data.
auto candidate_idx = prefix_idx_vector.at(idx);
auto cur_id = cur_ids.data<int64_t>()[candidate_idx];
auto cur_score = cur_scores.data<T>()[candidate_idx];
if (cur_id != end_id_ || sentence_vector.at(idx).word_ids.empty()) {
// skip redundant end tokens: an end token is kept only as the first word
// appended to a hypothesis
sentence_vector.at(idx).word_ids.push_back(cur_id);
sentence_vector.at(idx).scores.push_back(cur_score);
}
// prefix_idx and candidate_num carry over between iterations, so this
// search only ever moves forward.
while (src_candidate_start + candidate_num <=
candidate_idx) {  // advance to the prefix that owns this candidate
prefix_idx++;
candidate_num += cur_ids.lod().at(kSentenceLevel)[prefix_idx + 1] -
cur_ids.lod().at(kSentenceLevel)[prefix_idx];
}
// Track the owning prefix for the next (earlier) step.
prefix_idx_vector.at(idx) = prefix_idx;
}
}
}
}
// reverse = true: the words were collected last-to-first above.
// sort_by_score = true: hypotheses are ordered by score.
ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor,
score_tensor, true, true);
}
// Bundles the inputs and outputs of one beam search decode so that the
// decode can be run for a score element type chosen through apply<T>().
// The step arrays are held by reference and the output tensors by pointer;
// none of them is owned by the functor.
struct BeamSearchDecodeFunctor {
// step_ids / step_scores: per-step candidate ids and scores (stored by
//   reference).
// id_tensor / score_tensor: tensors that receive the decoded result.
// beam_size / end_id: forwarded to BeamSearchDecoder.
BeamSearchDecodeFunctor(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores,
LoDTensor* id_tensor, LoDTensor* score_tensor,
size_t beam_size, int end_id)
: beam_size_(beam_size),
end_id_(end_id),
step_ids_(step_ids),
step_scores_(step_scores),
id_tensor_(id_tensor),
score_tensor_(score_tensor) {}
// Runs the decode with T as the score element type.
template <typename T>
void apply() const;
size_t beam_size_;
int end_id_;
// References to caller-provided arrays.
const LoDTensorArray& step_ids_;
const LoDTensorArray& step_scores_;
// Output tensors written by apply<T>().
LoDTensor* id_tensor_;
LoDTensor* score_tensor_;
};
// Decodes with T as the score element type: builds a decoder from the stored
// beam size and end id, then backtraces the stored steps into the outputs.
template <typename T>
void BeamSearchDecodeFunctor::apply() const {
  BeamSearchDecoder<T>(beam_size_, end_id_)
      .Backtrace(step_ids_, step_scores_, id_tensor_, score_tensor_);
}

// bool is not a usable score type; asking for it is reported as an error.
template <>
void BeamSearchDecodeFunctor::apply<bool>() const {
  PADDLE_MOBILE_THROW_EXCEPTION("beam search decode op does not support bool.");
}
template <>
bool BeamSearchDecodeKernel<CPU, float>::Init(
BeamSearchDecodeParam<CPU> *param) {
BeamSearchDecodeParam<CPU>* param) {
return true;
}
template <>
void BeamSearchDecodeKernel<CPU, float>::Compute(
const BeamSearchDecodeParam<CPU> &param) {
// TODO(hjchen2)
DLOG << "BeamSearchDecodeKernel";
param.sentence_scores_->Resize(framework::make_ddim({10}));
param.sentence_scores_->mutable_data<float>();
DLOG << "BeamSearchDecodeKernel";
param.sentence_ids_->Resize(framework::make_ddim({10}));
param.sentence_ids_->mutable_data<int64_t>();
const BeamSearchDecodeParam<CPU>& param) {
const LoDTensorArray* ids = param.ids_;
const LoDTensorArray* scores = param.scores_;
const size_t step_num = ids->size();
PADDLE_MOBILE_ENFORCE(step_num > 0,
"beam search steps should be larger than 0");
for (size_t i = 0; i < step_num; ++i) {
PADDLE_MOBILE_ENFORCE(ids->at(i).lod().size() == 2,
"Level of LodTensor should be 2");
}
const size_t source_num = ids->at(0).lod().at(0).size() - 1;
PADDLE_MOBILE_ENFORCE(source_num > 0, "source num should be larger than 0");
LoDTensor* sentence_ids = param.sentence_ids_;
LoDTensor* sentence_scores = param.sentence_scores_;
framework::VisitDataType(
framework::ToDataType(scores->at(0).type()),
BeamSearchDecodeFunctor(*ids, *scores, sentence_ids, sentence_scores,
param.beam_size_, param.end_id_));
}
} // namespace operators
......
......@@ -41,8 +41,8 @@ bool ConvAddBNReluKernel<CPU, float>::Init(
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
LoDTensor *new_scale = new LoDTensor();
LoDTensor *new_bias = new LoDTensor();
auto new_scale_ptr = new_scale->mutable_data<float>({C});
auto new_bias_ptr = new_bias->mutable_data<float>({C});
for (int i = 0; i < C; i++) {
......
......@@ -41,8 +41,8 @@ bool ConvBNAddReluKernel<CPU, float>::Init(
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
LoDTensor *new_scale = new LoDTensor();
LoDTensor *new_bias = new LoDTensor();
auto new_scale_ptr = new_scale->mutable_data<float>({C});
auto new_bias_ptr = new_bias->mutable_data<float>({C});
for (int i = 0; i < C; i++) {
......
......@@ -42,8 +42,8 @@ bool ConvBNReluKernel<CPU, float>::Init(FusionConvBNReluParam<CPU> *param) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
LoDTensor *new_scale = new LoDTensor();
LoDTensor *new_bias = new LoDTensor();
auto new_scale_ptr = new_scale->mutable_data<float>({C});
auto new_bias_ptr = new_bias->mutable_data<float>({C});
for (int i = 0; i < C; i++) {
......
......@@ -69,7 +69,7 @@ bool ConvKernel<CPU, float>::Init(ConvParam<CPU> *param) {
param->Input()->dims()[2] <= 140 /* refered from ncnn */) {
param->ExecMode() = ConvParam<CPU>::EXEC_WINOGRAD3X3_FLOAT;
// transform weight
param->transformed_filter_ = new framework::Tensor;
param->transformed_filter_ = new framework::LoDTensor;
operators::math::winograd_transform_weight<8, 3>(
*param->Filter(), param->transformed_filter_);
#endif
......
......@@ -40,8 +40,8 @@ bool DWConvBNReluKernel<CPU, float>::Init(FusionDWConvBNReluParam<CPU> *param) {
inv_std_ptr[i] =
1 / static_cast<float>(pow((variance_ptr[i] + epsilon), 0.5));
}
Tensor *new_scale = new Tensor();
Tensor *new_bias = new Tensor();
LoDTensor *new_scale = new LoDTensor();
LoDTensor *new_bias = new LoDTensor();
auto new_scale_ptr = new_scale->mutable_data<float>({C});
auto new_bias_ptr = new_bias->mutable_data<float>({C});
for (int i = 0; i < C; i++) {
......
......@@ -29,12 +29,10 @@ class SequenceSoftmaxKernel<CPU, T>
void Compute(const SoftmaxParam<CPU> &param) {
param.Out()->mutable_data<float>();
/*
const framework::LoDTensor *input = param.InputX();
framework::LoDTensor *output = param.Out();
math::SequenceSoftmaxFuntor<CPU, T> sequence_softmax;
sequence_softmax(input, output);
*/
const framework::LoDTensor *input = param.InputX();
framework::LoDTensor *output = param.Out();
math::SequenceSoftmaxFuntor<CPU, T> sequence_softmax;
sequence_softmax(input, output);
}
};
......
此差异已折叠。
......@@ -21,6 +21,7 @@ namespace operators {
template <typename DeviceType, typename T>
void SoftmaxOp<DeviceType, T>::InferShape() const {
  // The output takes both its dimensions and its LoD from InputX.
  auto output = this->param_.Out();
  output->Resize(this->param_.InputX()->dims());
  output->set_lod(this->param_.InputX()->lod());
}
} // namespace operators
......
......@@ -26,11 +26,9 @@ void TopKOp<DeviceType, T>::InferShape() const {
// should check k <= dims[-1] && k >= 1
dims[dims.size() - 1] = k;
this->param_.output_->Resize(dims);
// this->param_.output_->set_lod(this->param_.input_->lod());
this->param_.output_->set_lod({{0, 1}});
this->param_.indices_->Resize(dims);
// this->param_.indices_->set_lod(this->param_.input_->lod());
this->param_.indices_->set_lod({{0, 1}});
this->param_.output_->set_lod(this->param_.input_->lod());
this->param_.indices_->set_lod(this->param_.input_->lod());
}
} // namespace operators
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册