提交 884ce5d5 编写于 作者: D dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into cmake_speed

......@@ -54,7 +54,6 @@ void MKLDNNAddtoLayer::reshape(
ow = iw;
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNAddtoLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -125,7 +125,6 @@ void MKLDNNBatchNormLayer::reshape(
<< "Input channel can not be changed";
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNBatchNormLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -102,8 +102,6 @@ void MKLDNNConvLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNConvLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -92,7 +92,7 @@ public:
void printSizeInfo() override {
MKLDNNLayer::printSizeInfo();
VLOG(MKLDNN_SIZES) << getName() << ": fh: " << fh_ << ", fw: " << fw_
<< ": ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
<< ", ph: " << ph_ << ", pw: " << pw_ << ", sh: " << sh_
<< ", sw: " << sw_ << ", dh: " << dh_ << ", dw: " << dw_;
}
......
......@@ -84,8 +84,6 @@ void MKLDNNFcLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc);
printSizeInfo();
}
void MKLDNNFcLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -71,8 +71,6 @@ void MKLDNNPoolLayer::reshape(
reshapeOutput(oh, ow);
resizeOutput(bs, oc * oh * ow);
printSizeInfo();
}
void MKLDNNPoolLayer::resetFwd(std::vector<primitive>& pipeline,
......
......@@ -98,7 +98,7 @@ void ROIPoolLayer::forward(PassType passType) {
size_t roiStartH = round(bottomROIs[2] * spatialScale_);
size_t roiEndW = round(bottomROIs[3] * spatialScale_);
size_t roiEndH = round(bottomROIs[4] * spatialScale_);
CHECK_GE(roiBatchIdx, 0);
CHECK_GE(roiBatchIdx, 0UL);
CHECK_LT(roiBatchIdx, batchSize);
size_t roiHeight = std::max(roiEndH - roiStartH + 1, 1UL);
size_t roiWidth = std::max(roiEndW - roiStartW + 1, 1UL);
......
......@@ -297,7 +297,7 @@ static void getAddtoConfig(TestConfig& cfg,
}
void testAddtoLayer(const testImageDesc& pm, const size_t nInputs) {
CHECK_GE(nInputs, 1);
CHECK_GE(nInputs, 1UL);
TestConfig dnnConfig;
getAddtoConfig(dnnConfig, pm, nInputs);
dnnConfig.layerConfig.set_type("mkldnn_addto");
......
......@@ -152,12 +152,7 @@ void MKLDNNMatrix::downSpatial() {
}
memory::desc md = memory::desc(dstDims, getDtype(), dstFmt);
memory::primitive_desc pd = memory::primitive_desc(md, getEngine());
mkldnn_primitive_t result;
mkldnn::error::wrap_c_api(
mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
"could not create a memory primitive");
reset(result);
set_data_handle(data_);
resetMKLDNNMemory(pd, data_);
}
} // namespace paddle
......@@ -145,6 +145,27 @@ public:
m_.reset();
}
/**
* override the CpuMatrix::resize
*/
void resize(size_t newHeight, size_t newWidth) override {
m_->resize(newHeight, newWidth);
if (data_ == m_->getData() && elementCnt_ == newHeight * newWidth) {
return;
}
CpuMatrix::setData(data_);
height_ = newHeight;
width_ = newWidth;
elementCnt_ = newHeight * newWidth;
stride_ = width_;
auto pd = mkldnn::memory::primitive_desc(
mkldnn::memory::desc({(int)newHeight, (int)newWidth},
getDtype(),
mkldnn::memory::format::nc),
getEngine());
resetMKLDNNMemory(pd, data_);
}
/**
* override Matrix::getData
* check data before return
......@@ -215,6 +236,17 @@ protected:
memory::format srcFmt,
memory::format dstFmt,
memory::dims dm);
/**
* reset this MKLDNN Memory from primitve desc
*/
void resetMKLDNNMemory(memory::primitive_desc pd, real* data) {
mkldnn_primitive_t result;
mkldnn::error::wrap_c_api(
mkldnn_primitive_create(&result, pd.get(), nullptr, nullptr),
"could not create a memory primitive");
reset(result);
set_data_handle(data);
}
private:
// save the CpuMatrixPtr in case the buffer released outside
......
......@@ -225,6 +225,7 @@ set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
cc_test(gather_test SRCS gather_test.cc DEPS tensor)
cc_test(net_op_test SRCS net_op_test.cc DEPS net_op)
cc_test(scatter_test SRCS scatter_test.cc DEPS tensor)
cc_test(beam_search_decode_op_test SRCS beam_search_decode_op_test.cc DEPS lod_tensor)
cc_test(strided_memcpy_test SRCS strided_memcpy_test.cc DEPS tensor paddle_memory)
cc_test(dynamic_recurrent_op_test SRCS dynamic_recurrent_op_test.cc
rnn/recurrent_op_utils.cc
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/beam_search_decode_op.h"
namespace paddle {
namespace operators {
class BeamSearchDecodeOp : public framework::OperatorBase {
public:
BeamSearchDecodeOp(const std::string& type,
const framework::VariableNameMap& inputs,
const framework::VariableNameMap& outputs,
const framework::AttributeMap& attrs)
: OperatorBase(type, inputs, outputs, attrs) {}
void Run(const framework::Scope& scope,
const platform::DeviceContext& dev_ctx) const override {
framework::ExecutionContext ctx(*this, scope, dev_ctx);
const LoDTensorArray* ids = ctx.Input<LoDTensorArray>("Ids");
const LoDTensorArray* scores = ctx.Input<LoDTensorArray>("Scores");
const size_t step_num = ids->size();
PADDLE_ENFORCE_GT(step_num, 0UL,
"beam search steps should be larger than 0");
const size_t source_num = ids->at(0).lod().at(0).size() - 1;
PADDLE_ENFORCE_GT(source_num, 0UL, "source num should be larger than 0");
for (size_t i = 0; i < step_num; ++i) {
PADDLE_ENFORCE_EQ(ids->at(i).lod().size(), 2UL,
"Level of LodTensor should be 2");
}
// prepare output
LoDTensor* sentenceIds = ctx.Output<LoDTensor>("SentenceIds");
LoDTensor* sentenceScores = ctx.Output<LoDTensor>("SentenceScores");
BeamSearchDecoder<float> beam_search_decoder;
beam_search_decoder.PackAllSteps(*ids, *scores, sentenceIds,
sentenceScores);
}
};
class BeamSearchDecodeOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
BeamSearchDecodeOpProtoMaker(framework::OpProto* proto,
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("Ids",
"(LodTensorArray)"
"score of the candidate words in each step");
AddInput("Scores",
"(LodTensorArray)"
"score of the candidate words in each step");
AddOutput("SentenceIds",
"(LodTensor)"
"All possible result sentences of word ids");
AddOutput("SentenceScores",
"(LodTensor)"
"All possible result sentences of word scores");
AddComment(R"DOC(
Pack the result of Beam search op into SentenceIds and SentenceScores.
)DOC");
}
};
class BeamSearchDecodeInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext* context) const override {
PADDLE_ENFORCE(context->HasInput("Ids"),
"BeamSearchDecodeOp must has input Ids");
PADDLE_ENFORCE(context->HasInput("Scores"),
"BeamSearchDecodeOp must has input Scores");
PADDLE_ENFORCE(context->HasOutput("SentenceIds"),
"BeamSearchDecodeOp must has output SentenceIds");
PADDLE_ENFORCE(context->HasOutput("SentenceScores"),
"BeamSearchDecodeOp must has output SentenceScores");
}
};
class BeamSearchDecodeInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDescBind& op_desc,
framework::BlockDescBind* block) const override {
for (auto& o : op_desc.Output("SentenceIds")) {
block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
}
for (auto& o : op_desc.Output("SentenceScores")) {
block->Var(o)->SetType(framework::VarDesc::LOD_TENSOR);
}
}
};
} // namespace operators
} // namespace paddle
REGISTER_OPERATOR(beam_search_decode, paddle::operators::BeamSearchDecodeOp,
paddle::operators::BeamSearchDecodeOpProtoMaker,
paddle::operators::BeamSearchDecodeInferShape,
paddle::operators::BeamSearchDecodeInferVarType,
paddle::framework::EmptyGradOpMaker);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"
namespace paddle {
namespace operators {
using LoDTensor = framework::LoDTensor;
using LoDTensorArray = framework::LoDTensorArray;
// all the lod have 2 levels.
// The First is source level, the second is sentence level.
// source level describe how many candidate words for this source.
// sentence level describe these candidates belong to which prefix
const size_t kSourceLevel = 0;
const size_t kSentenceLevel = 1;
template <typename T>
struct BeamNode {
BeamNode(int64_t word_id, T score) : word_id_(word_id), score_(score) {}
~BeamNode() {
if (parent_) {
parent_->DropKid(this);
if (parent_->kids_.size() == 0UL) {
delete parent_;
}
}
VLOG(3) << "Delete BeamNode root with word_id:" << this->word_id_;
}
void AppendTo(BeamNode* parent) {
parent_ = parent;
parent->kids_.insert(this);
}
void DropKid(BeamNode* kid) { kids_.erase(kid); }
BeamNode* parent_ = nullptr;
std::unordered_set<BeamNode*> kids_;
int64_t word_id_;
T score_;
};
template <typename T>
using BeamNodeVector = std::vector<std::unique_ptr<BeamNode<T>>>;
template <typename T>
struct Sentence {
std::vector<int64_t> word_ids;
std::vector<T> scores;
};
template <typename T>
using SentenceVector = std::vector<Sentence<T>>;
template <typename T>
struct BeamSearchDecoder {
/**
* make a BeamNode and all it's related prefix BeanNode into a Sentence.
*/
Sentence<T> MakeSentence(const BeamNode<T>* node) const;
/**
* Param:
* cur_ids: LoDTensor of One step for word ID
* cur_scores: LoDTensor of One Step for word score
* prefixes_list: prefixes for each source sentence.
* sentence_vector_list: result sentence_vector for each source sentence.
* Return:
* a new prefixes list for each source of current step
*/
std::vector<BeamNodeVector<T>> PackTwoSteps(
const LoDTensor& cur_ids, const LoDTensor& cur_scores,
std::vector<BeamNodeVector<T>>& prefixes_list,
std::vector<SentenceVector<T>>* sentence_vector_list) const;
/**
* convert the result sentence_vector for each source sentence into two
* LodTensor.
* One is all candidate sentences with word id, one is all candidate sentences
* with word score.
* Param:
* sentence_vector_list: sentence_vector for each source sentence.
* id_tensor: result LoDTensor for sentences of id.
* score_tensor: result LoDTensor for sentences of score.
*/
void ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor) const;
/**
* Pack all steps of id/score LodTensor into sentence LoDTensor
* it's main logic is:
* ```python
* prefix
* result_sentence
* result_lod_tensor
*
* for (step in steps):
* prefix = PackTwoSteps(prefix, step, &result_sentence)
* ConvertSentenceVector<T>ToLodTensor(result_sentence, &result_lod_tensor)
* ```
*/
void PackAllSteps(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores, LoDTensor* id_tensor,
LoDTensor* score_tensor) const;
};
template <typename T>
Sentence<T> BeamSearchDecoder<T>::MakeSentence(const BeamNode<T>* node) const {
Sentence<T> sentence;
while (node != nullptr) {
sentence.word_ids.emplace_back(node->word_id_);
sentence.scores.emplace_back(node->score_);
node = node->parent_;
}
std::reverse(std::begin(sentence.word_ids), std::end(sentence.word_ids));
std::reverse(std::begin(sentence.scores), std::end(sentence.scores));
return sentence;
}
template <typename T>
std::vector<BeamNodeVector<T>> BeamSearchDecoder<T>::PackTwoSteps(
const LoDTensor& cur_ids, const LoDTensor& cur_scores,
std::vector<BeamNodeVector<T>>& prefixes_list,
std::vector<SentenceVector<T>>* sentence_vector_list) const {
std::vector<BeamNodeVector<T>> result;
for (size_t src_idx = 0; src_idx < cur_ids.lod()[kSourceLevel].size() - 1;
++src_idx) {
size_t src_start = cur_ids.lod().at(kSourceLevel)[src_idx];
size_t src_end = cur_ids.lod().at(kSourceLevel)[src_idx + 1];
BeamNodeVector<T> beam_nodes;
// if prefixes size is 0, it means this is the first step. In this step,
// all candidate id is the start of candidate sentences.
if (prefixes_list.empty()) {
PADDLE_ENFORCE_EQ(cur_ids.lod().at(kSourceLevel).back(),
cur_ids.lod().at(kSentenceLevel).back(),
"in the first step");
for (size_t id_idx = src_start; id_idx < src_end; ++id_idx) {
beam_nodes.push_back(std::unique_ptr<BeamNode<T>>(new BeamNode<T>(
cur_ids.data<int64_t>()[id_idx], cur_scores.data<T>()[id_idx])));
}
} else {
BeamNodeVector<T>& prefixes = prefixes_list[src_idx];
SentenceVector<T>& sentence_vector = (*sentence_vector_list)[src_idx];
PADDLE_ENFORCE_EQ(src_end - src_start, prefixes.size(),
"prefix and candidate set number should be the same");
auto candidate_offset = cur_ids.lod()[kSentenceLevel];
for (size_t prefix_idx = 0; prefix_idx < prefixes.size(); ++prefix_idx) {
std::unique_ptr<BeamNode<T>>& prefix = prefixes[prefix_idx];
size_t candidate_start = candidate_offset[src_start + prefix_idx];
size_t candidate_end = candidate_offset[src_start + prefix_idx + 1];
if (candidate_start == candidate_end) {
VLOG(3) << "this sentence has no more candidate, "
"add to result sentence and rm it from beam tree";
sentence_vector.push_back(MakeSentence(prefix.get()));
prefix.reset();
} else {
for (size_t candidate_idx = candidate_start;
candidate_idx < candidate_end; ++candidate_idx) {
auto* candidate =
new BeamNode<T>(cur_ids.data<int64_t>()[candidate_idx],
cur_scores.data<T>()[candidate_idx]);
candidate->AppendTo(prefix.get());
beam_nodes.push_back(std::unique_ptr<BeamNode<T>>(candidate));
}
prefix.release();
}
}
}
result.push_back(std::move(beam_nodes));
}
return result;
}
template <typename T>
void BeamSearchDecoder<T>::ConvertSentenceVectorToLodTensor(
std::vector<SentenceVector<T>> sentence_vector_list, LoDTensor* id_tensor,
LoDTensor* score_tensor) const {
size_t src_num = sentence_vector_list.size();
PADDLE_ENFORCE_NE(src_num, 0, "src_num should not be 0");
std::vector<size_t> source_level_lod = {0};
std::vector<size_t> sentence_level_lod = {0};
std::vector<int64_t> id_data;
std::vector<T> score_data;
for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
for (Sentence<T>& sentence : sentence_vector_list[src_idx]) {
id_data.insert(id_data.end(), sentence.word_ids.begin(),
sentence.word_ids.end());
score_data.insert(score_data.end(), sentence.scores.begin(),
sentence.scores.end());
sentence_level_lod.push_back(sentence_level_lod.back() +
sentence.word_ids.size());
}
source_level_lod.push_back(source_level_lod.back() +
sentence_vector_list[src_idx].size());
}
auto cpu_place = new paddle::platform::CPUPlace();
paddle::platform::CPUDeviceContext cpu_ctx(*cpu_place);
framework::LoD lod;
lod.push_back(source_level_lod);
lod.push_back(sentence_level_lod);
id_tensor->set_lod(lod);
id_tensor->Resize({static_cast<int64_t>(id_data.size())});
id_tensor->mutable_data<int64_t>(paddle::platform::CPUPlace());
id_tensor->CopyFromVector<int64_t>(id_data, cpu_ctx);
score_tensor->set_lod(lod);
score_tensor->Resize({static_cast<int64_t>(score_data.size())});
score_tensor->mutable_data<T>(paddle::platform::CPUPlace());
score_tensor->CopyFromVector<T>(score_data, cpu_ctx);
}
template <typename T>
void BeamSearchDecoder<T>::PackAllSteps(const LoDTensorArray& step_ids,
const LoDTensorArray& step_scores,
LoDTensor* id_tensor,
LoDTensor* score_tensor) const {
PADDLE_ENFORCE(!step_ids.empty(), "step num should be larger than 0");
PADDLE_ENFORCE_EQ(step_ids.size(), step_scores.size(),
"step_ids and step_scores should be the same");
const size_t step_num = step_ids.size();
const size_t src_num = step_ids.at(0).lod().at(kSourceLevel).size() - 1;
PADDLE_ENFORCE_GT(src_num, 0UL, "source num should be larger than 0");
// previous prefixes for each step,
// the init length is 0, means this is the first step.
std::vector<BeamNodeVector<T>> beamnode_vector_list(0);
std::vector<SentenceVector<T>> sentence_vector_list(src_num);
// pack all steps for one batch first, then another batch
for (size_t step_id = 0; step_id < step_num; ++step_id) {
beamnode_vector_list =
PackTwoSteps(step_ids.at(step_id), step_scores.at(step_id),
beamnode_vector_list, &sentence_vector_list);
}
// append last beam_node to result
for (size_t src_idx = 0; src_idx < src_num; ++src_idx) {
for (auto& beam_node : beamnode_vector_list.at(src_idx)) {
sentence_vector_list[src_idx].push_back(MakeSentence(beam_node.get()));
beam_node.reset();
}
}
ConvertSentenceVectorToLodTensor(sentence_vector_list, id_tensor,
score_tensor);
}
} // namespace operators
} // namespace paddle
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/beam_search_decode_op.h"
#include "gtest/gtest.h"
using CPUPlace = paddle::platform::CPUPlace;
using LoD = paddle::framework::LoD;
using LoDTensor = paddle::framework::LoDTensor;
using LoDTensorArray = paddle::framework::LoDTensorArray;
template <typename T>
using BeamNode = paddle::operators::BeamNode<T>;
template <typename T>
using BeamSearchDecoder = paddle::operators::BeamSearchDecoder<T>;
template <typename T>
using Sentence = paddle::operators::Sentence<T>;
template <typename T>
using BeamNodeVector = paddle::operators::BeamNodeVector<T>;
template <typename T>
using SentenceVector = paddle::operators::SentenceVector<T>;
namespace paddle {
namespace test {
void GenerateExample(const std::vector<size_t>& level_0,
const std::vector<size_t>& level_1,
const std::vector<int>& data, LoDTensorArray* ids,
LoDTensorArray* scores) {
PADDLE_ENFORCE_EQ(level_0.back(), level_1.size() - 1,
"source level is used to describe candidate set");
PADDLE_ENFORCE_EQ(level_1.back(), data.size(),
"the lowest level is used to describe data"
", so it's last element should be data length");
CPUPlace place;
LoD lod;
lod.push_back(level_0);
lod.push_back(level_1);
// Ids
LoDTensor tensor_id;
tensor_id.set_lod(lod);
tensor_id.Resize({static_cast<int64_t>(data.size())});
// malloc memory
int64_t* id_ptr = tensor_id.mutable_data<int64_t>(place);
for (size_t i = 0; i < data.size(); ++i) {
id_ptr[i] = static_cast<int64_t>(data.at(i));
}
// Scores
LoDTensor tensor_score;
tensor_score.set_lod(lod);
tensor_score.Resize({static_cast<int64_t>(data.size())});
// malloc memory
float* score_ptr = tensor_score.mutable_data<float>(place);
for (size_t i = 0; i < data.size(); ++i) {
score_ptr[i] = static_cast<float>(data.at(i));
}
ids->push_back(tensor_id);
scores->push_back(tensor_score);
}
} // namespace test
} // namespace paddle
TEST(BeamSearchDecodeOp, DeleteBeamNode) {
auto* root = new BeamNode<float>(0, 0);
auto* b1 = new BeamNode<float>(1, 1);
auto* b2 = new BeamNode<float>(2, 2);
auto* b3 = new BeamNode<float>(3, 3);
b1->AppendTo(root);
b2->AppendTo(root);
b3->AppendTo(b1);
delete b3;
delete b2;
}
TEST(BeamSearchDecodeOp, MakeSentence) {
auto* root = new BeamNode<float>(0, 0);
auto* b1 = new BeamNode<float>(1, 1);
auto* end = new BeamNode<float>(2, 2);
b1->AppendTo(root);
end->AppendTo(b1);
BeamSearchDecoder<float> helper;
Sentence<float> sentence = helper.MakeSentence(end);
delete end;
std::vector<int64_t> expect_ids = {0, 1, 2};
ASSERT_EQ(sentence.word_ids, expect_ids);
std::vector<float> expect_scores = {0, 1, 2};
ASSERT_EQ(sentence.scores, expect_scores);
}
TEST(BeamSearchDecodeOp, PackTwoStepsFistStep) {
CPUPlace place;
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(
std::vector<size_t>{0, 2, 6}, std::vector<size_t>{0, 1, 2, 3, 4, 5, 6},
std::vector<int>{1, 2, 3, 4, 5, 6}, &ids, &scores);
std::vector<BeamNodeVector<float>> beamnode_vector_list;
std::vector<SentenceVector<float>> sentence_vector_list(
2, SentenceVector<float>());
BeamSearchDecoder<float> helper;
beamnode_vector_list = helper.PackTwoSteps(
ids[0], scores[0], beamnode_vector_list, &sentence_vector_list);
ASSERT_EQ(beamnode_vector_list.size(), 2UL);
ASSERT_EQ(beamnode_vector_list[0].size(), 2UL);
ASSERT_EQ(beamnode_vector_list[1].size(), 4UL);
}
TEST(BeamSearchDecodeOp, PackTwoSteps) {
CPUPlace place;
// first source has three prefix
BeamNodeVector<float> source0_prefixes;
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(1, 1)));
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(0, 0)));
source0_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(3, 3)));
// second source has two prefix
BeamNodeVector<float> source1_prefixes;
source1_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(4, 4)));
source1_prefixes.push_back(
std::unique_ptr<BeamNode<float>>(new BeamNode<float>(5, 5)));
std::vector<BeamNodeVector<float>> beamnode_vector_list;
std::vector<SentenceVector<float>> sentence_vector_list(
2, SentenceVector<float>());
beamnode_vector_list.push_back(std::move(source0_prefixes));
beamnode_vector_list.push_back(std::move(source1_prefixes));
// generate data for one step
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(std::vector<size_t>{0, 3, 5},
std::vector<size_t>{0, 1, 1, 3, 4, 5},
std::vector<int>{0, 1, 2, 3, 4}, &ids, &scores);
BeamSearchDecoder<float> helper1;
beamnode_vector_list = helper1.PackTwoSteps(
ids[0], scores[0], beamnode_vector_list, &sentence_vector_list);
ASSERT_EQ(sentence_vector_list[0].size(), 1UL);
ASSERT_EQ(sentence_vector_list[1].size(), 0UL);
ASSERT_EQ(beamnode_vector_list[0].size(), 3UL);
ASSERT_EQ(beamnode_vector_list[1].size(), 2UL);
}
TEST(BeamSearchDecodeOp, PackAllSteps) {
CPUPlace place;
// we will constuct a sample data with 3 steps and 2 source sentences
LoDTensorArray ids;
LoDTensorArray scores;
paddle::test::GenerateExample(
std::vector<size_t>{0, 3, 6}, std::vector<size_t>{0, 1, 2, 3, 4, 5, 6},
std::vector<int>{1, 2, 3, 4, 5, 6}, &ids, &scores);
paddle::test::GenerateExample(
std::vector<size_t>{0, 3, 6}, std::vector<size_t>{0, 1, 1, 3, 5, 5, 6},
std::vector<int>{0, 1, 2, 3, 4, 5}, &ids, &scores);
paddle::test::GenerateExample(std::vector<size_t>{0, 3, 6},
std::vector<size_t>{0, 0, 1, 2, 3, 4, 5},
std::vector<int>{0, 1, 2, 3, 4}, &ids, &scores);
ASSERT_EQ(ids.size(), 3UL);
ASSERT_EQ(scores.size(), 3UL);
BeamSearchDecoder<float> helper;
LoDTensor id_tensor;
LoDTensor score_tensor;
helper.PackAllSteps(ids, scores, &id_tensor, &score_tensor);
LoD lod = id_tensor.lod();
std::vector<size_t> expect_source_lod = {0, 4, 8};
EXPECT_EQ(lod[0], expect_source_lod);
std::vector<size_t> expect_sentence_lod = {0, 1, 3, 6, 9, 10, 13, 16, 19};
EXPECT_EQ(lod[1], expect_sentence_lod);
// 2| 1, 0| 3, 1, 0| 3, 2, 1| 5| 4, 3, 2| 4, 4, 3| 6, 5, 4
std::vector<int> expect_data = {2, 1, 0, 3, 1, 0, 3, 2, 1, 5,
4, 3, 2, 4, 4, 3, 6, 5, 4};
ASSERT_EQ(id_tensor.dims()[0], static_cast<int64_t>(expect_data.size()));
for (size_t i = 0; i < expect_data.size(); ++i) {
ASSERT_EQ(id_tensor.data<int64_t>()[i],
static_cast<int64_t>(expect_data[i]));
}
for (int64_t i = 0; i < id_tensor.dims()[0]; ++i) {
ASSERT_EQ(score_tensor.data<float>()[i],
static_cast<float>(id_tensor.data<int64_t>()[i]));
}
}
......@@ -47,7 +47,7 @@ class SequenceConcatOpMaker : public framework::OpProtoAndCheckerMaker {
framework::OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(proto, op_checker) {
AddInput("X",
"(vector<LoDTensor>) Input is a vector of LoDTensor, "
"(LodTensorArray) Input is a vector of LoDTensor, "
"each of which is a variable-length sequence or nested sequence.")
.AsDuplicable();
AddOutput("Out",
......
......@@ -27,11 +27,12 @@ namespace platform {
This wrap is a hack to avoid this bug.
*/
template <class Callable, class... Args>
template <typename Callable, typename... Args>
inline void call_once(std::once_flag& flag, Callable&& f, Args&&... args) {
bool good = false;
std::exception ex;
std::call_once(flag, [&]() {
std::call_once(flag,
[&](Args&&... args) {
try {
f(args...);
good = true;
......@@ -40,7 +41,8 @@ inline void call_once(std::once_flag& flag, Callable&& f, Args&&... args) {
} catch (...) {
ex = std::runtime_error("excption caught in call_once");
}
});
},
args...);
if (!good) {
throw std::exception(ex);
}
......
......@@ -4,7 +4,7 @@ import itertools
from paddle.v2.framework.framework import Variable, g_main_program, \
g_startup_program, unique_name, Program
from paddle.v2.framework.initializer import ConstantInitializer, \
UniformInitializer
UniformInitializer, XavierInitializer
class LayerHelper(object):
......@@ -61,7 +61,7 @@ class LayerHelper(object):
@property
def param_attr(self):
default = {'name': None, 'initializer': UniformInitializer()}
default = {'name': None, 'initializer': XavierInitializer()}
actual = self.kwargs.get('param_attr', None)
if actual is None:
actual = default
......@@ -70,10 +70,11 @@ class LayerHelper(object):
actual[default_field] = default[default_field]
return actual
@property
def bias_attr(self):
default = {'name': None, 'initializer': ConstantInitializer()}
default = {'name': None, 'initializer': XavierInitializer()}
bias_attr = self.kwargs.get('bias_attr', None)
if bias_attr is True:
if bias_attr is None:
bias_attr = default
if isinstance(bias_attr, dict):
......@@ -166,7 +167,7 @@ class LayerHelper(object):
num_flatten_dims = 1
size = list(input_var.shape[num_flatten_dims:])
bias_attr = self.bias_attr()
bias_attr = self.bias_attr
if not bias_attr:
return input_var
......
import paddle.v2.framework.core as core
import paddle.v2.framework.proto.framework_pb2 as framework_pb2
from paddle.v2.framework.framework import OpProtoHolder, Variable, Program, \
Operator
from paddle.v2.framework.initializer import ConstantInitializer, \
NormalInitializer
from paddle.v2.framework.layer_helper import LayerHelper, unique_name
import re
import cStringIO
__all__ = [
'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat',
......@@ -16,7 +18,7 @@ __all__ = [
def fc(input,
size,
param_attr=None,
bias_attr=True,
bias_attr=None,
name=None,
act=None,
num_flatten_dims=1,
......@@ -125,6 +127,55 @@ def embedding(input,
return tmp
# TODO(qijun): expose H0 and C0
def dynamic_lstm(input,
size,
data_type='float32',
param_attr=None,
bias_attr=None,
use_peepholes=True,
is_reverse=False,
gate_activation='sigmoid',
cell_activation='tanh',
candidate_activation='tanh',
main_program=None,
startup_program=None):
helper = LayerHelper('lstm', **locals())
size = size / 4
weight = helper.create_parameter(
attr=helper.param_attr, shape=[size, 4 * size], dtype=data_type)
bias_size = [1, 7 * size]
if not use_peepholes:
bias_size[1] = 4 * size
bias = helper.create_parameter(
attr=helper.bias_attr, shape=bias_size, dtype=data_type, suffix='b')
hidden = helper.create_tmp_variable(data_type)
cell = helper.create_tmp_variable(data_type)
batch_gate = helper.create_tmp_variable(data_type)
batch_cell_pre_act = helper.create_tmp_variable(data_type)
helper.append_op(
type='lstm',
inputs={'Input': input,
'Weight': weight,
'Bias': bias},
outputs={
'Hidden': hidden,
'Cell': cell,
'BatchGate': batch_gate,
'BatchCellPreAct': batch_cell_pre_act
},
attrs={
'use_peepholes': use_peepholes,
'is_reverse': is_reverse,
'gate_activation': gate_activation,
'cell_activation': cell_activation,
'candidate_activation': candidate_activation
})
return hidden, cell
def data(name,
shape,
data_type='float32',
......@@ -191,6 +242,58 @@ def _convert_(name):
return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
def _generate_doc_string_(op_proto):
"""
Generate docstring by OpProto
Args:
op_proto (framework_pb2.OpProto): a protobuf message typed OpProto
Returns:
str: the document string
"""
def _type_to_str_(tp):
return framework_pb2.AttrType.Name(tp)
if not isinstance(op_proto, framework_pb2.OpProto):
raise TypeError("OpProto should be `framework_pb2.OpProto`")
buf = cStringIO.StringIO()
buf.write(op_proto.comment)
buf.write('\nArgs:\n')
for each_input in op_proto.inputs:
line_begin = ' {0}: '.format(_convert_(each_input.name))
buf.write(line_begin)
buf.write(each_input.comment)
buf.write('\n')
buf.write(' ' * len(line_begin))
buf.write('Duplicable: ')
buf.write(str(each_input.duplicable))
buf.write(' Optional: ')
buf.write(str(each_input.dispensable))
buf.write('\n')
for each_attr in op_proto.attrs:
buf.write(' ')
buf.write(each_attr.name)
buf.write(' (')
buf.write(_type_to_str_(each_attr.type))
buf.write('): ')
buf.write(each_attr.comment)
buf.write('\n')
if len(op_proto.outputs) != 0:
buf.write('\nReturns:\n')
buf.write(' ')
for each_opt in op_proto.outputs:
if not each_opt.intermediate:
break
buf.write(each_opt.comment)
return buf.getvalue()
def _create_op_func_(op_type):
"""
Create an Operator for a Function.
......@@ -249,11 +352,6 @@ def _create_op_func_(op_type):
return dtype
def func(**kwargs):
"""
This function implements the function for the operator. This process
involves doing the sanity check (using the function above), reading
inputs from protobuf and applying the activations on top.
"""
helper = LayerHelper(op_type, **kwargs)
dtype = infer_and_check_data_type(op_proto, **kwargs)
......@@ -277,6 +375,7 @@ def _create_op_func_(op_type):
func.__name__ = op_type
globals()[op_type] = func
func.__doc__ = _generate_doc_string_(op_proto)
global __all__
__all__.append(op_type)
......
......@@ -35,15 +35,21 @@ class Optimizer(object):
"""
raise NotImplementedError()
def _initialize_tensors(self, block):
"""Create all necessary tensors, that will be shared for all parameter updates.
Tensors like learning rate should be initialized here.
Args:
block: the block in which the loss variable is present
"""
pass
def _create_param_lr(self, param_and_grad):
# create learning rate variable for every parameter
param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate']
param_lr_shape = [1]
param_lr_var = self.helper.create_global_variable(
name=unique_name("learning_rate"),
dtype='float32',
shape=param_lr_shape,
lod_level=1,
persistable=True)
param_lr = param_lr * self._learning_rate
self.helper.set_variable_initializer(
var=param_lr_var, initializer=ConstantInitializer(param_lr))
return param_lr_var
def _create_accumulators(self, block, parameters):
"""Create all accumulators needed by the parameters
......@@ -161,8 +167,6 @@ class Optimizer(object):
startup_program=startup_program)
self._create_accumulators(loss.block,
[p[0] for p in parameters_and_grads])
# Create any necessary tensors
self._initialize_tensors(loss.block)
optimize_ops = []
for param_and_grad in parameters_and_grads:
......@@ -214,27 +218,16 @@ class SGDOptimizer(Optimizer):
self.type = "sgd"
self._learning_rate = learning_rate
def _initialize_tensors(self, block):
lr_shape = [1]
# create a variable for learning_rate
self._lr = self.helper.create_global_variable(
name=unique_name("learning_rate"),
dtype='float32',
shape=lr_shape,
lod_level=1,
persistable=True)
self.helper.set_variable_initializer(
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
# create the optimize op
sgd_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"LearningRate": self._lr
"LearningRate": self._create_param_lr(param_and_grad)
},
outputs={"ParamOut": param_and_grad[0]})
......@@ -259,19 +252,6 @@ class MomentumOptimizer(Optimizer):
self._momentum = momentum
self._use_nesterov = bool(use_nesterov)
def _initialize_tensors(self, block):
assert isinstance(block, framework.Block)
lr_shape = [1]
# create a variable for learning_rate
self._lr = self.helper.create_global_variable(
name=unique_name("learning_rate"),
dtype='float32',
shape=lr_shape,
lod_level=1,
persistable=True)
self.helper.set_variable_initializer(
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
......@@ -290,7 +270,7 @@ class MomentumOptimizer(Optimizer):
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"Velocity": velocity_acc,
"LearningRate": self._lr
"LearningRate": self._create_param_lr(param_and_grad)
},
outputs={
"ParamOut": param_and_grad[0],
......@@ -315,18 +295,6 @@ class AdagradOptimizer(Optimizer):
self._learning_rate = learning_rate
self._epsilon = epsilon
def _initialize_tensors(self, block):
lr_shape = [1]
# create a variable for learning_rate
self._lr = self.helper.create_global_variable(
name=unique_name("learning_rate"),
dtype='float32',
shape=lr_shape,
lod_level=1,
persistable=True)
self.helper.set_variable_initializer(
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
......@@ -346,7 +314,7 @@ class AdagradOptimizer(Optimizer):
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"Moment": moment_acc,
"LearningRate": self._lr
"LearningRate": self._create_param_lr(param_and_grad)
},
outputs={"ParamOut": param_and_grad[0],
"MomentOut": moment_acc},
......@@ -378,18 +346,6 @@ class AdamOptimizer(Optimizer):
self._beta2 = beta2
self._epsilon = epsilon
def _initialize_tensors(self, block):
lr_shape = [1]
# create a variable for learning_rate
self._lr = self.helper.create_global_variable(
name=unique_name("learning_rate"),
dtype='float32',
shape=lr_shape,
lod_level=1,
persistable=True)
self.helper.set_variable_initializer(
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
......@@ -433,7 +389,7 @@ class AdamOptimizer(Optimizer):
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"LearningRate": self._lr,
"LearningRate": self._create_param_lr(param_and_grad),
"Moment1": moment1,
"Moment2": moment2,
"Beta1Pow": self._beta1_pow_acc,
......@@ -495,18 +451,6 @@ class AdamaxOptimizer(Optimizer):
self._beta2 = beta2
self._epsilon = epsilon
def _initialize_tensors(self, block):
lr_shape = [1]
# create a variable for learning_rate
self._lr = self.helper.create_global_variable(
name=unique_name("learning_rate"),
dtype='float32',
shape=lr_shape,
lod_level=1,
persistable=True)
self.helper.set_variable_initializer(
var=self._lr, initializer=ConstantInitializer(self._learning_rate))
def _create_accumulators(self, block, parameters):
# Create beta1 power accumulator tensor
beta_shape = [1]
......@@ -536,7 +480,7 @@ class AdamaxOptimizer(Optimizer):
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"LearningRate": self._lr,
"LearningRate": self._create_param_lr(param_and_grad),
"Moment": moment,
"InfNorm": inf_norm,
"Beta1Pow": self._beta1_pow_acc
......
import unittest
import paddle.v2.framework.layers as layers
class TestDocString(unittest.TestCase):
def test_layer_doc_string(self):
print layers.dropout.__doc__
if __name__ == '__main__':
unittest.main()
import paddle.v2 as paddle
import paddle.v2.framework.layers as layers
import paddle.v2.framework.nets as nets
import paddle.v2.framework.core as core
import paddle.v2.framework.optimizer as optimizer
from paddle.v2.framework.framework import Program, g_main_program, g_startup_program
from paddle.v2.framework.executor import Executor
import numpy as np
def stacked_lstm_net(input_dim,
class_dim=2,
emb_dim=128,
hid_dim=512,
stacked_num=3):
assert stacked_num % 2 == 1
data = layers.data(name="words", shape=[1], data_type="int64")
label = layers.data(name="label", shape=[1], data_type="int64")
emb = layers.embedding(input=data, size=[input_dim, emb_dim])
# add bias attr
# TODO(qijun) linear act
fc1 = layers.fc(input=emb, size=hid_dim)
lstm1, cell1 = layers.dynamic_lstm(input=fc1, size=hid_dim)
inputs = [fc1, lstm1]
for i in range(2, stacked_num + 1):
fc = layers.fc(input=inputs, size=hid_dim)
lstm, cell = layers.dynamic_lstm(
input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
inputs = [fc, lstm]
fc_last = layers.sequence_pool(input=inputs[0], pool_type='max')
lstm_last = layers.sequence_pool(input=inputs[1], pool_type='max')
prediction = layers.fc(input=[fc_last, lstm_last],
size=class_dim,
act='softmax')
cost = layers.cross_entropy(input=prediction, label=label)
avg_cost = layers.mean(x=cost)
adam_optimizer = optimizer.AdamOptimizer(learning_rate=0.002)
opts = adam_optimizer.minimize(avg_cost)
acc = layers.accuracy(input=prediction, label=label)
return avg_cost, acc
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = core.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def main():
BATCH_SIZE = 100
PASS_NUM = 5
word_dict = paddle.dataset.imdb.word_dict()
print "load word dict successfully"
dict_dim = len(word_dict)
class_dim = 2
cost, acc = stacked_lstm_net(input_dim=dict_dim, class_dim=class_dim)
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.imdb.train(word_dict), buf_size=1000),
batch_size=BATCH_SIZE)
place = core.CPUPlace()
exe = Executor(place)
exe.run(g_startup_program)
for pass_id in xrange(PASS_NUM):
for data in train_data():
tensor_words = to_lodtensor(map(lambda x: x[0], data), place)
label = np.array(map(lambda x: x[1], data)).astype("int64")
label = label.reshape([BATCH_SIZE, 1])
tensor_label = core.LoDTensor()
tensor_label.set(label, place)
outs = exe.run(g_main_program,
feed={"words": tensor_words,
"label": tensor_label},
fetch_list=[cost, acc])
cost_val = np.array(outs[0])
acc_val = np.array(outs[1])
print("cost=" + str(cost_val) + " acc=" + str(acc_val))
if cost_val < 1.0 and acc_val > 0.7:
exit(0)
exit(1)
if __name__ == '__main__':
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册