From 9ff558a46f64895518d572104f5079eda0592ead Mon Sep 17 00:00:00 2001
From: hutuxian
Date: Wed, 22 Apr 2020 19:54:14 +0800
Subject: [PATCH] Optimize DataFeed (#23957)

* Make batch_float_feasigns & batch_uint64_feasigns as member variable
---
 paddle/fluid/framework/data_feed.cc | 97 ++++++++++++++++-------------
 paddle/fluid/framework/data_feed.h  |  4 ++
 2 files changed, 59 insertions(+), 42 deletions(-)

diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc
index 12e517dec62..a40409fb8f9 100644
--- a/paddle/fluid/framework/data_feed.cc
+++ b/paddle/fluid/framework/data_feed.cc
@@ -798,6 +798,19 @@ void MultiSlotInMemoryDataFeed::Init(
     }
   }
   feed_vec_.resize(use_slots_.size());
+  const int kEstimatedFeasignNumPerSlot = 5;  // Magic Number
+  for (size_t i = 0; i < all_slot_num; i++) {
+    batch_float_feasigns_.push_back(std::vector<float>());
+    batch_uint64_feasigns_.push_back(std::vector<uint64_t>());
+    batch_float_feasigns_[i].reserve(default_batch_size_ *
+                                     kEstimatedFeasignNumPerSlot);
+    batch_uint64_feasigns_[i].reserve(default_batch_size_ *
+                                      kEstimatedFeasignNumPerSlot);
+    offset_.push_back(std::vector<size_t>());
+    offset_[i].reserve(default_batch_size_ +
+                       1);  // Each lod info will prepend a zero
+  }
+  visit_.resize(all_slot_num, false);
   pipe_command_ = data_feed_desc.pipe_command();
   finish_init_ = true;
 }
@@ -989,13 +1002,12 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstance(Record* instance) {
 void MultiSlotInMemoryDataFeed::PutToFeedVec(
     const std::vector<Record>& ins_vec) {
 #ifdef _LINUX
-  std::vector<std::vector<float>> batch_float_feasigns(use_slots_.size(),
-                                                       std::vector<float>());
-  std::vector<std::vector<uint64_t>> batch_uint64_feasigns(
-      use_slots_.size(), std::vector<uint64_t>());
-  std::vector<std::vector<size_t>> offset(use_slots_.size(),
-                                          std::vector<size_t>{0});
-  std::vector<bool> visit(use_slots_.size(), false);
+  for (size_t i = 0; i < batch_float_feasigns_.size(); ++i) {
+    batch_float_feasigns_[i].clear();
+    batch_uint64_feasigns_[i].clear();
+    offset_[i].clear();
+    offset_[i].push_back(0);
+  }
   ins_content_vec_.clear();
   ins_content_vec_.reserve(ins_vec.size());
   ins_id_vec_.clear();
@@ -1005,30 +1017,31 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
     ins_id_vec_.push_back(r.ins_id_);
     ins_content_vec_.push_back(r.content_);
     for (auto& item : r.float_feasigns_) {
-      batch_float_feasigns[item.slot()].push_back(item.sign().float_feasign_);
-      visit[item.slot()] = true;
+      batch_float_feasigns_[item.slot()].push_back(item.sign().float_feasign_);
+      visit_[item.slot()] = true;
     }
     for (auto& item : r.uint64_feasigns_) {
-      batch_uint64_feasigns[item.slot()].push_back(item.sign().uint64_feasign_);
-      visit[item.slot()] = true;
+      batch_uint64_feasigns_[item.slot()].push_back(
+          item.sign().uint64_feasign_);
+      visit_[item.slot()] = true;
     }
     for (size_t j = 0; j < use_slots_.size(); ++j) {
       const auto& type = all_slots_type_[j];
-      if (visit[j]) {
-        visit[j] = false;
+      if (visit_[j]) {
+        visit_[j] = false;
       } else {
         // fill slot value with default value 0
         if (type[0] == 'f') {  // float
-          batch_float_feasigns[j].push_back(0.0);
+          batch_float_feasigns_[j].push_back(0.0);
         } else if (type[0] == 'u') {  // uint64
-          batch_uint64_feasigns[j].push_back(0);
+          batch_uint64_feasigns_[j].push_back(0);
         }
       }
       // get offset of this ins in this slot
      if (type[0] == 'f') {  // float
-        offset[j].push_back(batch_float_feasigns[j].size());
+        offset_[j].push_back(batch_float_feasigns_[j].size());
      } else if (type[0] == 'u') {  // uint64
-        offset[j].push_back(batch_uint64_feasigns[j].size());
+        offset_[j].push_back(batch_uint64_feasigns_[j].size());
      }
    }
  }
@@ -1037,21 +1050,21 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
     if (feed_vec_[i] == nullptr) {
       continue;
     }
-    int total_instance = offset[i].back();
+    int total_instance = offset_[i].back();
     const auto& type = all_slots_type_[i];
     if (type[0] == 'f') {  // float
-      float* feasign = batch_float_feasigns[i].data();
+      float* feasign = batch_float_feasigns_[i].data();
       float* tensor_ptr =
           feed_vec_[i]->mutable_data<float>({total_instance, 1}, this->place_);
       CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(float));
     } else if (type[0] == 'u') {  // uint64
       // no uint64_t type in paddlepaddle
-      uint64_t* feasign = batch_uint64_feasigns[i].data();
+      uint64_t* feasign = batch_uint64_feasigns_[i].data();
       int64_t* tensor_ptr = feed_vec_[i]->mutable_data<int64_t>(
           {total_instance, 1}, this->place_);
       CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(int64_t));
     }
-    auto& slot_offset = offset[i];
+    auto& slot_offset = offset_[i];
     LoD data_lod{slot_offset};
     feed_vec_[i]->set_lod(data_lod);
     if (use_slots_is_dense_[i]) {
@@ -1427,13 +1440,12 @@ int PaddleBoxDataFeed::GetCurrentPhase() {
 
 void PaddleBoxDataFeed::PutToFeedVec(const std::vector<Record*>& ins_vec) {
 #ifdef _LINUX
-  std::vector<std::vector<float>> batch_float_feasigns(use_slots_.size(),
-                                                       std::vector<float>());
-  std::vector<std::vector<uint64_t>> batch_uint64_feasigns(
-      use_slots_.size(), std::vector<uint64_t>());
-  std::vector<std::vector<size_t>> offset(use_slots_.size(),
-                                          std::vector<size_t>{0});
-  std::vector<bool> visit(use_slots_.size(), false);
+  for (size_t i = 0; i < batch_float_feasigns_.size(); ++i) {
+    batch_float_feasigns_[i].clear();
+    batch_uint64_feasigns_[i].clear();
+    offset_[i].clear();
+    offset_[i].push_back(0);
+  }
   ins_content_vec_.clear();
   ins_content_vec_.reserve(ins_vec.size());
   ins_id_vec_.clear();
@@ -1443,30 +1455,31 @@ void PaddleBoxDataFeed::PutToFeedVec(const std::vector<Record*>& ins_vec) {
     ins_id_vec_.push_back(r->ins_id_);
     ins_content_vec_.push_back(r->content_);
     for (auto& item : r->float_feasigns_) {
-      batch_float_feasigns[item.slot()].push_back(item.sign().float_feasign_);
-      visit[item.slot()] = true;
+      batch_float_feasigns_[item.slot()].push_back(item.sign().float_feasign_);
+      visit_[item.slot()] = true;
     }
     for (auto& item : r->uint64_feasigns_) {
-      batch_uint64_feasigns[item.slot()].push_back(item.sign().uint64_feasign_);
-      visit[item.slot()] = true;
+      batch_uint64_feasigns_[item.slot()].push_back(
+          item.sign().uint64_feasign_);
+      visit_[item.slot()] = true;
     }
     for (size_t j = 0; j < use_slots_.size(); ++j) {
       const auto& type = all_slots_type_[j];
-      if (visit[j]) {
-        visit[j] = false;
+      if (visit_[j]) {
+        visit_[j] = false;
      } else {
        // fill slot value with default value 0
        if (type[0] == 'f') {  // float
-          batch_float_feasigns[j].push_back(0.0);
+          batch_float_feasigns_[j].push_back(0.0);
        } else if (type[0] == 'u') {  // uint64
-          batch_uint64_feasigns[j].push_back(0);
+          batch_uint64_feasigns_[j].push_back(0);
        }
      }
      // get offset of this ins in this slot
      if (type[0] == 'f') {  // float
-        offset[j].push_back(batch_float_feasigns[j].size());
+        offset_[j].push_back(batch_float_feasigns_[j].size());
      } else if (type[0] == 'u') {  // uint64
-        offset[j].push_back(batch_uint64_feasigns[j].size());
+        offset_[j].push_back(batch_uint64_feasigns_[j].size());
      }
    }
  }
@@ -1475,21 +1488,21 @@ void PaddleBoxDataFeed::PutToFeedVec(const std::vector<Record*>& ins_vec) {
     if (feed_vec_[i] == nullptr) {
       continue;
     }
-    int total_instance = offset[i].back();
+    int total_instance = offset_[i].back();
     const auto& type = all_slots_type_[i];
     if (type[0] == 'f') {  // float
-      float* feasign = batch_float_feasigns[i].data();
+      float* feasign = batch_float_feasigns_[i].data();
       float* tensor_ptr =
           feed_vec_[i]->mutable_data<float>({total_instance, 1}, this->place_);
       CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(float));
     } else if (type[0] == 'u') {  // uint64
       // no uint64_t type in paddlepaddle
-      uint64_t* feasign = batch_uint64_feasigns[i].data();
+      uint64_t* feasign = batch_uint64_feasigns_[i].data();
       int64_t* tensor_ptr = feed_vec_[i]->mutable_data<int64_t>(
           {total_instance, 1}, this->place_);
       CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(int64_t));
     }
-    auto& slot_offset = offset[i];
+    auto& slot_offset = offset_[i];
     LoD data_lod{slot_offset};
     feed_vec_[i]->set_lod(data_lod);
     if (use_slots_is_dense_[i]) {
diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h
index a52cadcbc0a..daa54b0b334 100644
--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -597,6 +597,10 @@ class MultiSlotInMemoryDataFeed : public InMemoryDataFeed<Record> {
   virtual void PutToFeedVec(const std::vector<Record>& ins_vec);
   virtual void GetMsgFromLogKey(const std::string& log_key, uint64_t* search_id,
                                 uint32_t* cmatch, uint32_t* rank);
+  std::vector<std::vector<float>> batch_float_feasigns_;
+  std::vector<std::vector<uint64_t>> batch_uint64_feasigns_;
+  std::vector<std::vector<size_t>> offset_;
+  std::vector<bool> visit_;
 };

 class PaddleBoxDataFeed : public MultiSlotInMemoryDataFeed {
--
GitLab
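Editor note: the core of this patch is replacing the per-call temporaries in PutToFeedVec (three vector-of-vector buffers plus a visit flag vector) with member variables that are reserved once in Init() and only cleared between batches, so each batch reuses the already-allocated capacity. The standalone sketch below illustrates that reuse pattern only; the SlotBuffers class, Init/FillBatch names, and the pair-based input are hypothetical and are not Paddle APIs.

// Standalone sketch of the buffer-reuse pattern; names are illustrative,
// not part of the patch or of PaddlePaddle.
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

class SlotBuffers {
 public:
  // Reserve per-slot buffers once, mirroring what Init() now does in the patch.
  void Init(size_t slot_num, size_t batch_size) {
    const size_t kEstimatedFeasignNumPerSlot = 5;  // same heuristic as the patch
    uint64_feasigns_.resize(slot_num);
    offset_.resize(slot_num);
    for (size_t i = 0; i < slot_num; ++i) {
      uint64_feasigns_[i].reserve(batch_size * kEstimatedFeasignNumPerSlot);
      offset_[i].reserve(batch_size + 1);  // lod starts with a leading zero
    }
  }

  // Clear and refill the same buffers for every batch instead of constructing
  // fresh vector-of-vector temporaries on each call.
  void FillBatch(const std::vector<std::pair<size_t, uint64_t>>& feasigns) {
    for (size_t i = 0; i < offset_.size(); ++i) {
      uint64_feasigns_[i].clear();  // clear() keeps the reserved capacity
      offset_[i].clear();
      offset_[i].push_back(0);
    }
    for (const auto& f : feasigns) {
      // f.first is the slot index, f.second the feasign value.
      uint64_feasigns_[f.first].push_back(f.second);
      offset_[f.first].push_back(uint64_feasigns_[f.first].size());
    }
  }

 private:
  std::vector<std::vector<uint64_t>> uint64_feasigns_;
  std::vector<std::vector<size_t>> offset_;
};

int main() {
  SlotBuffers buffers;
  buffers.Init(/*slot_num=*/2, /*batch_size=*/4);
  for (int batch = 0; batch < 3; ++batch) {
    // The member buffers keep their capacity across iterations, so refills
    // after the first batch reuse the same memory.
    buffers.FillBatch({{0, 101}, {1, 202}, {0, 303}});
  }
  std::printf("reused buffers across batches\n");
  return 0;
}

The trade-off is the same as in the patch: the buffers live as long as the DataFeed object and are sized by a fixed per-slot estimate, in exchange for avoiding repeated allocation and deallocation on every batch.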