未验证 提交 9ff558a4 编写于 作者: H hutuxian 提交者: GitHub

Optimize DataFeed (#23957)

* Make batch_float_feasigns & batch_uint64_feasigns as member variable
上级 5cccc69f
......@@ -798,6 +798,19 @@ void MultiSlotInMemoryDataFeed::Init(
}
}
feed_vec_.resize(use_slots_.size());
const int kEstimatedFeasignNumPerSlot = 5; // Magic Number
for (size_t i = 0; i < all_slot_num; i++) {
batch_float_feasigns_.push_back(std::vector<float>());
batch_uint64_feasigns_.push_back(std::vector<uint64_t>());
batch_float_feasigns_[i].reserve(default_batch_size_ *
kEstimatedFeasignNumPerSlot);
batch_uint64_feasigns_[i].reserve(default_batch_size_ *
kEstimatedFeasignNumPerSlot);
offset_.push_back(std::vector<size_t>());
offset_[i].reserve(default_batch_size_ +
1); // Each lod info will prepend a zero
}
visit_.resize(all_slot_num, false);
pipe_command_ = data_feed_desc.pipe_command();
finish_init_ = true;
}
......@@ -989,13 +1002,12 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstance(Record* instance) {
void MultiSlotInMemoryDataFeed::PutToFeedVec(
const std::vector<Record>& ins_vec) {
#ifdef _LINUX
std::vector<std::vector<float>> batch_float_feasigns(use_slots_.size(),
std::vector<float>());
std::vector<std::vector<uint64_t>> batch_uint64_feasigns(
use_slots_.size(), std::vector<uint64_t>());
std::vector<std::vector<size_t>> offset(use_slots_.size(),
std::vector<size_t>{0});
std::vector<bool> visit(use_slots_.size(), false);
for (size_t i = 0; i < batch_float_feasigns_.size(); ++i) {
batch_float_feasigns_[i].clear();
batch_uint64_feasigns_[i].clear();
offset_[i].clear();
offset_[i].push_back(0);
}
ins_content_vec_.clear();
ins_content_vec_.reserve(ins_vec.size());
ins_id_vec_.clear();
......@@ -1005,30 +1017,31 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
ins_id_vec_.push_back(r.ins_id_);
ins_content_vec_.push_back(r.content_);
for (auto& item : r.float_feasigns_) {
batch_float_feasigns[item.slot()].push_back(item.sign().float_feasign_);
visit[item.slot()] = true;
batch_float_feasigns_[item.slot()].push_back(item.sign().float_feasign_);
visit_[item.slot()] = true;
}
for (auto& item : r.uint64_feasigns_) {
batch_uint64_feasigns[item.slot()].push_back(item.sign().uint64_feasign_);
visit[item.slot()] = true;
batch_uint64_feasigns_[item.slot()].push_back(
item.sign().uint64_feasign_);
visit_[item.slot()] = true;
}
for (size_t j = 0; j < use_slots_.size(); ++j) {
const auto& type = all_slots_type_[j];
if (visit[j]) {
visit[j] = false;
if (visit_[j]) {
visit_[j] = false;
} else {
// fill slot value with default value 0
if (type[0] == 'f') { // float
batch_float_feasigns[j].push_back(0.0);
batch_float_feasigns_[j].push_back(0.0);
} else if (type[0] == 'u') { // uint64
batch_uint64_feasigns[j].push_back(0);
batch_uint64_feasigns_[j].push_back(0);
}
}
// get offset of this ins in this slot
if (type[0] == 'f') { // float
offset[j].push_back(batch_float_feasigns[j].size());
offset_[j].push_back(batch_float_feasigns_[j].size());
} else if (type[0] == 'u') { // uint64
offset[j].push_back(batch_uint64_feasigns[j].size());
offset_[j].push_back(batch_uint64_feasigns_[j].size());
}
}
}
......@@ -1037,21 +1050,21 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
if (feed_vec_[i] == nullptr) {
continue;
}
int total_instance = offset[i].back();
int total_instance = offset_[i].back();
const auto& type = all_slots_type_[i];
if (type[0] == 'f') { // float
float* feasign = batch_float_feasigns[i].data();
float* feasign = batch_float_feasigns_[i].data();
float* tensor_ptr =
feed_vec_[i]->mutable_data<float>({total_instance, 1}, this->place_);
CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(float));
} else if (type[0] == 'u') { // uint64
// no uint64_t type in paddlepaddle
uint64_t* feasign = batch_uint64_feasigns[i].data();
uint64_t* feasign = batch_uint64_feasigns_[i].data();
int64_t* tensor_ptr = feed_vec_[i]->mutable_data<int64_t>(
{total_instance, 1}, this->place_);
CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(int64_t));
}
auto& slot_offset = offset[i];
auto& slot_offset = offset_[i];
LoD data_lod{slot_offset};
feed_vec_[i]->set_lod(data_lod);
if (use_slots_is_dense_[i]) {
......@@ -1427,13 +1440,12 @@ int PaddleBoxDataFeed::GetCurrentPhase() {
void PaddleBoxDataFeed::PutToFeedVec(const std::vector<Record*>& ins_vec) {
#ifdef _LINUX
std::vector<std::vector<float>> batch_float_feasigns(use_slots_.size(),
std::vector<float>());
std::vector<std::vector<uint64_t>> batch_uint64_feasigns(
use_slots_.size(), std::vector<uint64_t>());
std::vector<std::vector<size_t>> offset(use_slots_.size(),
std::vector<size_t>{0});
std::vector<bool> visit(use_slots_.size(), false);
for (size_t i = 0; i < batch_float_feasigns_.size(); ++i) {
batch_float_feasigns_[i].clear();
batch_uint64_feasigns_[i].clear();
offset_[i].clear();
offset_[i].push_back(0);
}
ins_content_vec_.clear();
ins_content_vec_.reserve(ins_vec.size());
ins_id_vec_.clear();
......@@ -1443,30 +1455,31 @@ void PaddleBoxDataFeed::PutToFeedVec(const std::vector<Record*>& ins_vec) {
ins_id_vec_.push_back(r->ins_id_);
ins_content_vec_.push_back(r->content_);
for (auto& item : r->float_feasigns_) {
batch_float_feasigns[item.slot()].push_back(item.sign().float_feasign_);
visit[item.slot()] = true;
batch_float_feasigns_[item.slot()].push_back(item.sign().float_feasign_);
visit_[item.slot()] = true;
}
for (auto& item : r->uint64_feasigns_) {
batch_uint64_feasigns[item.slot()].push_back(item.sign().uint64_feasign_);
visit[item.slot()] = true;
batch_uint64_feasigns_[item.slot()].push_back(
item.sign().uint64_feasign_);
visit_[item.slot()] = true;
}
for (size_t j = 0; j < use_slots_.size(); ++j) {
const auto& type = all_slots_type_[j];
if (visit[j]) {
visit[j] = false;
if (visit_[j]) {
visit_[j] = false;
} else {
// fill slot value with default value 0
if (type[0] == 'f') { // float
batch_float_feasigns[j].push_back(0.0);
batch_float_feasigns_[j].push_back(0.0);
} else if (type[0] == 'u') { // uint64
batch_uint64_feasigns[j].push_back(0);
batch_uint64_feasigns_[j].push_back(0);
}
}
// get offset of this ins in this slot
if (type[0] == 'f') { // float
offset[j].push_back(batch_float_feasigns[j].size());
offset_[j].push_back(batch_float_feasigns_[j].size());
} else if (type[0] == 'u') { // uint64
offset[j].push_back(batch_uint64_feasigns[j].size());
offset_[j].push_back(batch_uint64_feasigns_[j].size());
}
}
}
......@@ -1475,21 +1488,21 @@ void PaddleBoxDataFeed::PutToFeedVec(const std::vector<Record*>& ins_vec) {
if (feed_vec_[i] == nullptr) {
continue;
}
int total_instance = offset[i].back();
int total_instance = offset_[i].back();
const auto& type = all_slots_type_[i];
if (type[0] == 'f') { // float
float* feasign = batch_float_feasigns[i].data();
float* feasign = batch_float_feasigns_[i].data();
float* tensor_ptr =
feed_vec_[i]->mutable_data<float>({total_instance, 1}, this->place_);
CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(float));
} else if (type[0] == 'u') { // uint64
// no uint64_t type in paddlepaddle
uint64_t* feasign = batch_uint64_feasigns[i].data();
uint64_t* feasign = batch_uint64_feasigns_[i].data();
int64_t* tensor_ptr = feed_vec_[i]->mutable_data<int64_t>(
{total_instance, 1}, this->place_);
CopyToFeedTensor(tensor_ptr, feasign, total_instance * sizeof(int64_t));
}
auto& slot_offset = offset[i];
auto& slot_offset = offset_[i];
LoD data_lod{slot_offset};
feed_vec_[i]->set_lod(data_lod);
if (use_slots_is_dense_[i]) {
......
......@@ -597,6 +597,10 @@ class MultiSlotInMemoryDataFeed : public InMemoryDataFeed<Record> {
virtual void PutToFeedVec(const std::vector<Record>& ins_vec);
virtual void GetMsgFromLogKey(const std::string& log_key, uint64_t* search_id,
uint32_t* cmatch, uint32_t* rank);
std::vector<std::vector<float>> batch_float_feasigns_;
std::vector<std::vector<uint64_t>> batch_uint64_feasigns_;
std::vector<std::vector<size_t>> offset_;
std::vector<bool> visit_;
};
class PaddleBoxDataFeed : public MultiSlotInMemoryDataFeed {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册