提交 71cbc8bd 编写于 作者: Q Qiao Longfei

optimize code

上级 694e8945
...@@ -122,10 +122,9 @@ class MultiGzipReader : public Reader { ...@@ -122,10 +122,9 @@ class MultiGzipReader : public Reader {
size_t current_reader_index_ = 0; size_t current_reader_index_ = 0;
}; };
void CTRReader::ReadThread(const std::vector<std::string>& file_list, void ReadThread(const std::vector<std::string>& file_list,
const std::vector<std::string>& slots, const std::vector<std::string>& slots, int batch_size,
int batch_size, std::shared_ptr<LoDTensorBlockingQueue> queue) {
std::shared_ptr<LoDTensorBlockingQueue> queue) {
std::string line; std::string line;
std::vector<std::unordered_map<std::string, std::vector<int64_t>>> batch_data; std::vector<std::unordered_map<std::string, std::vector<int64_t>>> batch_data;
......
...@@ -30,19 +30,23 @@ namespace paddle { ...@@ -30,19 +30,23 @@ namespace paddle {
namespace operators { namespace operators {
namespace reader { namespace reader {
void ReadThread(const std::vector<std::string>& file_list,
const std::vector<std::string>& slots, int batch_size,
std::shared_ptr<LoDTensorBlockingQueue> queue);
class CTRReader : public framework::FileReader { class CTRReader : public framework::FileReader {
public: public:
explicit CTRReader(const std::shared_ptr<LoDTensorBlockingQueue>& queue, explicit CTRReader(const std::shared_ptr<LoDTensorBlockingQueue>& queue,
int batch_size, int thread_num, int batch_size, int thread_num,
const std::vector<std::string>& slots, const std::vector<std::string>& slots,
const std::vector<std::string>& file_list) const std::vector<std::string>& file_list)
: framework::FileReader() { : thread_num_(thread_num),
thread_num_ = thread_num; batch_size_(batch_size),
batch_size_ = batch_size; slots_(slots),
file_list_(file_list) {
PADDLE_ENFORCE(queue != nullptr, "LoDTensorBlockingQueue must not be null"); PADDLE_ENFORCE(queue != nullptr, "LoDTensorBlockingQueue must not be null");
queue_ = queue; queue_ = queue;
slots_ = slots; SplitFiles();
file_list_ = file_list;
} }
~CTRReader() { queue_->Close(); } ~CTRReader() { queue_->Close(); }
...@@ -53,30 +57,41 @@ class CTRReader : public framework::FileReader { ...@@ -53,30 +57,41 @@ class CTRReader : public framework::FileReader {
if (!success) out->clear(); if (!success) out->clear();
} }
void Shutdown() override { queue_->Close(); } void Shutdown() override {
VLOG(3) << "Shutdown reader";
for (auto& read_thread : read_threads_) {
read_thread->join();
}
read_threads_.clear();
queue_->Close();
}
void Start() override { void Start() override {
VLOG(3) << "Start reader";
queue_->ReOpen(); queue_->ReOpen();
// for (int i = 0; i < thread_num_; i++) { for (int i = 0; i < file_groups_.size(); i++) {
// read_threads_.emplace_back( read_threads_.emplace_back(new std::thread(std::bind(
// new std::thread(std::bind(&CTRReader::ReadThread, this, &ReadThread, file_groups_[i], slots_, batch_size_, queue_)));
// file_list_, }
// slots_, batch_size_, queue_)));
// }
} }
private: private:
void ReadThread(const std::vector<std::string>& file_list, void SplitFiles() {
const std::vector<std::string>& slots, int batch_size, file_groups_.resize(file_list_.size() > thread_num_ ? thread_num_
std::shared_ptr<LoDTensorBlockingQueue> queue); : file_list_.size());
for (int i = 0; i < file_list_.size(); ++i) {
file_groups_[i % thread_num_].push_back(file_list_[i]);
}
}
private: private:
const int thread_num_;
const int batch_size_;
const std::vector<std::string> slots_;
const std::vector<std::string> file_list_;
std::shared_ptr<LoDTensorBlockingQueue> queue_; std::shared_ptr<LoDTensorBlockingQueue> queue_;
std::vector<std::unique_ptr<std::thread>> read_threads_; std::vector<std::unique_ptr<std::thread>> read_threads_;
int thread_num_; std::vector<std::vector<std::string>> file_groups_;
int batch_size_;
std::vector<std::string> slots_;
std::vector<std::string> file_list_;
}; };
} // namespace reader } // namespace reader
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册