提交 0f3ece77 编写于 作者: Q Qiao Longfei

use gzstream

上级 a1e0f5ab
...@@ -16,7 +16,7 @@ function(reader_library TARGET_NAME) ...@@ -16,7 +16,7 @@ function(reader_library TARGET_NAME)
endfunction() endfunction()
cc_library(buffered_reader SRCS buffered_reader.cc DEPS reader simple_threadpool) cc_library(buffered_reader SRCS buffered_reader.cc DEPS reader simple_threadpool)
cc_library(ctr_reader SRCS ctr_reader.cc DEPS reader simple_threadpool boost) cc_library(ctr_reader SRCS ctr_reader.cc DEPS reader simple_threadpool boost gzstream)
reader_library(open_files_op SRCS open_files_op.cc DEPS buffered_reader) reader_library(open_files_op SRCS open_files_op.cc DEPS buffered_reader)
reader_library(create_ctr_reader_op SRCS create_ctr_reader_op.cc DEPS ctr_reader) reader_library(create_ctr_reader_op SRCS create_ctr_reader_op.cc DEPS ctr_reader)
reader_library(create_random_data_generator_op SRCS create_random_data_generator_op.cc) reader_library(create_random_data_generator_op SRCS create_random_data_generator_op.cc)
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
#include "paddle/fluid/operators/reader/ctr_reader.h" #include "paddle/fluid/operators/reader/ctr_reader.h"
#include <gzstream.h>
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
...@@ -24,10 +26,6 @@ ...@@ -24,10 +26,6 @@
#include <algorithm> #include <algorithm>
#include <random> #include <random>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
namespace paddle { namespace paddle {
namespace operators { namespace operators {
namespace reader { namespace reader {
...@@ -75,23 +73,19 @@ static inline void parse_line( ...@@ -75,23 +73,19 @@ static inline void parse_line(
class GzipReader { class GzipReader {
public: public:
explicit GzipReader(const std::string& file_name) : instream_(&inbuf_) { explicit GzipReader(const std::string& file_name)
file_ = std::ifstream(file_name, std::ios_base::in | std::ios_base::binary); : gzstream_(file_name.c_str()) {}
inbuf_.push(boost::iostreams::gzip_decompressor());
inbuf_.push(file_);
// Convert streambuf to istream
}
~GzipReader() { file_.close(); } ~GzipReader() {}
bool HasNext() { return instream_.peek() != EOF; } bool HasNext() { return gzstream_.peek() != EOF; }
void NextLine(std::string& line) { std::getline(instream_, line); } // NOLINT void NextLine(std::string* line) { // NOLINT
std::getline(gzstream_, line);
}
private: private:
boost::iostreams::filtering_streambuf<boost::iostreams::input> inbuf_; igzstream gzstream_;
std::ifstream file_;
std::istream instream_;
}; };
class MultiGzipReader { class MultiGzipReader {
...@@ -113,8 +107,8 @@ class MultiGzipReader { ...@@ -113,8 +107,8 @@ class MultiGzipReader {
return true; return true;
} }
void NextLine(std::string& line) { // NOLINT void NextLine(std::string* line) {
readers_[current_reader_index_]->NextLine(line); readers_[current_reader_index_]->NextLine(*line);
} }
private: private:
...@@ -122,12 +116,6 @@ class MultiGzipReader { ...@@ -122,12 +116,6 @@ class MultiGzipReader {
size_t current_reader_index_ = 0; size_t current_reader_index_ = 0;
}; };
// void CTRReader::ReadThread(
// const std::vector<std::string> &file_list,
// const std::vector<std::string>& slots,
// int batch_size,
// std::shared_ptr<LoDTensorBlockingQueue>& queue) {}
void CTRReader::ReadThread(const std::vector<std::string>& file_list, void CTRReader::ReadThread(const std::vector<std::string>& file_list,
const std::vector<std::string>& slots, const std::vector<std::string>& slots,
int batch_size, int batch_size,
...@@ -135,14 +123,12 @@ void CTRReader::ReadThread(const std::vector<std::string>& file_list, ...@@ -135,14 +123,12 @@ void CTRReader::ReadThread(const std::vector<std::string>& file_list,
std::string line; std::string line;
// read all files // read all files
std::vector<std::string> all_lines;
MultiGzipReader reader(file_list); MultiGzipReader reader(file_list);
reader.NextLine(&line);
for (int j = 0; j < all_lines.size(); ++j) {
std::unordered_map<std::string, std::vector<int64_t>> slots_to_data; std::unordered_map<std::string, std::vector<int64_t>> slots_to_data;
int64_t label; int64_t label;
parse_line(all_lines[j], slots, &label, &slots_to_data); parse_line(line, slots, &label, &slots_to_data);
}
} }
} // namespace reader } // namespace reader
......
...@@ -22,10 +22,6 @@ ...@@ -22,10 +22,6 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include <boost/iostreams/copy.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_streambuf.hpp>
#include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册