提交 92cbaa41 编写于 作者: Q Qiao Longfei

add GetTimeInSec

上级 dd2dfeb6
......@@ -44,4 +44,4 @@ SET_PROPERTY(TARGET gzstream PROPERTY IMPORTED_LOCATION
"${GZSTREAM_INSTALL_DIR}/lib/libgzstream.a")
include_directories(${GZSTREAM_INCLUDE_DIR})
ADD_DEPENDENCIES(gzstream extern_gzstream)
ADD_DEPENDENCIES(gzstream extern_gzstream zlib)
......@@ -16,7 +16,7 @@ function(reader_library TARGET_NAME)
endfunction()
cc_library(buffered_reader SRCS buffered_reader.cc DEPS reader simple_threadpool)
cc_library(ctr_reader SRCS ctr_reader.cc DEPS reader simple_threadpool boost gzstream)
cc_library(ctr_reader SRCS ctr_reader.cc DEPS gzstream reader zlib)
cc_test(ctr_reader_test SRCS ctr_reader_test.cc DEPS ctr_reader)
reader_library(open_files_op SRCS open_files_op.cc DEPS buffered_reader)
reader_library(create_ctr_reader_op SRCS create_ctr_reader_op.cc DEPS ctr_reader)
......
......@@ -58,10 +58,8 @@ static inline void parse_line(
const std::string& item = ret[i];
std::vector<std::string> feasign_and_slot;
string_split(item, ':', &feasign_and_slot);
auto& slot = feasign_and_slot[1];
if (feasign_and_slot.size() == 2 &&
slot_to_index.find(slot) != slot_to_index.end()) {
const std::string& slot = feasign_and_slot[1];
slot_to_index.find(feasign_and_slot[1]) != slot_to_index.end()) {
int64_t feasign = std::strtoll(feasign_and_slot[0].c_str(), NULL, 10);
(*slot_to_data)[feasign_and_slot[1]].push_back(feasign);
}
......@@ -164,7 +162,7 @@ void ReadThread(const std::vector<std::string>& file_list,
VLOG(3) << "reader inited";
clock_t t0 = clock();
uint64_t t0 = GetTimeInSec();
int i = 0;
......@@ -219,13 +217,12 @@ void ReadThread(const std::vector<std::string>& file_list,
memcpy(label_tensor_data, batch_label.data(), batch_label.size());
lod_datas.push_back(label_tensor);
// queue->Push(lod_datas);
queue->Push(lod_datas);
VLOG(4) << "push one data, queue_size=" << queue->Size();
if (i != 0 && i % 100 == 0) {
clock_t t1 = clock();
float line_per_s = 100 * batch_size * static_cast<int64>(CLOCKS_PER_SEC) /
static_cast<int>(t1 - t0);
uint64_t t1 = GetTimeInSec();
float line_per_s = 100 * batch_size / static_cast<int>(t1 - t0);
VLOG(3) << "[" << thread_id << "]"
<< " line_per_second = " << line_per_s;
t0 = t1;
......
......@@ -14,6 +14,8 @@
#pragma once
#include <sys/time.h>
#include <cstdlib>
#include <fstream>
#include <iostream>
......@@ -37,6 +39,15 @@ void ReadThread(const std::vector<std::string>& file_list,
int thread_id, std::vector<ReaderThreadStatus>* thread_status,
std::shared_ptr<LoDTensorBlockingQueue> queue);
// Returns the current reading of a steady clock, expressed as whole seconds
// since that clock's epoch.
//
// The clock is std::chrono::high_resolution_clock when it reports
// is_steady, and std::chrono::steady_clock otherwise. The value is intended
// to be used as a difference between two calls (elapsed time).
// NOTE(review): the result has one-second granularity, so two calls made
// within the same second return the same value; callers that divide by a
// difference of two readings should account for a zero difference.
inline uint64_t GetTimeInSec() {
  using HighResClock = std::chrono::high_resolution_clock;
  using SteadyClock =
      std::conditional<HighResClock::is_steady, HighResClock,
                       std::chrono::steady_clock>::type;

  const auto since_epoch = SteadyClock::now().time_since_epoch();
  const auto whole_seconds =
      std::chrono::duration_cast<std::chrono::seconds>(since_epoch);
  return static_cast<uint64_t>(whole_seconds.count());
}
class CTRReader : public framework::FileReader {
public:
explicit CTRReader(const std::shared_ptr<LoDTensorBlockingQueue>& queue,
......@@ -88,7 +99,7 @@ class CTRReader : public framework::FileReader {
private:
void SplitFiles() {
file_groups_.resize(thread_num_);
for (int i = 0; i < file_list_.size(); ++i) {
for (size_t i = 0; i < file_list_.size(); ++i) {
auto& file_name = file_list_[i];
std::ifstream f(file_name.c_str());
PADDLE_ENFORCE(f.good(), "file %s not exist!", file_name);
......
......@@ -25,16 +25,17 @@ using paddle::operators::reader::LoDTensorBlockingQueue;
using paddle::operators::reader::LoDTensorBlockingQueueHolder;
using paddle::operators::reader::CTRReader;
using paddle::framework::LoDTensor;
using paddle::operators::reader::GetTimeInSec;
TEST(CTR_READER, read_data) {
LoDTensorBlockingQueueHolder queue_holder;
int capacity = 64;
queue_holder.InitOnce(capacity, {}, true);
queue_holder.InitOnce(capacity, {}, false);
std::shared_ptr<LoDTensorBlockingQueue> queue = queue_holder.GetQueue();
int batch_size = 10;
int thread_num = 2;
int thread_num = 4;
std::vector<std::string> slots = {
"6002", "6003", "6004", "6005", "6006", "6007", "6008", "6009", "6010",
"6011", "6012", "6013", "6014", "6015", "6016", "6017", "6018", "6019",
......@@ -109,7 +110,8 @@ TEST(CTR_READER, read_data) {
std::vector<std::string> file_list = {
"/Users/qiaolongfei/project/gzip_test/part-00000-A.gz",
"/Users/qiaolongfei/project/gzip_test/part-00001-A.gz",
"/Users/qiaolongfei/project/gzip_test/part-00002-A.gz"};
"/Users/qiaolongfei/project/gzip_test/part-00002-A.gz",
"/Users/qiaolongfei/project/gzip_test/part-00003-A.gz"};
CTRReader reader(queue, batch_size, thread_num, slots, file_list);
......@@ -118,13 +120,11 @@ TEST(CTR_READER, read_data) {
std::cout << "start to reader data" << std::endl;
std::vector<LoDTensor> out;
int read_batch = 1000;
clock_t t0 = clock();
uint64_t t0 = GetTimeInSec();
for (int i = 0; i < read_batch; ++i) {
reader.ReadNext(&out);
}
clock_t t1 = clock();
float line_per_s = read_batch * batch_size *
static_cast<int64>(CLOCKS_PER_SEC) /
static_cast<int>(t1 - t0);
uint64_t t1 = GetTimeInSec();
float line_per_s = read_batch * batch_size / static_cast<int>(t1 - t0);
VLOG(3) << "line_per_second = " << line_per_s;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册