提交 ecae157e 编写于 作者: T Tao Luo

simplify some data record in analyzer_tester

test=develop
上级 05f1b65d
......@@ -19,11 +19,9 @@ namespace inference {
using contrib::AnalysisConfig;
struct DataRecord {
std::vector<std::vector<int64_t>> query_data_all, title_data_all;
std::vector<std::vector<int64_t>> query, title;
std::vector<size_t> lod1, lod2;
size_t batch_iter{0};
size_t batch_size{1};
size_t num_samples; // total number of samples
size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
......@@ -33,22 +31,9 @@ struct DataRecord {
DataRecord data;
size_t batch_end = batch_iter + batch_size;
// NOTE: skip the final batch if not enough data is provided.
if (batch_end <= query_data_all.size()) {
data.query_data_all.assign(query_data_all.begin() + batch_iter,
query_data_all.begin() + batch_end);
data.title_data_all.assign(title_data_all.begin() + batch_iter,
title_data_all.begin() + batch_end);
// Prepare LoDs
data.lod1.push_back(0);
data.lod2.push_back(0);
CHECK(!data.query_data_all.empty());
CHECK(!data.title_data_all.empty());
CHECK_EQ(data.query_data_all.size(), data.title_data_all.size());
for (size_t j = 0; j < data.query_data_all.size(); j++) {
// calculate lod
data.lod1.push_back(data.lod1.back() + data.query_data_all[j].size());
data.lod2.push_back(data.lod2.back() + data.title_data_all[j].size());
}
if (batch_end <= query.size()) {
GetInputPerBatch(query, &data.query, &data.lod1, batch_iter, batch_end);
GetInputPerBatch(title, &data.title, &data.lod2, batch_iter, batch_end);
}
batch_iter += batch_size;
return data;
......@@ -67,8 +52,8 @@ struct DataRecord {
// load title data
std::vector<int64_t> title_data;
split_to_int64(data[1], ' ', &title_data);
query_data_all.push_back(std::move(query_data));
title_data_all.push_back(std::move(title_data));
query.push_back(std::move(query_data));
title.push_back(std::move(title_data));
}
num_samples = num_lines;
}
......@@ -81,10 +66,8 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_title_tensor.name = "right";
auto one_batch = data->NextBatch();
// assign data
TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query_data_all,
one_batch.lod1);
TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title_data_all,
one_batch.lod2);
TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query, one_batch.lod1);
TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title, one_batch.lod2);
// Set inputs.
input_slots->assign({lod_query_tensor, lod_title_tensor});
for (auto &tensor : *input_slots) {
......
......@@ -19,11 +19,9 @@ namespace inference {
using contrib::AnalysisConfig;
struct DataRecord {
std::vector<std::vector<int64_t>> word_data_all, mention_data_all;
std::vector<std::vector<int64_t>> word, mention;
std::vector<size_t> lod; // two inputs have the same lod info.
size_t batch_iter{0};
size_t batch_size{1};
size_t num_samples; // total number of samples
size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
......@@ -33,20 +31,10 @@ struct DataRecord {
DataRecord data;
size_t batch_end = batch_iter + batch_size;
// NOTE: skip the final batch if not enough data is provided.
if (batch_end <= word_data_all.size()) {
data.word_data_all.assign(word_data_all.begin() + batch_iter,
word_data_all.begin() + batch_end);
data.mention_data_all.assign(mention_data_all.begin() + batch_iter,
mention_data_all.begin() + batch_end);
// Prepare LoDs
data.lod.push_back(0);
CHECK(!data.word_data_all.empty());
CHECK(!data.mention_data_all.empty());
CHECK_EQ(data.word_data_all.size(), data.mention_data_all.size());
for (size_t j = 0; j < data.word_data_all.size(); j++) {
// calculate lod
data.lod.push_back(data.lod.back() + data.word_data_all[j].size());
}
if (batch_end <= word.size()) {
GetInputPerBatch(word, &data.word, &data.lod, batch_iter, batch_end);
GetInputPerBatch(mention, &data.mention, &data.lod, batch_iter,
batch_end);
}
batch_iter += batch_size;
return data;
......@@ -65,8 +53,8 @@ struct DataRecord {
// load mention data
std::vector<int64_t> mention_data;
split_to_int64(data[3], ' ', &mention_data);
word_data_all.push_back(std::move(word_data));
mention_data_all.push_back(std::move(mention_data));
word.push_back(std::move(word_data));
mention.push_back(std::move(mention_data));
}
num_samples = num_lines;
}
......@@ -79,9 +67,8 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_mention_tensor.name = "mention";
auto one_batch = data->NextBatch();
// assign data
TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word_data_all,
one_batch.lod);
TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention_data_all,
TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word, one_batch.lod);
TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention,
one_batch.lod);
// Set inputs.
input_slots->assign({lod_word_tensor, lod_mention_tensor});
......
......@@ -18,12 +18,9 @@ namespace paddle {
namespace inference {
struct DataRecord {
std::vector<std::vector<int64_t>> title1_all, title2_all, title3_all, l1_all;
std::vector<std::vector<int64_t>> title1, title2, title3, l1;
std::vector<size_t> title1_lod, title2_lod, title3_lod, l1_lod;
size_t batch_iter{0};
size_t batch_size{1};
size_t num_samples; // total number of samples
std::vector<size_t> lod1, lod2, lod3, l1_lod;
size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
......@@ -33,41 +30,11 @@ struct DataRecord {
DataRecord data;
size_t batch_end = batch_iter + batch_size;
// NOTE: skip the final batch if not enough data is provided.
if (batch_end <= title1_all.size()) {
data.title1_all.assign(title1_all.begin() + batch_iter,
title1_all.begin() + batch_end);
data.title2_all.assign(title2_all.begin() + batch_iter,
title2_all.begin() + batch_end);
data.title3_all.assign(title3_all.begin() + batch_iter,
title3_all.begin() + batch_end);
data.l1_all.assign(l1_all.begin() + batch_iter,
l1_all.begin() + batch_end);
// Prepare LoDs
data.title1_lod.push_back(0);
data.title2_lod.push_back(0);
data.title3_lod.push_back(0);
data.l1_lod.push_back(0);
CHECK(!data.title1_all.empty());
CHECK(!data.title2_all.empty());
CHECK(!data.title3_all.empty());
CHECK(!data.l1_all.empty());
CHECK_EQ(data.title1_all.size(), data.title2_all.size());
CHECK_EQ(data.title1_all.size(), data.title3_all.size());
CHECK_EQ(data.title1_all.size(), data.l1_all.size());
for (size_t j = 0; j < data.title1_all.size(); j++) {
data.title1.push_back(data.title1_all[j]);
data.title2.push_back(data.title2_all[j]);
data.title3.push_back(data.title3_all[j]);
data.l1.push_back(data.l1_all[j]);
// calculate lod
data.title1_lod.push_back(data.title1_lod.back() +
data.title1_all[j].size());
data.title2_lod.push_back(data.title2_lod.back() +
data.title2_all[j].size());
data.title3_lod.push_back(data.title3_lod.back() +
data.title3_all[j].size());
data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size());
}
if (batch_end <= title1.size()) {
GetInputPerBatch(title1, &data.title1, &data.lod1, batch_iter, batch_end);
GetInputPerBatch(title2, &data.title2, &data.lod2, batch_iter, batch_end);
GetInputPerBatch(title3, &data.title3, &data.lod3, batch_iter, batch_end);
GetInputPerBatch(l1, &data.l1, &data.l1_lod, batch_iter, batch_end);
}
batch_iter += batch_size;
return data;
......@@ -92,10 +59,10 @@ struct DataRecord {
// load l1 data
std::vector<int64_t> l1_data;
split_to_int64(data[3], ' ', &l1_data);
title1_all.push_back(std::move(title1_data));
title2_all.push_back(std::move(title2_data));
title3_all.push_back(std::move(title3_data));
l1_all.push_back(std::move(l1_data));
title1.push_back(std::move(title1_data));
title2.push_back(std::move(title2_data));
title3.push_back(std::move(title3_data));
l1.push_back(std::move(l1_data));
}
num_samples = num_lines;
}
......@@ -110,12 +77,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
l1_tensor.name = "l1";
auto one_batch = data->NextBatch();
// assign data
TensorAssignData<int64_t>(&title1_tensor, one_batch.title1,
one_batch.title1_lod);
TensorAssignData<int64_t>(&title2_tensor, one_batch.title2,
one_batch.title2_lod);
TensorAssignData<int64_t>(&title3_tensor, one_batch.title3,
one_batch.title3_lod);
TensorAssignData<int64_t>(&title1_tensor, one_batch.title1, one_batch.lod1);
TensorAssignData<int64_t>(&title2_tensor, one_batch.title2, one_batch.lod2);
TensorAssignData<int64_t>(&title3_tensor, one_batch.title3, one_batch.lod3);
TensorAssignData<int64_t>(&l1_tensor, one_batch.l1, one_batch.l1_lod);
// Set inputs.
input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor});
......
......@@ -169,6 +169,18 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
(*inputs).emplace_back(input_slots);
}
// Copies the rows in[batch_iter, batch_end) into *out and rebuilds *lod as the
// running element offsets of those rows.
//
//   in         - all samples; each inner vector holds one sample's values.
//   out        - receives a copy of every selected row. Rows are appended, so
//                anything already stored in *out is kept.
//   lod        - cleared on every call, then filled with 0 followed by the
//                cumulative element count after each selected row. Because it
//                is reset here, the same vector can be passed for several
//                inputs that share one batch range.
//   batch_iter - index of the first row to copy.
//   batch_end  - one past the index of the last row to copy.
//
// A range that reaches beyond in.size() is clamped to the rows that exist
// rather than advancing an iterator past in.end(), which is undefined
// behavior. An empty or inverted range copies nothing and leaves *lod == {0}.
void GetInputPerBatch(const std::vector<std::vector<int64_t>> &in,
                      std::vector<std::vector<int64_t>> *out,
                      std::vector<size_t> *lod, size_t batch_iter,
                      size_t batch_end) {
  lod->clear();
  lod->push_back(0);

  // Never index past the data that is actually available.
  const size_t last = batch_end < in.size() ? batch_end : in.size();
  if (batch_iter >= last) {
    return;
  }

  lod->reserve(last - batch_iter + 1);
  for (size_t row = batch_iter; row < last; ++row) {
    out->push_back(in[row]);
    lod->push_back(lod->back() + in[row].size());  // running offset
  }
}
void TestOneThreadPrediction(
const PaddlePredictor::Config *config,
const std::vector<std::vector<PaddleTensor>> &inputs,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册