提交 ecae157e 编写于 作者: T Tao Luo

simplify some data records in analyzer_tester

test=develop
上级 05f1b65d
...@@ -19,11 +19,9 @@ namespace inference { ...@@ -19,11 +19,9 @@ namespace inference {
using contrib::AnalysisConfig; using contrib::AnalysisConfig;
struct DataRecord { struct DataRecord {
std::vector<std::vector<int64_t>> query_data_all, title_data_all; std::vector<std::vector<int64_t>> query, title;
std::vector<size_t> lod1, lod2; std::vector<size_t> lod1, lod2;
size_t batch_iter{0}; size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
size_t batch_size{1};
size_t num_samples; // total number of samples
DataRecord() = default; DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1) explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) { : batch_size(batch_size) {
...@@ -33,22 +31,9 @@ struct DataRecord { ...@@ -33,22 +31,9 @@ struct DataRecord {
DataRecord data; DataRecord data;
size_t batch_end = batch_iter + batch_size; size_t batch_end = batch_iter + batch_size;
// NOTE skip the final batch, if no enough data is provided. // NOTE skip the final batch, if no enough data is provided.
if (batch_end <= query_data_all.size()) { if (batch_end <= query.size()) {
data.query_data_all.assign(query_data_all.begin() + batch_iter, GetInputPerBatch(query, &data.query, &data.lod1, batch_iter, batch_end);
query_data_all.begin() + batch_end); GetInputPerBatch(title, &data.title, &data.lod2, batch_iter, batch_end);
data.title_data_all.assign(title_data_all.begin() + batch_iter,
title_data_all.begin() + batch_end);
// Prepare LoDs
data.lod1.push_back(0);
data.lod2.push_back(0);
CHECK(!data.query_data_all.empty());
CHECK(!data.title_data_all.empty());
CHECK_EQ(data.query_data_all.size(), data.title_data_all.size());
for (size_t j = 0; j < data.query_data_all.size(); j++) {
// calculate lod
data.lod1.push_back(data.lod1.back() + data.query_data_all[j].size());
data.lod2.push_back(data.lod2.back() + data.title_data_all[j].size());
}
} }
batch_iter += batch_size; batch_iter += batch_size;
return data; return data;
...@@ -67,8 +52,8 @@ struct DataRecord { ...@@ -67,8 +52,8 @@ struct DataRecord {
// load title data // load title data
std::vector<int64_t> title_data; std::vector<int64_t> title_data;
split_to_int64(data[1], ' ', &title_data); split_to_int64(data[1], ' ', &title_data);
query_data_all.push_back(std::move(query_data)); query.push_back(std::move(query_data));
title_data_all.push_back(std::move(title_data)); title.push_back(std::move(title_data));
} }
num_samples = num_lines; num_samples = num_lines;
} }
...@@ -81,10 +66,8 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -81,10 +66,8 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_title_tensor.name = "right"; lod_title_tensor.name = "right";
auto one_batch = data->NextBatch(); auto one_batch = data->NextBatch();
// assign data // assign data
TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query_data_all, TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query, one_batch.lod1);
one_batch.lod1); TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title, one_batch.lod2);
TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title_data_all,
one_batch.lod2);
// Set inputs. // Set inputs.
input_slots->assign({lod_query_tensor, lod_title_tensor}); input_slots->assign({lod_query_tensor, lod_title_tensor});
for (auto &tensor : *input_slots) { for (auto &tensor : *input_slots) {
......
...@@ -19,11 +19,9 @@ namespace inference { ...@@ -19,11 +19,9 @@ namespace inference {
using contrib::AnalysisConfig; using contrib::AnalysisConfig;
struct DataRecord { struct DataRecord {
std::vector<std::vector<int64_t>> word_data_all, mention_data_all; std::vector<std::vector<int64_t>> word, mention;
std::vector<size_t> lod; // two inputs have the same lod info. std::vector<size_t> lod; // two inputs have the same lod info.
size_t batch_iter{0}; size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
size_t batch_size{1};
size_t num_samples; // total number of samples
DataRecord() = default; DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1) explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) { : batch_size(batch_size) {
...@@ -33,20 +31,10 @@ struct DataRecord { ...@@ -33,20 +31,10 @@ struct DataRecord {
DataRecord data; DataRecord data;
size_t batch_end = batch_iter + batch_size; size_t batch_end = batch_iter + batch_size;
// NOTE skip the final batch, if no enough data is provided. // NOTE skip the final batch, if no enough data is provided.
if (batch_end <= word_data_all.size()) { if (batch_end <= word.size()) {
data.word_data_all.assign(word_data_all.begin() + batch_iter, GetInputPerBatch(word, &data.word, &data.lod, batch_iter, batch_end);
word_data_all.begin() + batch_end); GetInputPerBatch(mention, &data.mention, &data.lod, batch_iter,
data.mention_data_all.assign(mention_data_all.begin() + batch_iter, batch_end);
mention_data_all.begin() + batch_end);
// Prepare LoDs
data.lod.push_back(0);
CHECK(!data.word_data_all.empty());
CHECK(!data.mention_data_all.empty());
CHECK_EQ(data.word_data_all.size(), data.mention_data_all.size());
for (size_t j = 0; j < data.word_data_all.size(); j++) {
// calculate lod
data.lod.push_back(data.lod.back() + data.word_data_all[j].size());
}
} }
batch_iter += batch_size; batch_iter += batch_size;
return data; return data;
...@@ -65,8 +53,8 @@ struct DataRecord { ...@@ -65,8 +53,8 @@ struct DataRecord {
// load mention data // load mention data
std::vector<int64_t> mention_data; std::vector<int64_t> mention_data;
split_to_int64(data[3], ' ', &mention_data); split_to_int64(data[3], ' ', &mention_data);
word_data_all.push_back(std::move(word_data)); word.push_back(std::move(word_data));
mention_data_all.push_back(std::move(mention_data)); mention.push_back(std::move(mention_data));
} }
num_samples = num_lines; num_samples = num_lines;
} }
...@@ -79,9 +67,8 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -79,9 +67,8 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_mention_tensor.name = "mention"; lod_mention_tensor.name = "mention";
auto one_batch = data->NextBatch(); auto one_batch = data->NextBatch();
// assign data // assign data
TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word_data_all, TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word, one_batch.lod);
one_batch.lod); TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention,
TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention_data_all,
one_batch.lod); one_batch.lod);
// Set inputs. // Set inputs.
input_slots->assign({lod_word_tensor, lod_mention_tensor}); input_slots->assign({lod_word_tensor, lod_mention_tensor});
......
...@@ -18,12 +18,9 @@ namespace paddle { ...@@ -18,12 +18,9 @@ namespace paddle {
namespace inference { namespace inference {
struct DataRecord { struct DataRecord {
std::vector<std::vector<int64_t>> title1_all, title2_all, title3_all, l1_all;
std::vector<std::vector<int64_t>> title1, title2, title3, l1; std::vector<std::vector<int64_t>> title1, title2, title3, l1;
std::vector<size_t> title1_lod, title2_lod, title3_lod, l1_lod; std::vector<size_t> lod1, lod2, lod3, l1_lod;
size_t batch_iter{0}; size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
size_t batch_size{1};
size_t num_samples; // total number of samples
DataRecord() = default; DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1) explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) { : batch_size(batch_size) {
...@@ -33,41 +30,11 @@ struct DataRecord { ...@@ -33,41 +30,11 @@ struct DataRecord {
DataRecord data; DataRecord data;
size_t batch_end = batch_iter + batch_size; size_t batch_end = batch_iter + batch_size;
// NOTE skip the final batch, if no enough data is provided. // NOTE skip the final batch, if no enough data is provided.
if (batch_end <= title1_all.size()) { if (batch_end <= title1.size()) {
data.title1_all.assign(title1_all.begin() + batch_iter, GetInputPerBatch(title1, &data.title1, &data.lod1, batch_iter, batch_end);
title1_all.begin() + batch_end); GetInputPerBatch(title2, &data.title2, &data.lod2, batch_iter, batch_end);
data.title2_all.assign(title2_all.begin() + batch_iter, GetInputPerBatch(title3, &data.title3, &data.lod3, batch_iter, batch_end);
title2_all.begin() + batch_end); GetInputPerBatch(l1, &data.l1, &data.l1_lod, batch_iter, batch_end);
data.title3_all.assign(title3_all.begin() + batch_iter,
title3_all.begin() + batch_end);
data.l1_all.assign(l1_all.begin() + batch_iter,
l1_all.begin() + batch_end);
// Prepare LoDs
data.title1_lod.push_back(0);
data.title2_lod.push_back(0);
data.title3_lod.push_back(0);
data.l1_lod.push_back(0);
CHECK(!data.title1_all.empty());
CHECK(!data.title2_all.empty());
CHECK(!data.title3_all.empty());
CHECK(!data.l1_all.empty());
CHECK_EQ(data.title1_all.size(), data.title2_all.size());
CHECK_EQ(data.title1_all.size(), data.title3_all.size());
CHECK_EQ(data.title1_all.size(), data.l1_all.size());
for (size_t j = 0; j < data.title1_all.size(); j++) {
data.title1.push_back(data.title1_all[j]);
data.title2.push_back(data.title2_all[j]);
data.title3.push_back(data.title3_all[j]);
data.l1.push_back(data.l1_all[j]);
// calculate lod
data.title1_lod.push_back(data.title1_lod.back() +
data.title1_all[j].size());
data.title2_lod.push_back(data.title2_lod.back() +
data.title2_all[j].size());
data.title3_lod.push_back(data.title3_lod.back() +
data.title3_all[j].size());
data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size());
}
} }
batch_iter += batch_size; batch_iter += batch_size;
return data; return data;
...@@ -92,10 +59,10 @@ struct DataRecord { ...@@ -92,10 +59,10 @@ struct DataRecord {
// load l1 data // load l1 data
std::vector<int64_t> l1_data; std::vector<int64_t> l1_data;
split_to_int64(data[3], ' ', &l1_data); split_to_int64(data[3], ' ', &l1_data);
title1_all.push_back(std::move(title1_data)); title1.push_back(std::move(title1_data));
title2_all.push_back(std::move(title2_data)); title2.push_back(std::move(title2_data));
title3_all.push_back(std::move(title3_data)); title3.push_back(std::move(title3_data));
l1_all.push_back(std::move(l1_data)); l1.push_back(std::move(l1_data));
} }
num_samples = num_lines; num_samples = num_lines;
} }
...@@ -110,12 +77,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -110,12 +77,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
l1_tensor.name = "l1"; l1_tensor.name = "l1";
auto one_batch = data->NextBatch(); auto one_batch = data->NextBatch();
// assign data // assign data
TensorAssignData<int64_t>(&title1_tensor, one_batch.title1, TensorAssignData<int64_t>(&title1_tensor, one_batch.title1, one_batch.lod1);
one_batch.title1_lod); TensorAssignData<int64_t>(&title2_tensor, one_batch.title2, one_batch.lod2);
TensorAssignData<int64_t>(&title2_tensor, one_batch.title2, TensorAssignData<int64_t>(&title3_tensor, one_batch.title3, one_batch.lod3);
one_batch.title2_lod);
TensorAssignData<int64_t>(&title3_tensor, one_batch.title3,
one_batch.title3_lod);
TensorAssignData<int64_t>(&l1_tensor, one_batch.l1, one_batch.l1_lod); TensorAssignData<int64_t>(&l1_tensor, one_batch.l1, one_batch.l1_lod);
// Set inputs. // Set inputs.
input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor}); input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor});
......
...@@ -169,6 +169,18 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs, ...@@ -169,6 +169,18 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
(*inputs).emplace_back(input_slots); (*inputs).emplace_back(input_slots);
} }
// Copy the samples in [batch_iter, batch_end) from `in` into `out`, and
// rebuild the LoD (level-of-detail) offsets for that batch in `lod`:
// lod[0] == 0 and each subsequent entry is the running sum of sample sizes.
// NOTE: `out` is appended to (not cleared); `lod` is reset on every call.
void GetInputPerBatch(const std::vector<std::vector<int64_t>> &in,
                      std::vector<std::vector<int64_t>> *out,
                      std::vector<size_t> *lod, size_t batch_iter,
                      size_t batch_end) {
  lod->assign(1, 0);  // a LoD always starts at offset 0
  for (size_t i = batch_iter; i < batch_end; ++i) {
    const auto &sample = in[i];
    out->push_back(sample);
    lod->push_back(lod->back() + sample.size());  // running offset
  }
}
void TestOneThreadPrediction( void TestOneThreadPrediction(
const PaddlePredictor::Config *config, const PaddlePredictor::Config *config,
const std::vector<std::vector<PaddleTensor>> &inputs, const std::vector<std::vector<PaddleTensor>> &inputs,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册