未验证 提交 85471533 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #15079 from luotao1/analysis_test

simplify analysis tests
...@@ -113,6 +113,16 @@ static void TensorAssignData(PaddleTensor *tensor, ...@@ -113,6 +113,16 @@ static void TensorAssignData(PaddleTensor *tensor,
} }
} }
// Fills `tensor` from nested `data` and attaches `lod` as its single LoD level.
//
// tensor: destination; its `shape` and `lod` members are overwritten here.
// data:   nested values, forwarded unchanged to the (tensor, data) overload of
//         TensorAssignData, which is expected to copy them into the tensor.
// lod:    offsets for the one LoD level. Its last entry is used as the first
//         dimension of the shape (presumably the total token count of the
//         batch - confirm against the code that builds `lod`).
//
// An empty `lod` is treated as "zero rows" instead of reading
// lod[lod.size() - 1], which would index far out of bounds.
template <typename T>
static void TensorAssignData(PaddleTensor *tensor,
                             const std::vector<std::vector<T>> &data,
                             const std::vector<size_t> &lod) {
  // The shape is built from ints, so the size_t offset is narrowed explicitly.
  const int size = lod.empty() ? 0 : static_cast<int>(lod.back());
  tensor->shape.assign({size, 1});
  tensor->lod.assign({lod});
  TensorAssignData(tensor, data);
}
template <typename T> template <typename T>
static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor, static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
const std::vector<std::vector<T>> &data) { const std::vector<std::vector<T>> &data) {
......
...@@ -98,10 +98,8 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -98,10 +98,8 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
auto one_batch = data->NextBatch(); auto one_batch = data->NextBatch();
PaddleTensor input_tensor; PaddleTensor input_tensor;
input_tensor.name = "word"; input_tensor.name = "word";
input_tensor.shape.assign({static_cast<int>(one_batch.data.size()), 1});
input_tensor.lod.assign({one_batch.lod});
input_tensor.dtype = PaddleDType::INT64; input_tensor.dtype = PaddleDType::INT64;
TensorAssignData<int64_t>(&input_tensor, {one_batch.data}); TensorAssignData<int64_t>(&input_tensor, {one_batch.data}, one_batch.lod);
PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1)); PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1));
input_slots->assign({input_tensor}); input_slots->assign({input_tensor});
} }
......
...@@ -19,11 +19,9 @@ namespace inference { ...@@ -19,11 +19,9 @@ namespace inference {
using contrib::AnalysisConfig; using contrib::AnalysisConfig;
struct DataRecord { struct DataRecord {
std::vector<std::vector<int64_t>> query_data_all, title_data_all; std::vector<std::vector<int64_t>> query, title;
std::vector<size_t> lod1, lod2; std::vector<size_t> lod1, lod2;
size_t batch_iter{0}; size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
size_t batch_size{1};
size_t num_samples; // total number of samples
DataRecord() = default; DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1) explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) { : batch_size(batch_size) {
...@@ -33,22 +31,9 @@ struct DataRecord { ...@@ -33,22 +31,9 @@ struct DataRecord {
DataRecord data; DataRecord data;
size_t batch_end = batch_iter + batch_size; size_t batch_end = batch_iter + batch_size;
// NOTE skip the final batch, if no enough data is provided. // NOTE skip the final batch, if no enough data is provided.
if (batch_end <= query_data_all.size()) { if (batch_end <= query.size()) {
data.query_data_all.assign(query_data_all.begin() + batch_iter, GetInputPerBatch(query, &data.query, &data.lod1, batch_iter, batch_end);
query_data_all.begin() + batch_end); GetInputPerBatch(title, &data.title, &data.lod2, batch_iter, batch_end);
data.title_data_all.assign(title_data_all.begin() + batch_iter,
title_data_all.begin() + batch_end);
// Prepare LoDs
data.lod1.push_back(0);
data.lod2.push_back(0);
CHECK(!data.query_data_all.empty());
CHECK(!data.title_data_all.empty());
CHECK_EQ(data.query_data_all.size(), data.title_data_all.size());
for (size_t j = 0; j < data.query_data_all.size(); j++) {
// calculate lod
data.lod1.push_back(data.lod1.back() + data.query_data_all[j].size());
data.lod2.push_back(data.lod2.back() + data.title_data_all[j].size());
}
} }
batch_iter += batch_size; batch_iter += batch_size;
return data; return data;
...@@ -67,8 +52,8 @@ struct DataRecord { ...@@ -67,8 +52,8 @@ struct DataRecord {
// load title data // load title data
std::vector<int64_t> title_data; std::vector<int64_t> title_data;
split_to_int64(data[1], ' ', &title_data); split_to_int64(data[1], ' ', &title_data);
query_data_all.push_back(std::move(query_data)); query.push_back(std::move(query_data));
title_data_all.push_back(std::move(title_data)); title.push_back(std::move(title_data));
} }
num_samples = num_lines; num_samples = num_lines;
} }
...@@ -80,15 +65,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -80,15 +65,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_query_tensor.name = "left"; lod_query_tensor.name = "left";
lod_title_tensor.name = "right"; lod_title_tensor.name = "right";
auto one_batch = data->NextBatch(); auto one_batch = data->NextBatch();
int size1 = one_batch.lod1[one_batch.lod1.size() - 1]; // token batch size
int size2 = one_batch.lod2[one_batch.lod2.size() - 1]; // token batch size
lod_query_tensor.shape.assign({size1, 1});
lod_query_tensor.lod.assign({one_batch.lod1});
lod_title_tensor.shape.assign({size2, 1});
lod_title_tensor.lod.assign({one_batch.lod2});
// assign data // assign data
TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query_data_all); TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query, one_batch.lod1);
TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title_data_all); TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title, one_batch.lod2);
// Set inputs. // Set inputs.
input_slots->assign({lod_query_tensor, lod_title_tensor}); input_slots->assign({lod_query_tensor, lod_title_tensor});
for (auto &tensor : *input_slots) { for (auto &tensor : *input_slots) {
......
...@@ -19,11 +19,9 @@ namespace inference { ...@@ -19,11 +19,9 @@ namespace inference {
using contrib::AnalysisConfig; using contrib::AnalysisConfig;
struct DataRecord { struct DataRecord {
std::vector<std::vector<int64_t>> word_data_all, mention_data_all; std::vector<std::vector<int64_t>> word, mention;
std::vector<size_t> lod; // two inputs have the same lod info. std::vector<size_t> lod; // two inputs have the same lod info.
size_t batch_iter{0}; size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
size_t batch_size{1};
size_t num_samples; // total number of samples
DataRecord() = default; DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1) explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) { : batch_size(batch_size) {
...@@ -33,20 +31,10 @@ struct DataRecord { ...@@ -33,20 +31,10 @@ struct DataRecord {
DataRecord data; DataRecord data;
size_t batch_end = batch_iter + batch_size; size_t batch_end = batch_iter + batch_size;
// NOTE skip the final batch, if no enough data is provided. // NOTE skip the final batch, if no enough data is provided.
if (batch_end <= word_data_all.size()) { if (batch_end <= word.size()) {
data.word_data_all.assign(word_data_all.begin() + batch_iter, GetInputPerBatch(word, &data.word, &data.lod, batch_iter, batch_end);
word_data_all.begin() + batch_end); GetInputPerBatch(mention, &data.mention, &data.lod, batch_iter,
data.mention_data_all.assign(mention_data_all.begin() + batch_iter, batch_end);
mention_data_all.begin() + batch_end);
// Prepare LoDs
data.lod.push_back(0);
CHECK(!data.word_data_all.empty());
CHECK(!data.mention_data_all.empty());
CHECK_EQ(data.word_data_all.size(), data.mention_data_all.size());
for (size_t j = 0; j < data.word_data_all.size(); j++) {
// calculate lod
data.lod.push_back(data.lod.back() + data.word_data_all[j].size());
}
} }
batch_iter += batch_size; batch_iter += batch_size;
return data; return data;
...@@ -65,8 +53,8 @@ struct DataRecord { ...@@ -65,8 +53,8 @@ struct DataRecord {
// load mention data // load mention data
std::vector<int64_t> mention_data; std::vector<int64_t> mention_data;
split_to_int64(data[3], ' ', &mention_data); split_to_int64(data[3], ' ', &mention_data);
word_data_all.push_back(std::move(word_data)); word.push_back(std::move(word_data));
mention_data_all.push_back(std::move(mention_data)); mention.push_back(std::move(mention_data));
} }
num_samples = num_lines; num_samples = num_lines;
} }
...@@ -78,14 +66,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -78,14 +66,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
lod_word_tensor.name = "word"; lod_word_tensor.name = "word";
lod_mention_tensor.name = "mention"; lod_mention_tensor.name = "mention";
auto one_batch = data->NextBatch(); auto one_batch = data->NextBatch();
int size = one_batch.lod[one_batch.lod.size() - 1]; // token batch size
lod_word_tensor.shape.assign({size, 1});
lod_word_tensor.lod.assign({one_batch.lod});
lod_mention_tensor.shape.assign({size, 1});
lod_mention_tensor.lod.assign({one_batch.lod});
// assign data // assign data
TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word_data_all); TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word, one_batch.lod);
TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention_data_all); TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention,
one_batch.lod);
// Set inputs. // Set inputs.
input_slots->assign({lod_word_tensor, lod_mention_tensor}); input_slots->assign({lod_word_tensor, lod_mention_tensor});
for (auto &tensor : *input_slots) { for (auto &tensor : *input_slots) {
......
...@@ -18,12 +18,9 @@ namespace paddle { ...@@ -18,12 +18,9 @@ namespace paddle {
namespace inference { namespace inference {
struct DataRecord { struct DataRecord {
std::vector<std::vector<int64_t>> title1_all, title2_all, title3_all, l1_all;
std::vector<std::vector<int64_t>> title1, title2, title3, l1; std::vector<std::vector<int64_t>> title1, title2, title3, l1;
std::vector<size_t> title1_lod, title2_lod, title3_lod, l1_lod; std::vector<size_t> lod1, lod2, lod3, l1_lod;
size_t batch_iter{0}; size_t batch_iter{0}, batch_size{1}, num_samples; // total number of samples
size_t batch_size{1};
size_t num_samples; // total number of samples
DataRecord() = default; DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1) explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) { : batch_size(batch_size) {
...@@ -33,41 +30,11 @@ struct DataRecord { ...@@ -33,41 +30,11 @@ struct DataRecord {
DataRecord data; DataRecord data;
size_t batch_end = batch_iter + batch_size; size_t batch_end = batch_iter + batch_size;
// NOTE skip the final batch, if no enough data is provided. // NOTE skip the final batch, if no enough data is provided.
if (batch_end <= title1_all.size()) { if (batch_end <= title1.size()) {
data.title1_all.assign(title1_all.begin() + batch_iter, GetInputPerBatch(title1, &data.title1, &data.lod1, batch_iter, batch_end);
title1_all.begin() + batch_end); GetInputPerBatch(title2, &data.title2, &data.lod2, batch_iter, batch_end);
data.title2_all.assign(title2_all.begin() + batch_iter, GetInputPerBatch(title3, &data.title3, &data.lod3, batch_iter, batch_end);
title2_all.begin() + batch_end); GetInputPerBatch(l1, &data.l1, &data.l1_lod, batch_iter, batch_end);
data.title3_all.assign(title3_all.begin() + batch_iter,
title3_all.begin() + batch_end);
data.l1_all.assign(l1_all.begin() + batch_iter,
l1_all.begin() + batch_end);
// Prepare LoDs
data.title1_lod.push_back(0);
data.title2_lod.push_back(0);
data.title3_lod.push_back(0);
data.l1_lod.push_back(0);
CHECK(!data.title1_all.empty());
CHECK(!data.title2_all.empty());
CHECK(!data.title3_all.empty());
CHECK(!data.l1_all.empty());
CHECK_EQ(data.title1_all.size(), data.title2_all.size());
CHECK_EQ(data.title1_all.size(), data.title3_all.size());
CHECK_EQ(data.title1_all.size(), data.l1_all.size());
for (size_t j = 0; j < data.title1_all.size(); j++) {
data.title1.push_back(data.title1_all[j]);
data.title2.push_back(data.title2_all[j]);
data.title3.push_back(data.title3_all[j]);
data.l1.push_back(data.l1_all[j]);
// calculate lod
data.title1_lod.push_back(data.title1_lod.back() +
data.title1_all[j].size());
data.title2_lod.push_back(data.title2_lod.back() +
data.title2_all[j].size());
data.title3_lod.push_back(data.title3_lod.back() +
data.title3_all[j].size());
data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size());
}
} }
batch_iter += batch_size; batch_iter += batch_size;
return data; return data;
...@@ -92,10 +59,10 @@ struct DataRecord { ...@@ -92,10 +59,10 @@ struct DataRecord {
// load l1 data // load l1 data
std::vector<int64_t> l1_data; std::vector<int64_t> l1_data;
split_to_int64(data[3], ' ', &l1_data); split_to_int64(data[3], ' ', &l1_data);
title1_all.push_back(std::move(title1_data)); title1.push_back(std::move(title1_data));
title2_all.push_back(std::move(title2_data)); title2.push_back(std::move(title2_data));
title3_all.push_back(std::move(title3_data)); title3.push_back(std::move(title3_data));
l1_all.push_back(std::move(l1_data)); l1.push_back(std::move(l1_data));
} }
num_samples = num_lines; num_samples = num_lines;
} }
...@@ -109,24 +76,11 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data, ...@@ -109,24 +76,11 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
title3_tensor.name = "title3"; title3_tensor.name = "title3";
l1_tensor.name = "l1"; l1_tensor.name = "l1";
auto one_batch = data->NextBatch(); auto one_batch = data->NextBatch();
int title1_size = one_batch.title1_lod[one_batch.title1_lod.size() - 1];
title1_tensor.shape.assign({title1_size, 1});
title1_tensor.lod.assign({one_batch.title1_lod});
int title2_size = one_batch.title2_lod[one_batch.title2_lod.size() - 1];
title2_tensor.shape.assign({title2_size, 1});
title2_tensor.lod.assign({one_batch.title2_lod});
int title3_size = one_batch.title3_lod[one_batch.title3_lod.size() - 1];
title3_tensor.shape.assign({title3_size, 1});
title3_tensor.lod.assign({one_batch.title3_lod});
int l1_size = one_batch.l1_lod[one_batch.l1_lod.size() - 1];
l1_tensor.shape.assign({l1_size, 1});
l1_tensor.lod.assign({one_batch.l1_lod});
// assign data // assign data
TensorAssignData<int64_t>(&title1_tensor, one_batch.title1); TensorAssignData<int64_t>(&title1_tensor, one_batch.title1, one_batch.lod1);
TensorAssignData<int64_t>(&title2_tensor, one_batch.title2); TensorAssignData<int64_t>(&title2_tensor, one_batch.title2, one_batch.lod2);
TensorAssignData<int64_t>(&title3_tensor, one_batch.title3); TensorAssignData<int64_t>(&title3_tensor, one_batch.title3, one_batch.lod3);
TensorAssignData<int64_t>(&l1_tensor, one_batch.l1); TensorAssignData<int64_t>(&l1_tensor, one_batch.l1, one_batch.l1_lod);
// Set inputs. // Set inputs.
input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor}); input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor});
for (auto &tensor : *input_slots) { for (auto &tensor : *input_slots) {
......
...@@ -176,6 +176,18 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs, ...@@ -176,6 +176,18 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
(*inputs).emplace_back(input_slots); (*inputs).emplace_back(input_slots);
} }
// Copies the samples in[batch_iter, batch_end) into *out and rebuilds *lod as
// the running-sum offsets of the copied samples.
//
// in:         all samples, one inner vector per sample.
// out:        receives the copied samples. They are appended; elements already
//             in *out are kept.
// lod:        cleared first, then filled with 0 followed by the cumulative
//             sizes of the copied samples (so it ends up with one more entry
//             than the number of samples copied).
// batch_iter: index of the first sample to copy.
// batch_end:  index one past the last sample to copy.
//
// Both indices are clamped to in.size(), and a start index beyond the end
// yields an empty batch (*lod == {0}). Without the clamp an out-of-range index
// would form an iterator past the end of `in`.
void GetInputPerBatch(const std::vector<std::vector<int64_t>> &in,
                      std::vector<std::vector<int64_t>> *out,
                      std::vector<size_t> *lod, size_t batch_iter,
                      size_t batch_end) {
  const size_t last = batch_end < in.size() ? batch_end : in.size();
  const size_t first = batch_iter < last ? batch_iter : last;
  const size_t count = last - first;

  lod->clear();
  lod->reserve(count + 1);
  lod->push_back(0);
  out->reserve(out->size() + count);

  for (size_t i = first; i < last; ++i) {
    out->push_back(in[i]);
    lod->push_back(lod->back() + in[i].size());  // calculate lod
  }
}
void TestOneThreadPrediction( void TestOneThreadPrediction(
const PaddlePredictor::Config *config, const PaddlePredictor::Config *config,
const std::vector<std::vector<PaddleTensor>> &inputs, const std::vector<std::vector<PaddleTensor>> &inputs,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册