提交 9c7fde45 编写于 作者: L luotao1

enhance test_analyzer to profile ditu inference demo

上级 decda738
......@@ -163,8 +163,8 @@ TEST(GraphPatternDetecter, MultiSubgraph) {
// 3. Detect op2 -> var2 -> op4
// 4. Detect op2 -> var3 -> op5
// But 2 and 3 and 4 overlapped, so keep 2, so the final choices are 1 and 2
ASSERT_GE(count, 1UL);
ASSERT_LE(count, 2UL);
ASSERT_GE(count, 1);
ASSERT_LE(count, 2);
}
} // namespace ir
......
......@@ -139,7 +139,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown) {
}
auto write_iter = id_to_index_.find(key);
if (write_iter == id_to_index_.end()) {
size_t row_num = rows_.size();
int row_num = rows_.size();
if (row_num == value_->dims()[0]) {
rwlock_->UNLock();
PADDLE_THROW("selected rows is full, then length exceed %d", row_num);
......@@ -182,7 +182,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
"output tensor should have the same shape with table "
"except the dims[0].");
for (size_t i = 0; i < ids.numel(); ++i) {
for (int i = 0; i < ids.numel(); ++i) {
int64_t index = AutoGrownIndex(ids.data<int64_t>()[i], auto_grown);
framework::VisitDataType(
framework::ToDataType(value_->type()),
......
......@@ -23,6 +23,8 @@
DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
DEFINE_int32(batch_size, 10, "batch size.");
DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
namespace paddle {
namespace inference {
......@@ -92,7 +94,7 @@ struct DataRecord {
size_t batch_iter{0};
size_t batch_size{1};
DataRecord() = default;
DataRecord(const std::string &path, int batch_size = 1)
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
Load(path);
}
......@@ -165,7 +167,6 @@ struct DataRecord {
};
void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
int batch_size) {
// DataRecord data(FLAGS_datapath, batch_size);
PaddleTensor lod_attention_tensor, init_zero_tensor, lod_tensor_tensor,
week_tensor, minute_tensor;
lod_attention_tensor.name = "data_lod_attention";
......@@ -174,28 +175,33 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
week_tensor.name = "week";
minute_tensor.name = "minute";
auto one_batch = data->NextBatch();
// clang-format off
std::vector<int> rnn_link_data_shape
({static_cast<int>(one_batch.rnn_link_data.size()), static_cast<int>(one_batch.rnn_link_data.front().size())});
std::vector<int> rnn_link_data_shape(
{static_cast<int>(one_batch.rnn_link_data.size()),
static_cast<int>(one_batch.rnn_link_data.front().size())});
lod_attention_tensor.shape.assign({1, 2});
lod_attention_tensor.lod.assign({one_batch.lod1, one_batch.lod2});
init_zero_tensor.shape.assign({batch_size, 15});
init_zero_tensor.lod.assign({one_batch.lod3});
lod_tensor_tensor.shape = rnn_link_data_shape;
lod_tensor_tensor.lod.assign({one_batch.lod1});
week_tensor.shape.assign({(int) one_batch.rnn_week_datas.size(), (int) one_batch.rnn_week_datas.front().size()});
// clang-format off
week_tensor.shape.assign(
{static_cast<int>(one_batch.rnn_week_datas.size()),
static_cast<int>(one_batch.rnn_week_datas.front().size())});
week_tensor.lod.assign({one_batch.lod3});
minute_tensor.shape.assign({(int) one_batch.rnn_minute_datas.size(),
(int) one_batch.rnn_minute_datas.front().size()});
minute_tensor.shape.assign(
{static_cast<int>(one_batch.rnn_minute_datas.size()),
static_cast<int>(one_batch.rnn_minute_datas.front().size())});
minute_tensor.lod.assign({one_batch.lod3});
// clang-format on
// assign data
TensorAssignData(&lod_attention_tensor, std::vector<std::vector<float>>({{0, 0}}));
TensorAssignData(&lod_attention_tensor,
std::vector<std::vector<float>>({{0, 0}}));
std::vector<float> tmp_zeros(batch_size * 15, 0.);
TensorAssignData(&init_zero_tensor, {tmp_zeros});
TensorAssignData(&lod_tensor_tensor, one_batch.rnn_link_data);
TensorAssignData(&week_tensor, one_batch.rnn_week_datas);
TensorAssignData(&minute_tensor, one_batch.rnn_minute_datas);
// clang-format on
// Set inputs.
auto init_zero_tensor1 = init_zero_tensor;
init_zero_tensor1.name = "hidden_init";
......@@ -231,12 +237,9 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
os << "\n";
os << " - data: ";
// clang-format off
int dim = std::accumulate(tensor.shape.begin(),
tensor.shape.end(),
1,
[](int a, int b) { return a * b; }); // clang-format on
for (size_t i = 0; i < dim; i++) {
int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
[](int a, int b) { return a * b; });
for (int i = 0; i < dim; i++) {
os << static_cast<float *>(tensor.data.data())[i] << " ";
}
os << '\n';
......@@ -300,13 +303,16 @@ void TestDituRNNPrediction(const std::string &model_path,
for (int i = 0; i < num_times; i++) {
predictor->Run(input_slots, &outputs);
}
LOG(INFO) << "time/batch: " << timer.toc() / num_times;
LOG(INFO) << "===========profile result===========";
LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times
<< ", latency: " << timer.toc() / num_times << "ms";
LOG(INFO) << "=====================================";
for (auto &out : outputs) {
size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
[](int a, int b) { return a * b; });
float *data = static_cast<float *>(out.data.data());
for (int i = 0;
for (size_t i = 0;
i < std::min(sizeof(ditu_rnn_target_data) / sizeof(float), size);
i++) {
EXPECT_NEAR(data[i], ditu_rnn_target_data[i], 1e-3);
......@@ -336,7 +342,7 @@ TEST(Analyzer, SupportIRPass) {
// Directly infer with the original model.
TEST(Analyzer, DituRNN_without_analysis) {
TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
10, false, false);
FLAGS_batch_size, false, false, FLAGS_repeat);
}
// Inference with the original model with the analysis turned on, the analysis
......@@ -344,14 +350,14 @@ TEST(Analyzer, DituRNN_without_analysis) {
TEST(Analyzer, DituRNN_with_analysis) {
LOG(INFO) << "ditu rnn with analysis";
TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
10, true, false, 1);
FLAGS_batch_size, true, false, FLAGS_repeat);
}
// Inference with analysis and IR. The IR module will fuse some large kernels.
TEST(Analyzer, DituRNN_with_analysis_with_IR) {
LOG(INFO) << "ditu rnn with analysis and IR fuse";
TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
10, true, true, 1);
FLAGS_batch_size, true, true, FLAGS_repeat);
}
} // namespace analysis
......
......@@ -54,7 +54,7 @@ class SamplingIdKernel : public framework::OpKernel<T> {
static_cast<T>(context.Attr<float>("max")));
std::vector<T> ids(batch_size);
for (size_t i = 0; i < batch_size; ++i) {
for (int i = 0; i < batch_size; ++i) {
T r = dist(engine);
int idx = width - 1;
for (int j = 0; j < width; ++j) {
......
......@@ -116,7 +116,6 @@ function cmake_gen() {
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_CONTRIB=${WITH_CONTRIB:-ON}
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
-DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON}
-DPY_VERSION=${PY_VERSION:-2.7}
========================================
EOF
......@@ -146,7 +145,6 @@ EOF
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
-DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} \
-DPY_VERSION=${PY_VERSION:-2.7}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册