From 9c7fde45a7fec127e3f7dc7e1c161ec647e5683b Mon Sep 17 00:00:00 2001 From: luotao1 Date: Thu, 23 Aug 2018 13:32:02 +0800 Subject: [PATCH] enhance test_analyzer to profile ditu inference demo --- .../ir/graph_pattern_detecter_tester.cc | 4 +- paddle/fluid/framework/selected_rows.cc | 4 +- .../inference/analysis/analyzer_tester.cc | 48 +++++++++++-------- paddle/fluid/operators/sampling_id_op.h | 2 +- paddle/scripts/paddle_build.sh | 2 - 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/paddle/fluid/framework/ir/graph_pattern_detecter_tester.cc b/paddle/fluid/framework/ir/graph_pattern_detecter_tester.cc index 993c885a810..06f9df55469 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detecter_tester.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detecter_tester.cc @@ -163,8 +163,8 @@ TEST(GraphPatternDetecter, MultiSubgraph) { // 3. Detect op2 -> var2 -> op4 // 4. Detect op2 -> var3 -> op5 // But 2 and 3 and 4 overlapped, so keep 2, so the final choices are 1 and 2 - ASSERT_GE(count, 1UL); - ASSERT_LE(count, 2UL); + ASSERT_GE(count, 1); + ASSERT_LE(count, 2); } } // namespace ir diff --git a/paddle/fluid/framework/selected_rows.cc b/paddle/fluid/framework/selected_rows.cc index c202b0a5be1..a4319ffabb0 100644 --- a/paddle/fluid/framework/selected_rows.cc +++ b/paddle/fluid/framework/selected_rows.cc @@ -139,7 +139,7 @@ int64_t SelectedRows::AutoGrownIndex(int64_t key, bool auto_grown) { } auto write_iter = id_to_index_.find(key); if (write_iter == id_to_index_.end()) { - size_t row_num = rows_.size(); + int row_num = rows_.size(); if (row_num == value_->dims()[0]) { rwlock_->UNLock(); PADDLE_THROW("selected rows is full, then length exceed %d", row_num); @@ -182,7 +182,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value, PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0], "output tensor should have the same shape with table " "except the dims[0]."); - for (size_t i = 0; i < ids.numel(); ++i) { + for (int i = 0; i < ids.numel(); ++i) { int64_t index = AutoGrownIndex(ids.data()[i], auto_grown); framework::VisitDataType( framework::ToDataType(value_->type()), diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 52f5c4f5aea..baa7600283a 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -23,6 +23,8 @@ DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN"); DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN"); +DEFINE_int32(batch_size, 10, "batch size."); +DEFINE_int32(repeat, 1, "Running the inference program repeat times."); namespace paddle { namespace inference { @@ -92,7 +94,7 @@ struct DataRecord { size_t batch_iter{0}; size_t batch_size{1}; DataRecord() = default; - DataRecord(const std::string &path, int batch_size = 1) + explicit DataRecord(const std::string &path, int batch_size = 1) : batch_size(batch_size) { Load(path); } @@ -165,7 +167,6 @@ struct DataRecord { }; void PrepareInputs(std::vector *input_slots, DataRecord *data, int batch_size) { - // DataRecord data(FLAGS_datapath, batch_size); PaddleTensor lod_attention_tensor, init_zero_tensor, lod_tensor_tensor, week_tensor, minute_tensor; lod_attention_tensor.name = "data_lod_attention"; @@ -174,28 +175,33 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, week_tensor.name = "week"; minute_tensor.name = "minute"; auto one_batch = data->NextBatch(); - // clang-format off - std::vector rnn_link_data_shape - ({static_cast(one_batch.rnn_link_data.size()), static_cast(one_batch.rnn_link_data.front().size())}); + std::vector rnn_link_data_shape( + {static_cast(one_batch.rnn_link_data.size()), + static_cast(one_batch.rnn_link_data.front().size())}); lod_attention_tensor.shape.assign({1, 2}); lod_attention_tensor.lod.assign({one_batch.lod1, one_batch.lod2}); init_zero_tensor.shape.assign({batch_size, 15}); init_zero_tensor.lod.assign({one_batch.lod3}); lod_tensor_tensor.shape = rnn_link_data_shape; lod_tensor_tensor.lod.assign({one_batch.lod1}); - week_tensor.shape.assign({(int) one_batch.rnn_week_datas.size(), (int) one_batch.rnn_week_datas.front().size()}); + // clang-format off + week_tensor.shape.assign( + {static_cast(one_batch.rnn_week_datas.size()), + static_cast(one_batch.rnn_week_datas.front().size())}); week_tensor.lod.assign({one_batch.lod3}); - minute_tensor.shape.assign({(int) one_batch.rnn_minute_datas.size(), - (int) one_batch.rnn_minute_datas.front().size()}); + minute_tensor.shape.assign( + {static_cast(one_batch.rnn_minute_datas.size()), + static_cast(one_batch.rnn_minute_datas.front().size())}); minute_tensor.lod.assign({one_batch.lod3}); + // clang-format on // assign data - TensorAssignData(&lod_attention_tensor, std::vector>({{0, 0}})); + TensorAssignData(&lod_attention_tensor, + std::vector>({{0, 0}})); std::vector tmp_zeros(batch_size * 15, 0.); TensorAssignData(&init_zero_tensor, {tmp_zeros}); TensorAssignData(&lod_tensor_tensor, one_batch.rnn_link_data); TensorAssignData(&week_tensor, one_batch.rnn_week_datas); TensorAssignData(&minute_tensor, one_batch.rnn_minute_datas); - // clang-format on // Set inputs. auto init_zero_tensor1 = init_zero_tensor; init_zero_tensor1.name = "hidden_init"; @@ -231,12 +237,9 @@ std::string DescribeTensor(const PaddleTensor &tensor) { os << "\n"; os << " - data: "; - // clang-format off - int dim = std::accumulate(tensor.shape.begin(), - tensor.shape.end(), - 1, - [](int a, int b) { return a * b; }); // clang-format on - for (size_t i = 0; i < dim; i++) { + int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1, + [](int a, int b) { return a * b; }); + for (int i = 0; i < dim; i++) { os << static_cast(tensor.data.data())[i] << " "; } os << '\n'; @@ -300,13 +303,16 @@ void TestDituRNNPrediction(const std::string &model_path, for (int i = 0; i < num_times; i++) { predictor->Run(input_slots, &outputs); } - LOG(INFO) << "time/batch: " << timer.toc() / num_times; + LOG(INFO) << "===========profile result==========="; + LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times + << ", latency: " << timer.toc() / num_times << "ms"; + LOG(INFO) << "====================================="; for (auto &out : outputs) { size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, [](int a, int b) { return a * b; }); float *data = static_cast(out.data.data()); - for (int i = 0; + for (size_t i = 0; i < std::min(sizeof(ditu_rnn_target_data) / sizeof(float), size); i++) { EXPECT_NEAR(data[i], ditu_rnn_target_data[i], 1e-3); @@ -336,7 +342,7 @@ TEST(Analyzer, SupportIRPass) { // Directly infer with the original model. TEST(Analyzer, DituRNN_without_analysis) { TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data, - 10, false, false); + FLAGS_batch_size, false, false, FLAGS_repeat); } // Inference with the original model with the analysis turned on, the analysis @@ -344,14 +350,14 @@ TEST(Analyzer, DituRNN_without_analysis) { TEST(Analyzer, DituRNN_with_analysis) { LOG(INFO) << "ditu rnn with analysis"; TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data, - 10, true, false, 1); + FLAGS_batch_size, true, false, FLAGS_repeat); } // Inference with analysis and IR. The IR module will fuse some large kernels. TEST(Analyzer, DituRNN_with_analysis_with_IR) { LOG(INFO) << "ditu rnn with analysis and IR fuse"; TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data, - 10, true, true, 1); + FLAGS_batch_size, true, true, FLAGS_repeat); } } // namespace analysis diff --git a/paddle/fluid/operators/sampling_id_op.h b/paddle/fluid/operators/sampling_id_op.h index f730a9746da..e1dd4539b30 100644 --- a/paddle/fluid/operators/sampling_id_op.h +++ b/paddle/fluid/operators/sampling_id_op.h @@ -54,7 +54,7 @@ class SamplingIdKernel : public framework::OpKernel { static_cast(context.Attr("max"))); std::vector ids(batch_size); - for (size_t i = 0; i < batch_size; ++i) { + for (int i = 0; i < batch_size; ++i) { T r = dist(engine); int idx = width - 1; for (int j = 0; j < width; ++j) { diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 8460f93b841..f2a9a6b3b9a 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -116,7 +116,6 @@ function cmake_gen() { -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_CONTRIB=${WITH_CONTRIB:-ON} -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} - -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} -DPY_VERSION=${PY_VERSION:-2.7} ======================================== EOF @@ -146,7 +145,6 @@ EOF -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \ -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \ - -DWITH_INFERENCE_DEMO=${WITH_INFERENCE_DEMO:-ON} \ -DPY_VERSION=${PY_VERSION:-2.7} } -- GitLab