diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc index bf893e32569f4b50a583ab6f43cb214ec3620e09..36bbec473114cfd2e68c97a53264957477ade3fb 100644 --- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc @@ -103,108 +103,74 @@ void GetOneBatch(std::vector *input_slots, DataRecord *data, input_slots->assign({input_tensor}); } -const int64_t lac_ref_data[] = {24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, - 25, 25, 25, 25, 44, 24, 25, 25, 25, 36, 42, 43, - 44, 14, 15, 44, 14, 15, 44, 14, 15, 44, 38, 39, - 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23}; - -void TestLACPrediction(const std::string &model_path, - const std::string &data_file, const int batch_size, - const int repeat, bool use_analysis = false) { - AnalysisConfig cfg; - cfg.model_dir = model_path; - cfg.use_gpu = false; - cfg.device = 0; - cfg.specify_input_name = true; - cfg.enable_ir_optim = true; +void SetConfig(AnalysisConfig *cfg) { + cfg->model_dir = FLAGS_infer_model; + cfg->use_gpu = false; + cfg->device = 0; + cfg->specify_input_name = true; + cfg->enable_ir_optim = true; +} - std::vector input_slots, outputs_slots; - DataRecord data(data_file, batch_size); - GetOneBatch(&input_slots, &data, batch_size); - std::unique_ptr predictor; - if (use_analysis) { - predictor = - CreatePaddlePredictor(cfg); - } else { - predictor = - CreatePaddlePredictor(cfg); - } - for (int i = 0; i < FLAGS_burning; i++) { - predictor->Run(input_slots, &outputs_slots); +void SetInput(std::vector> *inputs) { + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + std::vector input_slots; + int epoch = FLAGS_test_all_data ? data.batched_datas.size() : 1; + LOG(INFO) << "number of samples: " << epoch; + for (int bid = 0; bid < epoch; ++bid) { + GetOneBatch(&input_slots, &data, FLAGS_batch_size); + (*inputs).emplace_back(input_slots); } - Timer timer; - if (FLAGS_test_all_data) { - LOG(INFO) << "test all data"; - std::vector> input_slots_all; - for (size_t bid = 0; bid < data.batched_datas.size(); ++bid) { - GetOneBatch(&input_slots, &data, batch_size); - input_slots_all.emplace_back(input_slots); - } - LOG(INFO) << "total number of samples: " << data.datasets.size(); - TestPrediction(cfg, input_slots_all, &outputs_slots, FLAGS_num_threads); - return; - } - timer.tic(); - for (int i = 0; i < repeat; i++) { - predictor->Run(input_slots, &outputs_slots); - } - PrintTime(batch_size, repeat, 1, 0, timer.toc() / repeat); +} - // check result - EXPECT_EQ(outputs_slots.size(), 1UL); - auto &out = outputs_slots[0]; - size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, - [](int a, int b) { return a * b; }); - size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t); - PADDLE_ENFORCE_GT(size, 0); - EXPECT_GE(size, batch1_size); - int64_t *pdata = static_cast(out.data.data()); - for (size_t i = 0; i < batch1_size; ++i) { - EXPECT_EQ(pdata[i], lac_ref_data[i]); - } +// Easy for profiling independently. +TEST(Analyzer_LAC, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; - if (use_analysis) { - // run once for comparion as reference - auto ref_predictor = - CreatePaddlePredictor(cfg); - std::vector ref_outputs_slots; - ref_predictor->Run(input_slots, &ref_outputs_slots); - CompareResult(ref_outputs_slots, outputs_slots); + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); - AnalysisPredictor *analysis_predictor = - dynamic_cast(predictor.get()); - auto &fuse_statis = analysis_predictor->analysis_argument() - .Get>( - framework::ir::kFuseStatisAttr); - for (auto &item : fuse_statis) { - LOG(INFO) << "fused " << item.first << " " << item.second; - } - int num_ops = 0; - for (auto &node : - analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) { - if (node->IsFunction()) { - ++num_ops; - } + if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { + // the first inference result + const int64_t lac_ref_data[] = { + 24, 25, 25, 25, 38, 30, 31, 14, 15, 44, 24, 25, 25, 25, 25, 25, + 44, 24, 25, 25, 25, 36, 42, 43, 44, 14, 15, 44, 14, 15, 44, 14, + 15, 44, 38, 39, 14, 15, 44, 22, 23, 23, 23, 23, 23, 23, 23}; + PADDLE_ENFORCE_EQ(outputs.size(), 1UL); + size_t size = GetSize(outputs[0]); + size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t); + PADDLE_ENFORCE_GE(size, batch1_size); + int64_t *pdata = static_cast(outputs[0].data.data()); + for (size_t i = 0; i < batch1_size; ++i) { + EXPECT_EQ(pdata[i], lac_ref_data[i]); } - LOG(INFO) << "has num ops: " << num_ops; - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - ASSERT_TRUE(fuse_statis.count("fc_gru_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); - EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4); - EXPECT_EQ(num_ops, 11); } } -TEST(Analyzer_LAC, native) { - LOG(INFO) << "LAC with native"; - TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size, - FLAGS_repeat); +// Check the fuse status +TEST(Analyzer_LAC, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); + + int num_ops; + auto fuse_statis = GetFuseStatis(cfg, &num_ops); + ASSERT_TRUE(fuse_statis.count("fc_fuse")); + ASSERT_TRUE(fuse_statis.count("fc_gru_fuse")); + EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); + EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 4); + EXPECT_EQ(num_ops, 11); } -TEST(Analyzer_LAC, analysis) { - LOG(INFO) << "LAC with analysis"; - TestLACPrediction(FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size, - FLAGS_repeat, true); +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_LAC, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); } } // namespace analysis diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc index f8c651e32f7e2ce1d8ced0e6774ffd555d351167..8cf230a51d05c3a141f7cfd4e30bf30f064f0989 100644 --- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc @@ -95,97 +95,73 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } } -// the first inference result -const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26, - 48, 39, 38, 16, 25}; - -void TestChineseNERPrediction(bool use_analysis) { - AnalysisConfig cfg; - cfg.prog_file = FLAGS_infer_model + "/__model__"; - cfg.param_file = FLAGS_infer_model + "/param"; - cfg.use_gpu = false; - cfg.device = 0; - cfg.specify_input_name = true; - cfg.enable_ir_optim = true; - - std::vector input_slots, outputs; - std::unique_ptr predictor; - Timer timer; - if (use_analysis) { - predictor = - CreatePaddlePredictor(cfg); - } else { - predictor = - CreatePaddlePredictor(cfg); - } +void SetConfig(AnalysisConfig *cfg) { + cfg->prog_file = FLAGS_infer_model + "/__model__"; + cfg->param_file = FLAGS_infer_model + "/param"; + cfg->use_gpu = false; + cfg->device = 0; + cfg->specify_input_name = true; + cfg->enable_ir_optim = true; +} - if (FLAGS_test_all_data) { - LOG(INFO) << "test all data"; - DataRecord data(FLAGS_infer_data, FLAGS_batch_size); - std::vector> input_slots_all; - for (size_t bid = 0; bid < data.num_samples / FLAGS_batch_size; ++bid) { - PrepareInputs(&input_slots, &data, FLAGS_batch_size); - input_slots_all.emplace_back(input_slots); - } - LOG(INFO) << "total number of samples: " << data.num_samples; - TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); - return; - } - // Prepare inputs. +void SetInput(std::vector> *inputs) { DataRecord data(FLAGS_infer_data, FLAGS_batch_size); - PrepareInputs(&input_slots, &data, FLAGS_batch_size); - - timer.tic(); - for (int i = 0; i < FLAGS_repeat; i++) { - predictor->Run(input_slots, &outputs); + std::vector input_slots; + int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1; + LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size; + for (int bid = 0; bid < epoch; ++bid) { + PrepareInputs(&input_slots, &data, FLAGS_batch_size); + (*inputs).emplace_back(input_slots); } - PrintTime(FLAGS_batch_size, FLAGS_repeat, 1, 0, timer.toc() / FLAGS_repeat); +} - PADDLE_ENFORCE(outputs.size(), 1UL); - auto &out = outputs[0]; - size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, - [](int a, int b) { return a * b; }); - PADDLE_ENFORCE_GT(size, 0); - int64_t *result = static_cast(out.data.data()); - for (size_t i = 0; i < std::min(11UL, size); i++) { - PADDLE_ENFORCE(result[i], chinese_ner_result_data[i]); - } +// Easy for profiling independently. +TEST(Analyzer_Chinese_ner, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; - if (use_analysis) { - // run once for comparion as reference - auto ref_predictor = - CreatePaddlePredictor(cfg); - std::vector ref_outputs_slots; - ref_predictor->Run(input_slots, &ref_outputs_slots); - CompareResult(ref_outputs_slots, outputs); + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); - AnalysisPredictor *analysis_predictor = - dynamic_cast(predictor.get()); - auto &fuse_statis = analysis_predictor->analysis_argument() - .Get>( - framework::ir::kFuseStatisAttr); - for (auto &item : fuse_statis) { - LOG(INFO) << "fused " << item.first << " " << item.second; - } - int num_ops = 0; - for (auto &node : - analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) { - if (node->IsFunction()) { - ++num_ops; - } + if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { + // the first inference result + const int chinese_ner_result_data[] = {30, 45, 41, 48, 17, 26, + 48, 39, 38, 16, 25}; + PADDLE_ENFORCE_EQ(outputs.size(), 1UL); + size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(size, 0); + int64_t *result = static_cast(outputs[0].data.data()); + for (size_t i = 0; i < std::min(11UL, size); i++) { + EXPECT_EQ(result[i], chinese_ner_result_data[i]); } - LOG(INFO) << "has num ops: " << num_ops; - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - ASSERT_TRUE(fuse_statis.count("fc_gru_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); - EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2); - EXPECT_EQ(num_ops, 14); } } -TEST(Analyzer_Chinese_ner, native) { TestChineseNERPrediction(false); } +// Check the fuse status +TEST(Analyzer_Chinese_ner, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); -TEST(Analyzer_Chinese_ner, analysis) { TestChineseNERPrediction(true); } + int num_ops; + auto fuse_statis = GetFuseStatis(cfg, &num_ops); + ASSERT_TRUE(fuse_statis.count("fc_fuse")); + ASSERT_TRUE(fuse_statis.count("fc_gru_fuse")); + EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); + EXPECT_EQ(fuse_statis.at("fc_gru_fuse"), 2); + EXPECT_EQ(num_ops, 14); +} + +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_Chinese_ner, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); +} } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index df96be544eaf51c52aa5592966f499fad91aab82..14bdf76efc71b326bd130858ea246be81c9bd45c 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -25,6 +25,7 @@ struct DataRecord { std::vector lod1, lod2, lod3; std::vector> rnn_link_data, rnn_week_datas, rnn_minute_datas; + size_t num_samples; // total number of samples size_t batch_iter{0}; size_t batch_size{1}; DataRecord() = default; @@ -97,6 +98,7 @@ struct DataRecord { week_data_all.push_back(std::move(week_data)); minute_data_all.push_back(std::move(minute_data)); } + num_samples = num_lines; } }; void PrepareInputs(std::vector *input_slots, DataRecord *data, @@ -147,89 +149,72 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } } -// Test with a really complicate model. -void TestRNN1Prediction(bool use_analysis, bool activate_ir, int num_threads) { - AnalysisConfig config; - config.prog_file = FLAGS_infer_model + "/__model__"; - config.param_file = FLAGS_infer_model + "/param"; - config.use_gpu = false; - config.device = 0; - config.specify_input_name = true; - config.enable_ir_optim = activate_ir; - PADDLE_ENFORCE(config.ir_mode == - AnalysisConfig::IrPassMode::kExclude); // default - config.ir_passes.clear(); // Do not exclude any pass. - - int batch_size = FLAGS_batch_size; +void SetConfig(AnalysisConfig *cfg) { + cfg->prog_file = FLAGS_infer_model + "/__model__"; + cfg->param_file = FLAGS_infer_model + "/param"; + cfg->use_gpu = false; + cfg->device = 0; + cfg->specify_input_name = true; + cfg->enable_ir_optim = true; + cfg->ir_passes.clear(); // Do not exclude any pass. +} - auto base_predictor = - CreatePaddlePredictor(config); - auto predictor = - CreatePaddlePredictor( - config); +void SetInput(std::vector> *inputs) { + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); std::vector input_slots; - DataRecord data(FLAGS_infer_data, batch_size); - // Prepare inputs. - PrepareInputs(&input_slots, &data, batch_size); - std::vector outputs, base_outputs; + int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1; + LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size; + for (int bid = 0; bid < epoch; ++bid) { + PrepareInputs(&input_slots, &data, FLAGS_batch_size); + (*inputs).emplace_back(input_slots); + } +} - base_predictor->Run(input_slots, &base_outputs); +// Easy for profiling independently. +TEST(Analyzer_rnn1, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; std::vector> input_slots_all; - input_slots_all.emplace_back(input_slots); - if (num_threads == 1) { - TestOneThreadPrediction(config, input_slots_all, &outputs); - CompareResult(outputs, base_outputs); - } else { - // only return the output of first thread - TestMultiThreadPrediction(config, input_slots_all, &outputs, num_threads); - } + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); +} - if (use_analysis && activate_ir) { - AnalysisPredictor *analysis_predictor = - dynamic_cast(predictor.get()); - auto &fuse_statis = analysis_predictor->analysis_argument() - .Get>( - framework::ir::kFuseStatisAttr); - for (auto &item : fuse_statis) { - LOG(INFO) << "fused " << item.first << " " << item.second; - } +// Check the fuse status +TEST(Analyzer_rnn1, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); - int num_ops = 0; - for (auto &node : - analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) { - if (node->IsFunction()) { - ++num_ops; - } - } - LOG(INFO) << "has num ops: " << num_ops; + int num_ops; + auto fuse_statis = GetFuseStatis(cfg, &num_ops); + ASSERT_TRUE(fuse_statis.count("fc_fuse")); + EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); + EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM + EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1); + EXPECT_EQ(num_ops, + 13); // After graph optimization, only 13 operators exists. +} - ASSERT_TRUE(fuse_statis.count("fc_fuse")); - EXPECT_EQ(fuse_statis.at("fc_fuse"), 1); - EXPECT_EQ(fuse_statis.at("fc_nobias_lstm_fuse"), 2); // bi-directional LSTM - EXPECT_EQ(fuse_statis.at("seq_concat_fc_fuse"), 1); - EXPECT_EQ(num_ops, - 13); // After graph optimization, only 13 operators exists. - } +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_rnn1, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); } -// Inference with analysis and IR, easy for profiling independently. -TEST(Analyzer, rnn1) { TestRNN1Prediction(true, true, FLAGS_num_threads); } +// Test Multi-Thread. +TEST(Analyzer_rnn1, multi_thread) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; -// Other unit-tests of RNN1, test different options of use_analysis, -// activate_ir and multi-threads. -TEST(Analyzer, RNN_tests) { - int num_threads[2] = {1, 4}; - for (auto i : num_threads) { - // Directly infer with the original model. - TestRNN1Prediction(false, false, i); - // Inference with the original model with the analysis turned on, the - // analysis module will transform the program to a data flow graph. - TestRNN1Prediction(true, false, i); - // Inference with analysis and IR. The IR module will fuse some large - // kernels. - TestRNN1Prediction(true, true, i); - } + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, 4 /* num_threads */); } } // namespace inference diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc index c40ea58eea9c10a85acf84108f1d081a779f526d..ba04d030b94c0924311dcff5c6a34270a764f877 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc @@ -12,24 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/analyzer.h" - -#include -#include -#include // NOLINT -#include "paddle/fluid/framework/ir/fuse_pass_base.h" -#include "paddle/fluid/framework/ir/pass.h" -#include "paddle/fluid/inference/analysis/ut_helper.h" -#include "paddle/fluid/inference/api/analysis_predictor.h" -#include "paddle/fluid/inference/api/helper.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" -#include "paddle/fluid/inference/api/paddle_inference_pass.h" - -DEFINE_string(infer_model, "", "model path"); -DEFINE_string(infer_data, "", "data path"); -DEFINE_int32(batch_size, 1, "batch size."); -DEFINE_int32(repeat, 1, "Running the inference program repeat times."); -DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads."); +#include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { namespace inference { @@ -41,6 +24,7 @@ struct DataRecord { std::vector lod; std::vector> rnn_link_data; std::vector result_data; + size_t num_samples; // total number of samples size_t batch_iter{0}; size_t batch_size{1}; DataRecord() = default; @@ -100,6 +84,7 @@ struct DataRecord { result_data.insert(result_data.end(), tmp.begin(), tmp.end()); } } + num_samples = num_lines / 2; } }; void PrepareInputs(std::vector *input_slots, DataRecord *data, @@ -118,64 +103,58 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, input_slots->assign({feed_tensor}); } -void CompareResult(const std::vector &outputs, - const std::vector &base_result) { - PADDLE_ENFORCE_GT(outputs.size(), 0); - for (size_t i = 0; i < outputs.size(); i++) { - auto &out = outputs[i]; - size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, - [](int a, int b) { return a * b; }); - PADDLE_ENFORCE_GT(size, 0); - float *data = static_cast(out.data.data()); - for (size_t i = 0; i < size; i++) { - EXPECT_NEAR(data[i], base_result[i], 1e-3); - } +void SetConfig(AnalysisConfig *cfg) { + cfg->prog_file = FLAGS_infer_model + "/__model__"; + cfg->param_file = FLAGS_infer_model + "/param"; + cfg->use_gpu = false; + cfg->device = 0; + cfg->specify_input_name = true; + cfg->enable_ir_optim = true; +} + +void SetInput(std::vector> *inputs) { + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + std::vector input_slots; + int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1; + LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size; + for (int bid = 0; bid < epoch; ++bid) { + PrepareInputs(&input_slots, &data, FLAGS_batch_size); + (*inputs).emplace_back(input_slots); } } -// Test with a really complicate model. -void TestRNN2Prediction() { - AnalysisConfig config; - config.prog_file = FLAGS_infer_model + "/__model__"; - config.param_file = FLAGS_infer_model + "/param"; - config.use_gpu = false; - config.device = 0; - config.specify_input_name = true; - config.enable_ir_optim = true; - PADDLE_ENFORCE(config.ir_mode == - AnalysisConfig::IrPassMode::kExclude); // default - int batch_size = FLAGS_batch_size; - int num_times = FLAGS_repeat; +// Easy for profiling independently. +TEST(Analyzer_rnn2, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; - auto base_predictor = - CreatePaddlePredictor(config); - auto predictor = - CreatePaddlePredictor( - config); - std::vector input_slots; - DataRecord data(FLAGS_infer_data, batch_size); - PrepareInputs(&input_slots, &data, batch_size); - std::vector outputs, base_outputs; + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); - Timer timer1; - timer1.tic(); - for (int i = 0; i < num_times; i++) { - base_predictor->Run(input_slots, &base_outputs); + if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { + // the first inference result + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + PADDLE_ENFORCE_GT(outputs.size(), 0); + size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(size, 0); + float *result = static_cast(outputs[0].data.data()); + for (size_t i = 0; i < size; i++) { + EXPECT_NEAR(result[i], data.result_data[i], 1e-3); + } } - PrintTime(batch_size, num_times, 1, 0, timer1.toc() / num_times); +} - Timer timer2; - timer2.tic(); - for (int i = 0; i < num_times; i++) { - predictor->Run(input_slots, &outputs); - } - PrintTime(batch_size, num_times, 1, 0, timer2.toc() / num_times); +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_rnn2, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); - CompareResult(base_outputs, data.result_data); - CompareResult(outputs, data.result_data); + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); } -TEST(Analyzer, rnn2) { TestRNN2Prediction(); } - } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc index 1472c475e4a3061ffcad96925ea215a41a7e63eb..340ef152f0b1a15a451f840b36ae845ef4984740 100644 --- a/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc @@ -46,54 +46,63 @@ struct DataReader { std::unique_ptr file; }; -void Main(int batch_size) { - // shape -- - // Create Predictor -- - AnalysisConfig config; - config.model_dir = FLAGS_infer_model; - config.use_gpu = false; - config.enable_ir_optim = true; +void SetConfig(AnalysisConfig *cfg) { + cfg->model_dir = FLAGS_infer_model; + cfg->use_gpu = false; + cfg->device = 0; + cfg->specify_input_name = true; + cfg->enable_ir_optim = true; +} - std::vector input_slots, output_slots; +void SetInput(std::vector> *inputs) { + std::vector input_slots; DataReader reader(FLAGS_infer_data); - std::vector> input_slots_all; - - if (FLAGS_test_all_data) { - LOG(INFO) << "test all data"; - int num_batches = 0; - while (reader.NextBatch(&input_slots, FLAGS_batch_size)) { - input_slots_all.emplace_back(input_slots); - ++num_batches; - } - LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size; - TestPrediction(config, input_slots_all, &output_slots, FLAGS_num_threads); - return; + int num_batches = 0; + while (reader.NextBatch(&input_slots, FLAGS_batch_size)) { + (*inputs).emplace_back(input_slots); + ++num_batches; + if (!FLAGS_test_all_data) return; } + LOG(INFO) << "total number of samples: " << num_batches * FLAGS_batch_size; +} - // one batch starts - // data -- - reader.NextBatch(&input_slots, FLAGS_batch_size); - input_slots_all.emplace_back(input_slots); - TestPrediction(config, input_slots_all, &output_slots, FLAGS_num_threads); +// Easy for profiling independently. +TEST(Analyzer_Text_Classification, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; - // Get output - LOG(INFO) << "get outputs " << output_slots.size(); + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); - for (auto &output : output_slots) { - LOG(INFO) << "output.shape: " << to_string(output.shape); - // no lod ? - CHECK_EQ(output.lod.size(), 0UL); - LOG(INFO) << "output.dtype: " << output.dtype; - std::stringstream ss; - for (int i = 0; i < 5; i++) { - ss << static_cast(output.data.data())[i] << " "; + if (FLAGS_num_threads == 1) { + // Get output + LOG(INFO) << "get outputs " << outputs.size(); + for (auto &output : outputs) { + LOG(INFO) << "output.shape: " << to_string(output.shape); + // no lod ? + CHECK_EQ(output.lod.size(), 0UL); + LOG(INFO) << "output.dtype: " << output.dtype; + std::stringstream ss; + for (int i = 0; i < 5; i++) { + ss << static_cast(output.data.data())[i] << " "; + } + LOG(INFO) << "output.data summary: " << ss.str(); + // one batch ends } - LOG(INFO) << "output.data summary: " << ss.str(); - // one batch ends } } -TEST(text_classification, basic) { Main(FLAGS_batch_size); } +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_Text_Classification, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); +} } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index a207c41b7140c806b4c1fdc7f24a317b165c9aef..483ae66c5b24f6147b1b07da86494a914f80c34c 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -49,84 +49,83 @@ Record ProcessALine(const std::string &line) { return record; } -/* - * Use the native and analysis fluid engine to inference the demo. - * ocr, mobilenet and se_resnext50 - */ -void TestVisualPrediction(bool use_mkldnn) { - std::unique_ptr predictor; - AnalysisConfig cfg; - cfg.param_file = FLAGS_infer_model + "/__params__"; - cfg.prog_file = FLAGS_infer_model + "/__model__"; - cfg.use_gpu = false; - cfg._use_mkldnn = use_mkldnn; - cfg.device = 0; - cfg.enable_ir_optim = true; +void SetConfig(AnalysisConfig *cfg) { + cfg->param_file = FLAGS_infer_model + "/__params__"; + cfg->prog_file = FLAGS_infer_model + "/__model__"; + cfg->use_gpu = false; + cfg->device = 0; + cfg->enable_ir_optim = true; + cfg->specify_input_name = true; // TODO(TJ): fix fusion gru - cfg.ir_passes.push_back("fc_gru_fuse_pass"); + cfg->ir_passes.push_back("fc_gru_fuse_pass"); #ifdef PADDLE_WITH_MKLDNN + cfg->_use_mkldnn = true; // disable mkldnn fuse since it should have some bugs - cfg.ir_passes.push_back("conv_relu_mkldnn_fuse_pass"); + cfg->ir_passes.push_back("conv_relu_mkldnn_fuse_pass"); #endif - predictor = - CreatePaddlePredictor(cfg); +} - // Only have single batch of data. +void SetInput(std::vector> *inputs) { + PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data."); std::string line; std::ifstream file(FLAGS_infer_data); std::getline(file, line); auto record = ProcessALine(line); - file.close(); - // Inference. PaddleTensor input; input.shape = record.shape; - input.data = - PaddleBuf(record.data.data(), record.data.size() * sizeof(float)); input.dtype = PaddleDType::FLOAT32; + size_t input_size = record.data.size() * sizeof(float); + input.data.Resize(input_size); + memcpy(input.data.data(), record.data.data(), input_size); + std::vector input_slots; + input_slots.assign({input}); + (*inputs).emplace_back(input_slots); +} - std::vector outputs_slots; - Timer timer; - timer.tic(); - for (int i = 0; i < FLAGS_repeat; i++) { - predictor->Run({input}, &outputs_slots); - } - PrintTime(/*batch size*/ 1, FLAGS_repeat, /*num threads*/ 1, /*thread id*/ 0, - timer.toc() / FLAGS_repeat); - - VLOG(3) << "output.size " << outputs_slots.size(); - - // run native as reference - auto ref_predictor = - CreatePaddlePredictor(cfg); - std::vector ref_outputs_slots; - ref_predictor->Run({input}, &ref_outputs_slots); - CompareResult(outputs_slots, ref_outputs_slots); - // print what are fused - AnalysisPredictor *analysis_predictor = - dynamic_cast(predictor.get()); - auto &fuse_statis = analysis_predictor->analysis_argument() - .Get>( - framework::ir::kFuseStatisAttr); - for (auto &item : fuse_statis) { - LOG(INFO) << "fused " << item.first << " " << item.second; - } - int num_ops = 0; - for (auto &node : - analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) { - if (node->IsFunction()) { - ++num_ops; +// Easy for profiling independently. +// ocr, mobilenet and se_resnext50 +TEST(Analyzer_vis, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; + + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); + + if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { + const float ocr_result_data[] = { + 5.273636460856323538e-08, 3.296741795111302054e-07, + 1.873261190610264748e-08, 3.403730275408634043e-08, + 3.383312474625199684e-08}; + PADDLE_ENFORCE_EQ(outputs.size(), 1UL); + size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(size, 0); + float *result = static_cast(outputs[0].data.data()); + for (size_t i = 0; i < std::min(5UL, size); i++) { + EXPECT_NEAR(result[i], ocr_result_data[i], 1e-3); } } - LOG(INFO) << "has num ops: " << num_ops; } -TEST(Analyzer_vis, analysis) { TestVisualPrediction(/*use_mkldnn*/ false); } -#ifdef PADDLE_WITH_MKLDNN -TEST(Analyzer_vis, analysis_mkldnn) { - TestVisualPrediction(/*use_mkldnn*/ true); +// Check the fuse status +TEST(Analyzer_vis, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); + int num_ops; + GetFuseStatis(cfg, &num_ops); +} + +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_vis, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); } -#endif } // namespace analysis } // namespace inference diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 43e97614e3ad9c14c8deee9f340757f373eb593e..384a40a3f992d1a9734e3189b422be0ce6adb938 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -15,6 +15,7 @@ #pragma once #include +#include #include // NOLINT #include #include "paddle/fluid/framework/ir/fuse_pass_base.h" @@ -28,17 +29,18 @@ DEFINE_string(infer_model, "", "model path"); DEFINE_string(infer_data, "", "data file"); DEFINE_int32(batch_size, 1, "batch size."); -DEFINE_int32(burning, 0, "Burning before repeat."); DEFINE_int32(repeat, 1, "Running the inference program repeat times."); DEFINE_bool(test_all_data, false, "Test the all dataset in data file."); DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads."); +DEFINE_bool(use_analysis, true, + "Running the inference program in analysis mode."); namespace paddle { namespace inference { void CompareResult(const std::vector &outputs, const std::vector &ref_outputs) { - EXPECT_GT(outputs.size(), 0); + EXPECT_GT(outputs.size(), 0UL); EXPECT_EQ(outputs.size(), ref_outputs.size()); for (size_t i = 0; i < outputs.size(); i++) { auto &out = outputs[i]; @@ -72,14 +74,50 @@ void CompareResult(const std::vector &outputs, } } +std::unique_ptr GetPrediction(AnalysisConfig config, + bool use_analysis = true) { + if (use_analysis) { + return CreatePaddlePredictor( + config); + } else { + return CreatePaddlePredictor( + config); + } +} + +size_t GetSize(const PaddleTensor &out) { + return std::accumulate(out.shape.begin(), out.shape.end(), 1, + [](int a, int b) { return a * b; }); +} + +std::unordered_map GetFuseStatis(AnalysisConfig config, + int *num_ops) { + auto predictor = GetPrediction(config); + AnalysisPredictor *analysis_predictor = + dynamic_cast(predictor.get()); + auto &fuse_statis = analysis_predictor->analysis_argument() + .Get>( + framework::ir::kFuseStatisAttr); + for (auto &item : fuse_statis) { + LOG(INFO) << "fused " << item.first << " " << item.second; + } + int num = 0; + for (auto &node : + analysis_predictor->analysis_argument().main_dfg->nodes.nodes()) { + if (node->IsFunction()) { + ++num; + } + } + *num_ops = num; + return fuse_statis; +} + void TestOneThreadPrediction( AnalysisConfig config, const std::vector> inputs, - std::vector *outputs) { + std::vector *outputs, bool use_analysis = true) { int batch_size = FLAGS_batch_size; int num_times = FLAGS_repeat; - auto predictor = - CreatePaddlePredictor( - config); + auto predictor = GetPrediction(config, use_analysis); Timer timer; timer.tic(); for (int i = 0; i < num_times; i++) { @@ -93,7 +131,8 @@ void TestOneThreadPrediction( void TestMultiThreadPrediction( AnalysisConfig config, const std::vector> inputs, - std::vector *outputs, int num_threads) { + std::vector *outputs, int num_threads, + bool use_analysis = true) { int batch_size = FLAGS_batch_size; int num_times = FLAGS_repeat; std::vector threads; @@ -101,9 +140,7 @@ void TestMultiThreadPrediction( // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled // because AttentionLSTM's hard code nodeid will be damanged. for (int tid = 0; tid < num_threads; ++tid) { - predictors.emplace_back( - CreatePaddlePredictor( - config)); + predictors.emplace_back(GetPrediction(config, use_analysis)); } for (int tid = 0; tid < num_threads; ++tid) { threads.emplace_back([&, tid]() { @@ -129,13 +166,25 @@ void TestMultiThreadPrediction( void TestPrediction(AnalysisConfig config, const std::vector> inputs, - std::vector *outputs, int num_threads) { + std::vector *outputs, int num_threads, + bool use_analysis = FLAGS_use_analysis) { + LOG(INFO) << "use_analysis: " << use_analysis; if (num_threads == 1) { - TestOneThreadPrediction(config, inputs, outputs); + TestOneThreadPrediction(config, inputs, outputs, use_analysis); } else { - TestMultiThreadPrediction(config, inputs, outputs, num_threads); + TestMultiThreadPrediction(config, inputs, outputs, num_threads, + use_analysis); } } +void CompareNativeAndAnalysis( + AnalysisConfig config, + const std::vector> inputs) { + std::vector native_outputs, analysis_outputs; + TestOneThreadPrediction(config, inputs, &native_outputs, false); + TestOneThreadPrediction(config, inputs, &analysis_outputs, true); + CompareResult(analysis_outputs, native_outputs); +} + } // namespace inference } // namespace paddle