diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 87e02a02caebd93d701dfd9e51c35fb974c770ed..3d72295be4b779693a56d1ddb6bc4aad7c2c82c9 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -86,7 +86,8 @@ const std::vector kAnakinSubgraphPasses({ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { passes_.assign({ - "infer_clean_graph_pass", // + "infer_clean_graph_pass", // + "runtime_context_cache_pass", // // "identity_scale_op_clean_pass", // "conv_affine_channel_fuse_pass", // "conv_eltwiseadd_affine_channel_fuse_pass", // @@ -96,7 +97,6 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { "conv_elementwise_add_act_fuse_pass", // "conv_elementwise_add2_act_fuse_pass", // "conv_elementwise_add_fuse_pass", // - "runtime_context_cache_pass", // #endif // "transpose_flatten_concat_fuse_pass", }); @@ -116,7 +116,11 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { // NOTE the large fusions should be located in the front, so that they will // not be damaged by smaller ones. passes_.assign({ - "infer_clean_graph_pass", // + "infer_clean_graph_pass", // + // TODO(luotao): runtime_context_cache_pass should be located in the + // front, see https://github.com/PaddlePaddle/Paddle/issues/16609, + // will enhance this pass later. + "runtime_context_cache_pass", // "attention_lstm_fuse_pass", // "seqpool_concat_fuse_pass", // "seqconv_eltadd_relu_fuse_pass", // @@ -132,8 +136,6 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { "conv_bn_fuse_pass", // "conv_eltwiseadd_bn_fuse_pass", // "is_test_pass", // - "identity_scale_op_clean_pass", // - "runtime_context_cache_pass", // }); use_gpu_ = false; } diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc index ece094717b8076321c68d7fdd29f07c4da6b0ed4..fbf67d933786e3ee2baab7a20911da2837cdce4d 100644 --- a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc @@ -23,18 +23,11 @@ namespace analysis { void SetConfig(AnalysisConfig *cfg) { cfg->SetModel(FLAGS_infer_model); - cfg->SetProgFile("__model__"); cfg->DisableGpu(); cfg->SwitchIrOptim(); - cfg->SwitchSpecifyInputNames(false); + cfg->SwitchSpecifyInputNames(); cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads); cfg->EnableMKLDNN(); - cfg->pass_builder()->SetPasses( - {"infer_clean_graph_pass", "mkldnn_placement_pass", - "depthwise_conv_mkldnn_pass", "conv_bn_fuse_pass", - "conv_eltwiseadd_bn_fuse_pass", "conv_bias_mkldnn_fuse_pass", - "conv_elementwise_add_mkldnn_fuse_pass", "conv_relu_mkldnn_fuse_pass", - "fc_fuse_pass", "is_test_pass"}); } template @@ -84,13 +77,13 @@ std::shared_ptr> GetWarmupData( std::to_string(num_images) + " is bigger than all test data size."); PaddleTensor images; - images.name = "input"; + images.name = "image"; images.shape = {num_images, 3, 224, 224}; images.dtype = PaddleDType::FLOAT32; images.data.Resize(sizeof(float) * num_images * 3 * 224 * 224); PaddleTensor labels; - labels.name = "labels"; + labels.name = "label"; labels.shape = {num_images, 1}; labels.dtype = PaddleDType::INT64; labels.data.Resize(sizeof(int64_t) * num_images); @@ -132,7 +125,7 @@ void SetInput(std::vector> *inputs, images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224; TensorReader image_reader(file, images_offset_in_file, - image_batch_shape, "input"); + image_batch_shape, "image"); TensorReader label_reader(file, labels_offset_in_file, label_batch_shape, "label"); diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 9a0dcc722cf00984b8c0e3ac20f13849e2904102..5cc54ed299c50b48c83de2742b715b16cf1f8cd0 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -316,7 +316,8 @@ void PredictionRun(PaddlePredictor *predictor, int num_threads, int tid) { int num_times = FLAGS_repeat; int iterations = inputs.size(); // process the whole dataset ... - if (FLAGS_iterations > 0 && FLAGS_iterations < inputs.size()) + if (FLAGS_iterations > 0 && + FLAGS_iterations < static_cast(inputs.size())) iterations = FLAGS_iterations; // ... unless the number of iterations is set outputs->resize(iterations); @@ -329,14 +330,14 @@ void PredictionRun(PaddlePredictor *predictor, #endif if (!FLAGS_zero_copy) { run_timer.tic(); - for (size_t i = 0; i < iterations; i++) { + for (int i = 0; i < iterations; i++) { for (int j = 0; j < num_times; j++) { predictor->Run(inputs[i], &(*outputs)[i], FLAGS_batch_size); } } elapsed_time = run_timer.toc(); } else { - for (size_t i = 0; i < iterations; i++) { + for (int i = 0; i < iterations; i++) { ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[i]); run_timer.tic(); for (int j = 0; j < num_times; j++) { @@ -366,9 +367,8 @@ void TestOneThreadPrediction( const std::vector> &inputs, std::vector> *outputs, bool use_analysis = true) { auto predictor = CreateTestPredictor(config, use_analysis); - PredictionWarmUp(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads, - 0); - PredictionRun(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads, 0); + PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0); + PredictionRun(predictor.get(), inputs, outputs, 1, 0); } void TestMultiThreadPrediction(