diff --git a/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc b/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc
index c36968b7ed6f82811ffab2c7423420bfe70f8c5d..056371b0ae662a76445c06162b07979b4bf8eaee 100644
--- a/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc
+++ b/paddle/fluid/inference/tests/infer_ut/test_LeViT.cc
@@ -179,67 +179,69 @@ TEST(tensorrt_tester_LeViT, multi_thread4_trt_fp32_bz2) {
 }
 
 #ifdef PADDLE_WITH_GPU
-// TEST(tensorrt_tester_LeViT, multi_stream_thread4_trt_fp32_bz2) {
-//   int thread_num = 4;
-
-//   // init stream
-//   std::vector<cudaStream_t> streams(thread_num);
-//   for (size_t i = 0; i < thread_num; ++i) {
-//     cudaStreamCreate(&streams[i]);
-//   }
-
-//   // init input data
-//   std::map<std::string, paddle::test::Record> my_input_data_map;
-//   my_input_data_map["x"] = PrepareInput(2);
-//   // init output data
-//   std::map<std::string, paddle::test::Record> infer_output_data,
-//       truth_output_data;
-//   // prepare groudtruth config
-//   paddle_infer::Config config, config_no_ir;
-//   config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
-//                         FLAGS_modeldir + "/inference.pdiparams");
-//   config_no_ir.SwitchIrOptim(false);
-//   // prepare inference config
-//   config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
-//                   FLAGS_modeldir + "/inference.pdiparams");
-//   config.EnableUseGpu(100, 0);
-//   config.EnableTensorRtEngine(
-//       1 << 20, 2, 50, paddle_infer::PrecisionType::kFloat32, false, false);
-//   // get groudtruth by disbale ir
-
-//   paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
-//   SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
-//                          &truth_output_data, 1);
-
-//   // get infer results from multi threads
-//   std::vector<std::thread> threads;
-//   config.SetExecStream(streams[0]);
-//   config.pass_builder()->DeletePass("add_support_int8_pass");
-//   auto main_predictor = CreatePredictor(config);
-//   std::vector<decltype(main_predictor)> predictors;
-//   for (size_t i = 0; i < thread_num - 1; ++i) {
-//     predictors.push_back(std::move(main_predictor->Clone(streams[i + 1])));
-//     LOG(INFO) << "predictors[" << i << "] stream is "
-//               << predictors[i]->GetExecStream();
-//   }
-//   predictors.push_back(std::move(main_predictor));
-//   LOG(INFO) << "predictors[" << thread_num - 1 << "] stream is "
-//             << predictors[thread_num - 1]->GetExecStream();
-//   for (int i = 0; i < thread_num; ++i) {
-//     threads.emplace_back(paddle::test::SingleThreadPrediction,
-//                          predictors[i].get(), &my_input_data_map,
-//                          &infer_output_data, 10);
-//   }
-
-//   // thread join & check outputs
-//   for (int i = 0; i < thread_num; ++i) {
-//     LOG(INFO) << "join tid : " << i;
-//     threads[i].join();
-//     CompareRecord(&truth_output_data, &infer_output_data);
-//   }
-
-//   std::cout << "finish multi-thread test" << std::endl;
-// }
+TEST(tensorrt_tester_LeViT, multi_stream_thread4_trt_fp32_bz2) {
+  int thread_num = 4;
+
+  // init stream
+  std::vector<cudaStream_t> streams(thread_num);
+  for (size_t i = 0; i < thread_num; ++i) {
+    cudaStreamCreate(&streams[i]);
+  }
+
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare groudtruth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(100, 0);
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 50, paddle_infer::PrecisionType::kFloat32, false, false);
+  // get groudtruth by disbale ir
+
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(
+      pred_pool_no_ir.Retrive(0), &my_input_data_map, &truth_output_data, 1);
+
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  config.SetExecStream(streams[0]);
+  config.pass_builder()->DeletePass("add_support_int8_pass");
+  auto main_predictor = CreatePredictor(config);
+  std::vector<decltype(main_predictor)> predictors;
+  for (size_t i = 0; i < thread_num - 1; ++i) {
+    predictors.push_back(std::move(main_predictor->Clone(streams[i + 1])));
+    LOG(INFO) << "predictors[" << i << "] stream is "
+              << predictors[i]->GetExecStream();
+  }
+  predictors.push_back(std::move(main_predictor));
+  LOG(INFO) << "predictors[" << thread_num - 1 << "] stream is "
+            << predictors[thread_num - 1]->GetExecStream();
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         predictors[i].get(),
+                         &my_input_data_map,
+                         &infer_output_data,
+                         10);
+  }
+
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data);
+  }
+
+  std::cout << "finish multi-thread test" << std::endl;
+}
 #endif
 
 }  // namespace paddle_infer
diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cu b/paddle/phi/kernels/funcs/concat_and_split_functor.cu
index dbcd4016170d57515df67d4b8274aab41685ba73..01701ee287385b14e9d184b9c8d90c64ee1ec045 100644
--- a/paddle/phi/kernels/funcs/concat_and_split_functor.cu
+++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cu
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
-#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
 
 namespace phi {
 namespace funcs {
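
For context, a minimal standalone sketch of the multi-stream setup the re-enabled test exercises: one CUDA stream per worker, SetExecStream() binding the first stream to the main predictor, and Clone(stream) producing one predictor per remaining stream. The header path and model file names below are placeholders; only API calls that appear in the diff above are used, and the thread launch / output comparison done by the test harness is omitted.

    #include <cuda_runtime.h>
    #include <utility>
    #include <vector>
    #include "paddle_inference_api.h"  // assumed install-tree header name

    int main() {
      const int thread_num = 4;

      // One CUDA stream per worker thread.
      std::vector<cudaStream_t> streams(thread_num);
      for (int i = 0; i < thread_num; ++i) {
        cudaStreamCreate(&streams[i]);
      }

      paddle_infer::Config config;
      config.SetModel("inference.pdmodel", "inference.pdiparams");  // placeholder paths
      config.EnableUseGpu(100, 0);
      // The main predictor executes on the first external stream.
      config.SetExecStream(streams[0]);

      auto main_predictor = paddle_infer::CreatePredictor(config);

      // Each clone is bound to its own stream; one predictor per worker thread.
      std::vector<decltype(main_predictor)> predictors;
      for (int i = 1; i < thread_num; ++i) {
        predictors.push_back(main_predictor->Clone(streams[i]));
      }
      predictors.push_back(std::move(main_predictor));
      return 0;
    }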