Unverified commit 3c074de4, authored by Wilber, committed by GitHub

Enable inference multi stream ci test (#44275)

* test

* update
Parent 39e5dd2e
@@ -179,67 +179,69 @@ TEST(tensorrt_tester_LeViT, multi_thread4_trt_fp32_bz2) {
 }
 #ifdef PADDLE_WITH_GPU
-// TEST(tensorrt_tester_LeViT, multi_stream_thread4_trt_fp32_bz2) {
-//   int thread_num = 4;
-//   // init stream
-//   std::vector<cudaStream_t> streams(thread_num);
-//   for (size_t i = 0; i < thread_num; ++i) {
-//     cudaStreamCreate(&streams[i]);
-//   }
-//   // init input data
-//   std::map<std::string, paddle::test::Record> my_input_data_map;
-//   my_input_data_map["x"] = PrepareInput(2);
-//   // init output data
-//   std::map<std::string, paddle::test::Record> infer_output_data,
-//       truth_output_data;
-//   // prepare groudtruth config
-//   paddle_infer::Config config, config_no_ir;
-//   config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
-//                         FLAGS_modeldir + "/inference.pdiparams");
-//   config_no_ir.SwitchIrOptim(false);
-//   // prepare inference config
-//   config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
-//                   FLAGS_modeldir + "/inference.pdiparams");
-//   config.EnableUseGpu(100, 0);
-//   config.EnableTensorRtEngine(
-//       1 << 20, 2, 50, paddle_infer::PrecisionType::kFloat32, false, false);
-//   // get groudtruth by disbale ir
-//   paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
-//   SingleThreadPrediction(pred_pool_no_ir.Retrive(0), &my_input_data_map,
-//                          &truth_output_data, 1);
-//   // get infer results from multi threads
-//   std::vector<std::thread> threads;
-//   config.SetExecStream(streams[0]);
-//   config.pass_builder()->DeletePass("add_support_int8_pass");
-//   auto main_predictor = CreatePredictor(config);
-//   std::vector<decltype(main_predictor)> predictors;
-//   for (size_t i = 0; i < thread_num - 1; ++i) {
-//     predictors.push_back(std::move(main_predictor->Clone(streams[i + 1])));
-//     LOG(INFO) << "predictors[" << i << "] stream is "
-//               << predictors[i]->GetExecStream();
-//   }
-//   predictors.push_back(std::move(main_predictor));
-//   LOG(INFO) << "predictors[" << thread_num - 1 << "] stream is "
-//             << predictors[thread_num - 1]->GetExecStream();
-//   for (int i = 0; i < thread_num; ++i) {
-//     threads.emplace_back(paddle::test::SingleThreadPrediction,
-//                          predictors[i].get(), &my_input_data_map,
-//                          &infer_output_data, 10);
-//   }
-//   // thread join & check outputs
-//   for (int i = 0; i < thread_num; ++i) {
-//     LOG(INFO) << "join tid : " << i;
-//     threads[i].join();
-//     CompareRecord(&truth_output_data, &infer_output_data);
-//   }
-//   std::cout << "finish multi-thread test" << std::endl;
-// }
+TEST(tensorrt_tester_LeViT, multi_stream_thread4_trt_fp32_bz2) {
+  int thread_num = 4;
+  // init stream
+  std::vector<cudaStream_t> streams(thread_num);
+  for (size_t i = 0; i < thread_num; ++i) {
+    cudaStreamCreate(&streams[i]);
+  }
+  // init input data
+  std::map<std::string, paddle::test::Record> my_input_data_map;
+  my_input_data_map["x"] = PrepareInput(2);
+  // init output data
+  std::map<std::string, paddle::test::Record> infer_output_data,
+      truth_output_data;
+  // prepare groudtruth config
+  paddle_infer::Config config, config_no_ir;
+  config_no_ir.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                        FLAGS_modeldir + "/inference.pdiparams");
+  config_no_ir.SwitchIrOptim(false);
+  // prepare inference config
+  config.SetModel(FLAGS_modeldir + "/inference.pdmodel",
+                  FLAGS_modeldir + "/inference.pdiparams");
+  config.EnableUseGpu(100, 0);
+  config.EnableTensorRtEngine(
+      1 << 20, 2, 50, paddle_infer::PrecisionType::kFloat32, false, false);
+  // get groudtruth by disbale ir
+  paddle_infer::services::PredictorPool pred_pool_no_ir(config_no_ir, 1);
+  SingleThreadPrediction(
+      pred_pool_no_ir.Retrive(0), &my_input_data_map, &truth_output_data, 1);
+  // get infer results from multi threads
+  std::vector<std::thread> threads;
+  config.SetExecStream(streams[0]);
+  config.pass_builder()->DeletePass("add_support_int8_pass");
+  auto main_predictor = CreatePredictor(config);
+  std::vector<decltype(main_predictor)> predictors;
+  for (size_t i = 0; i < thread_num - 1; ++i) {
+    predictors.push_back(std::move(main_predictor->Clone(streams[i + 1])));
+    LOG(INFO) << "predictors[" << i << "] stream is "
+              << predictors[i]->GetExecStream();
+  }
+  predictors.push_back(std::move(main_predictor));
+  LOG(INFO) << "predictors[" << thread_num - 1 << "] stream is "
+            << predictors[thread_num - 1]->GetExecStream();
+  for (int i = 0; i < thread_num; ++i) {
+    threads.emplace_back(paddle::test::SingleThreadPrediction,
+                         predictors[i].get(),
+                         &my_input_data_map,
+                         &infer_output_data,
+                         10);
+  }
+  // thread join & check outputs
+  for (int i = 0; i < thread_num; ++i) {
+    LOG(INFO) << "join tid : " << i;
+    threads[i].join();
+    CompareRecord(&truth_output_data, &infer_output_data);
+  }
+  std::cout << "finish multi-thread test" << std::endl;
+}
 #endif
 } // namespace paddle_infer
......
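For reference, below is a minimal sketch (not part of this commit) of the external-stream pattern the re-enabled test exercises: one predictor is bound to a user-created CUDA stream via Config::SetExecStream, and additional predictors are cloned onto their own streams with Predictor::Clone(stream). The model paths, stream count, and include path are placeholders, not values taken from the repository.

```cpp
// Sketch only: bind Paddle Inference predictors to user-managed CUDA streams,
// mirroring the pattern used by the test above.
#include <cuda_runtime.h>

#include <vector>

#include "paddle_inference_api.h"  // header name depends on the install layout

int main() {
  const int stream_num = 2;  // placeholder stream count
  std::vector<cudaStream_t> streams(stream_num);
  for (auto& s : streams) cudaStreamCreate(&s);

  paddle_infer::Config config;
  config.SetModel("model_dir/inference.pdmodel",   // placeholder paths
                  "model_dir/inference.pdiparams");
  config.EnableUseGpu(100, 0);
  config.SetExecStream(streams[0]);  // main predictor runs on streams[0]

  auto main_predictor = paddle_infer::CreatePredictor(config);
  // Clones share weights with the main predictor but execute on their own stream.
  auto cloned_predictor = main_predictor->Clone(streams[1]);

  // ... each worker thread would then set inputs and call Run() on its own predictor ...

  for (auto& s : streams) cudaStreamDestroy(s);
  return 0;
}
```

Each predictor can then be driven from its own std::thread, which is what the SingleThreadPrediction helper in the test does for ten iterations per predictor before the outputs are compared against the IR-disabled ground truth.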
@@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
 #include "paddle/fluid/memory/malloc.h"
 #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h"
+#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
 namespace phi {
 namespace funcs {
......