From 390d2bb5450ed122e7f9da7ed6d3fada463400f9 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 22 Dec 2021 21:50:51 +0800 Subject: [PATCH] feat(mgb): tensorrt runtime opr support multiple profiles GitOrigin-RevId: 1157d34e4d3bcaa9665a4a652e258c9235210c6d --- src/tensorrt/impl/tensorrt_opr.cpp | 106 ++++++++++++++---- src/tensorrt/impl/tensorrt_runtime_opr.cpp | 14 ++- .../include/megbrain/tensorrt/tensorrt_opr.h | 9 +- src/tensorrt/test/make_trt_net.cpp | 20 +++- src/tensorrt/test/tensorrt_runtime.cpp | 8 +- 5 files changed, 121 insertions(+), 36 deletions(-) diff --git a/src/tensorrt/impl/tensorrt_opr.cpp b/src/tensorrt/impl/tensorrt_opr.cpp index 266e659d1..f0a2b56af 100644 --- a/src/tensorrt/impl/tensorrt_opr.cpp +++ b/src/tensorrt/impl/tensorrt_opr.cpp @@ -153,49 +153,100 @@ void TensorRTOpr::GpuAllocator::free(void* memory) { } /* ========================== TensorRTManager ========================== */ -const intl::TensorRTUniquePtr& TensorRTManager:: - create_trt_context( - const TensorShapeArray& inp_shape, nvinfer1::ICudaEngine* engine) { +void TensorRTManager::create_trt_context( + mgb::CompNode cn, const TensorShapeArray& inp_shape, + nvinfer1::ICudaEngine* engine) { if (!m_context) { m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}}; + MGB_MARK_USED_VAR(cn); #if NV_TENSOR_RT_VERSION >= 6001 - for (size_t i = 0; i < inp_shape.size(); ++i) { + auto profile_num = engine->getNbOptimizationProfiles(); + auto bindings_per_profile = engine->getNbBindings() / profile_num; + // choose nearest profile + int profile_idx = 0; +#if NV_TENSOR_RT_VERSION >= 7200 + if (profile_num > 1) { + double dist = DBL_MAX; + for (int i = 0; i < profile_num; i++) { + double d_sum = 0; + for (size_t j = 0; j < inp_shape.size(); ++j) { + double d = 0; + double l = 0; + auto min_dim = engine->getProfileDimensions( + j + bindings_per_profile * i, i, + nvinfer1::OptProfileSelector::kMIN); + auto max_dim = engine->getProfileDimensions( + j + 
bindings_per_profile * i, i, + nvinfer1::OptProfileSelector::kMAX); + auto opt_dim = engine->getProfileDimensions( + j + bindings_per_profile * i, i, + nvinfer1::OptProfileSelector::kOPT); + for (int k = 0; k < min_dim.nbDims; k++) { + int inp_v = static_cast(inp_shape.at(j)[k]); + if (inp_v < min_dim.d[k] || inp_v > max_dim.d[k]) { + d = DBL_MAX; + break; + } else { + d += pow(inp_v - opt_dim.d[k], 2); + l += pow(opt_dim.d[k], 2); + } + } + if (d != DBL_MAX) { + d_sum += sqrt(d) / sqrt(l); + } else { + d_sum = DBL_MAX; + break; + } + } + if (d_sum < dist) { + profile_idx = i; + dist = d_sum; + } + } + cn.activate(); + auto&& env = mgb::CompNodeEnv::from_comp_node(cn); + m_context->setOptimizationProfileAsync(profile_idx, env.cuda_env().stream); + } +#endif + m_offset = profile_idx * bindings_per_profile; + for (size_t i = m_offset; i < m_offset + inp_shape.size(); ++i) { auto dims = m_context->getBindingDimensions(i); for (int j = 0; j < dims.nbDims; j++) { if (dims.d[j] == -1) { - dims.d[j] = inp_shape.at(i)[j]; + dims.d[j] = inp_shape.at(i - m_offset)[j]; } } - m_context->setBindingDimensions(i, dims); + m_context->setBindingDimensions(i, dims); /* i: this input's own binding */ } // check if input shape is set correctly - for (int i = inp_shape.size(); i < engine->getNbBindings(); ++i) { + for (int i = m_offset + inp_shape.size(); i < m_offset + bindings_per_profile; + ++i) { auto dims = m_context->getBindingDimensions(i); if (dims.nbDims == -1) { - for (int j = 0; j < engine->getNbOptimizationProfiles(); j++) { - mgb_log_debug("TensorRT profile %d:\n", j); - for (size_t k = 0; k < inp_shape.size(); k++) { - mgb_log_debug( - "input[%zu]'s minimum shape is: %s\n", k, + for (int j = 0; j < profile_num; j++) { + mgb_log_error("TensorRT profile %d:\n", j); + for (size_t k = m_offset; k < m_offset + inp_shape.size(); k++) { + mgb_log_error( + "input[%zu]'s minimum shape is: %s\n", k - m_offset, TensorRTOpr::dims2shape( engine->getProfileDimensions( - j, k, + k - m_offset, j, /* binding index relative to profile j */
nvinfer1::OptProfileSelector::kMIN)) .to_string() .c_str()); - mgb_log_debug( - "input[%zu]'s optimum shape is: %s\n", k, + mgb_log_error( + "input[%zu]'s optimum shape is: %s\n", k - m_offset, TensorRTOpr::dims2shape( engine->getProfileDimensions( - j, k, + k - m_offset, j, nvinfer1::OptProfileSelector::kOPT)) .to_string() .c_str()); - mgb_log_debug( - "input[%zu]'s maximum shape is: %s\n", k, + mgb_log_error( + "input[%zu]'s maximum shape is: %s\n", k - m_offset, TensorRTOpr::dims2shape( engine->getProfileDimensions( - j, k, + k - m_offset, j, nvinfer1::OptProfileSelector::kMAX)) .to_string() .c_str()); @@ -209,9 +260,15 @@ const intl::TensorRTUniquePtr& TensorRTManager:: } #endif } - return m_context; } +#if NV_TENSOR_RT_VERSION >= 6001 +nvinfer1::Dims TensorRTManager::get_binding_dimensions(int binding_idx) const { + mgb_assert(m_context, "Please create_trt_context before get_binding_dimensions."); + return m_context->getBindingDimensions(binding_idx + m_offset); +} +#endif + void TensorRTManager::exec( cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check, nvinfer1::ICudaEngine* engine, size_t batch, bool use_trt_profiler) { @@ -232,8 +289,8 @@ void TensorRTManager::exec( for (auto&& i : opr->input()) { arr.push_back(i->shape()); } - create_trt_context(arr, engine); - m_trt_iobuf.resize(opr->input().size() + opr->output().size() - 1); + create_trt_context(comp_node, arr, engine); + m_trt_iobuf.resize(engine->getNbBindings()); bool is_trt_opr = false; if (opr->same_type()) { is_trt_opr = true; @@ -250,10 +307,10 @@ void TensorRTManager::exec( } } else { for (size_t i = 0; i < opr->input().size(); ++i) { - m_trt_iobuf[i] = opr->input(i)->dev_tensor().raw_ptr(); + m_trt_iobuf[i + m_offset] = opr->input(i)->dev_tensor().raw_ptr(); } for (size_t i = 0; i < opr->output().size() - 1; ++i) { - m_trt_iobuf[opr->input().size() + i] = + m_trt_iobuf[opr->input().size() + i + m_offset] = opr->output(i)->dev_tensor().raw_ptr(); } } @@ -265,6 +322,7 @@ void TensorRTManager::exec( 
m_context->setDeviceMemory(workspace_ptr); m_device_workspace_memory_ptr = workspace_ptr; } + auto&& env = mgb::CompNodeEnv::from_comp_node(comp_node); bool exec_success = false; diff --git a/src/tensorrt/impl/tensorrt_runtime_opr.cpp b/src/tensorrt/impl/tensorrt_runtime_opr.cpp index e96cf3a82..ef232d8c6 100644 --- a/src/tensorrt/impl/tensorrt_runtime_opr.cpp +++ b/src/tensorrt/impl/tensorrt_runtime_opr.cpp @@ -70,7 +70,13 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr( inputs[0]->comp_node().to_string().c_str()); size_t nr_input = 0; bool is_input = true; - for (int i = 0; i < m_engine->getNbBindings(); ++i) { +#if NV_TENSOR_RT_VERSION >= 6001 + auto profile_num = m_engine->getNbOptimizationProfiles(); +#else + int profile_num = 1; +#endif + auto bindings_per_profile = m_engine->getNbBindings() / profile_num; + for (int i = 0; i < bindings_per_profile; ++i) { if (m_engine->bindingIsInput(nr_input)) { mgb_assert(is_input, "mixed input/output bindings"); // nbDims == 3, means CHW, without batch @@ -81,7 +87,7 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr( is_input = false; } } - size_t nr_output = m_engine->getNbBindings() - nr_input; + size_t nr_output = bindings_per_profile - nr_input; mgb_assert( nr_input == inputs.size(), "inputs size not equal: expect=%zu got=%zu", nr_input, inputs.size()); @@ -101,7 +107,7 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr( void TensorRTRuntimeOpr::get_output_var_shape( const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const { auto batch = inp_shape.at(0)[0]; - auto&& context = m_manager.create_trt_context(inp_shape, m_engine.get()); + m_manager.create_trt_context(this->comp_node(), inp_shape, m_engine.get()); auto get_mgb_shape = [&](int binding_idx) -> TensorShape { auto dims = m_engine->getBindingDimensions(binding_idx); #if NV_TENSOR_RT_VERSION >= 6001 @@ -132,7 +138,7 @@ void TensorRTRuntimeOpr::get_output_var_shape( } } } else { - auto trt_infer_dims = context->getBindingDimensions(binding_idx); + auto trt_infer_dims = 
m_manager.get_binding_dimensions(binding_idx); for (int i = 0; i < dims.nbDims; i++) { if (dims.d[i] == -1) { shape[i] = trt_infer_dims.d[i]; diff --git a/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h b/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h index 6695f7e00..baa6702f1 100644 --- a/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h +++ b/src/tensorrt/include/megbrain/tensorrt/tensorrt_opr.h @@ -50,10 +50,15 @@ class TensorRTManager { std::vector m_trt_iobuf; TensorRTUniquePtr m_context; void* m_device_workspace_memory_ptr; + int m_offset = 0; /* first binding index of the selected profile */ public: - const TensorRTUniquePtr& create_trt_context( - const TensorShapeArray& inp_shape, nvinfer1::ICudaEngine* engine); + void create_trt_context( + mgb::CompNode cn, const TensorShapeArray& inp_shape, + nvinfer1::ICudaEngine* engine); +#if NV_TENSOR_RT_VERSION >= 6001 + nvinfer1::Dims get_binding_dimensions(int binding_idx) const; +#endif void exec( cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check, nvinfer1::ICudaEngine* engine, size_t batch = 1, diff --git a/src/tensorrt/test/make_trt_net.cpp b/src/tensorrt/test/make_trt_net.cpp index 1f8acd139..ad2017772 100644 --- a/src/tensorrt/test/make_trt_net.cpp +++ b/src/tensorrt/test/make_trt_net.cpp @@ -519,14 +519,24 @@ TensorRTUniquePtr intl::DynamicShapeTensorRTNetwork::create_trt_net data = network->addInput("data", DataType::kFLOAT, Dims4{-1, 23, -1, -1}); nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); - nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile(); - profile->setDimensions( + + nvinfer1::IOptimizationProfile* profile1 = builder->createOptimizationProfile(); + profile1->setDimensions( + "data", nvinfer1::OptProfileSelector::kMIN, Dims4(1, 23, 10, 10)); + profile1->setDimensions( + "data", nvinfer1::OptProfileSelector::kOPT, Dims4(2, 23, 12, 12)); + profile1->setDimensions( + "data", nvinfer1::OptProfileSelector::kMAX, Dims4(3, 23, 14, 14)); + config->addOptimizationProfile(profile1); + + 
nvinfer1::IOptimizationProfile* profile2 = builder->createOptimizationProfile(); + profile2->setDimensions( "data", nvinfer1::OptProfileSelector::kMIN, Dims4(3, 23, 16, 16)); - profile->setDimensions( + profile2->setDimensions( "data", nvinfer1::OptProfileSelector::kOPT, Dims4(4, 23, 24, 24)); - profile->setDimensions( + profile2->setDimensions( "data", nvinfer1::OptProfileSelector::kMAX, Dims4(5, 23, 28, 28)); - config->addOptimizationProfile(profile); + config->addOptimizationProfile(profile2); { nvinfer1::TensorFormats formats = diff --git a/src/tensorrt/test/tensorrt_runtime.cpp b/src/tensorrt/test/tensorrt_runtime.cpp index 6b7fb3596..10976db5f 100644 --- a/src/tensorrt/test/tensorrt_runtime.cpp +++ b/src/tensorrt/test/tensorrt_runtime.cpp @@ -310,7 +310,13 @@ TEST(TestOprTensorRT, ICudaEngine) { #if NV_TENSOR_RT_VERSION >= 6001 TEST(TestOprTensorRT, RuntimeDynamicShape) { REQUIRE_GPU(1); - intl::DynamicShapeTensorRTNetwork net1{5, 23, 26, 26}, net2{4, 23, 24, 24}; + intl::DynamicShapeTensorRTNetwork net1{2, 23, 14, 14}; +#if NV_TENSOR_RT_VERSION >= 7200 + intl::DynamicShapeTensorRTNetwork net2{4, 23, 24, 24}; +#else + intl::DynamicShapeTensorRTNetwork net2{3, 23, 10, 10}; +#endif + auto make_trt = [](intl::DynamicShapeTensorRTNetwork& net) { TensorRTUniquePtr cuda_engine = net.create_trt_network(); TensorRTUniquePtr mem{cuda_engine->serialize(), {}}; -- GitLab