提交 390d2bb5 编写于 作者: M Megvii Engine Team

feat(mgb): tensorrt runtime opr support multiple profiles

GitOrigin-RevId: 1157d34e4d3bcaa9665a4a652e258c9235210c6d
上级 1708ab2e
...@@ -153,49 +153,100 @@ void TensorRTOpr::GpuAllocator::free(void* memory) { ...@@ -153,49 +153,100 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
} }
/* ========================== TensorRTManager ========================== */ /* ========================== TensorRTManager ========================== */
const intl::TensorRTUniquePtr<nvinfer1::IExecutionContext>& TensorRTManager:: void TensorRTManager::create_trt_context(
create_trt_context( mgb::CompNode cn, const TensorShapeArray& inp_shape,
const TensorShapeArray& inp_shape, nvinfer1::ICudaEngine* engine) { nvinfer1::ICudaEngine* engine) {
if (!m_context) { if (!m_context) {
m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}}; m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}};
MGB_MARK_USED_VAR(cn);
#if NV_TENSOR_RT_VERSION >= 6001 #if NV_TENSOR_RT_VERSION >= 6001
for (size_t i = 0; i < inp_shape.size(); ++i) { auto profile_num = engine->getNbOptimizationProfiles();
auto bindings_per_profile = engine->getNbBindings() / profile_num;
// choose nearest profile
int profile_idx = 0;
#if NV_TENSOR_RT_VERSION >= 7200
if (profile_num > 1) {
double dist = DBL_MAX;
for (int i = 0; i < profile_num; i++) {
double d_sum = 0;
for (size_t j = 0; j < inp_shape.size(); ++j) {
double d = 0;
double l = 0;
auto min_dim = engine->getProfileDimensions(
j + bindings_per_profile * i, i,
nvinfer1::OptProfileSelector::kMIN);
auto max_dim = engine->getProfileDimensions(
j + bindings_per_profile * i, i,
nvinfer1::OptProfileSelector::kMAX);
auto opt_dim = engine->getProfileDimensions(
j + bindings_per_profile * i, i,
nvinfer1::OptProfileSelector::kOPT);
for (int k = 0; k < min_dim.nbDims; k++) {
int inp_v = static_cast<int>(inp_shape.at(j)[k]);
if (inp_v < min_dim.d[k] || inp_v > max_dim.d[k]) {
d = DBL_MAX;
break;
} else {
d += pow(inp_v - opt_dim.d[k], 2);
l += pow(opt_dim.d[k], 2);
}
}
if (d != DBL_MAX) {
d_sum += sqrt(d) / sqrt(l);
} else {
d_sum = DBL_MAX;
break;
}
}
if (d_sum < dist) {
profile_idx = i;
dist = d_sum;
}
}
cn.activate();
auto&& env = mgb::CompNodeEnv::from_comp_node(cn);
m_context->setOptimizationProfileAsync(profile_idx, env.cuda_env().stream);
}
#endif
m_offset = profile_idx * bindings_per_profile;
for (size_t i = m_offset; i < m_offset + inp_shape.size(); ++i) {
auto dims = m_context->getBindingDimensions(i); auto dims = m_context->getBindingDimensions(i);
for (int j = 0; j < dims.nbDims; j++) { for (int j = 0; j < dims.nbDims; j++) {
if (dims.d[j] == -1) { if (dims.d[j] == -1) {
dims.d[j] = inp_shape.at(i)[j]; dims.d[j] = inp_shape.at(i - m_offset)[j];
} }
} }
m_context->setBindingDimensions(i, dims); m_context->setBindingDimensions(m_offset, dims);
} }
// check if input shape is set correctly // check if input shape is set correctly
for (int i = inp_shape.size(); i < engine->getNbBindings(); ++i) { for (int i = m_offset + inp_shape.size(); i < m_offset + bindings_per_profile;
++i) {
auto dims = m_context->getBindingDimensions(i); auto dims = m_context->getBindingDimensions(i);
if (dims.nbDims == -1) { if (dims.nbDims == -1) {
for (int j = 0; j < engine->getNbOptimizationProfiles(); j++) { for (int j = 0; j < profile_num; j++) {
mgb_log_debug("TensorRT profile %d:\n", j); mgb_log_error("TensorRT profile %d:\n", j);
for (size_t k = 0; k < inp_shape.size(); k++) { for (size_t k = m_offset; k < m_offset + inp_shape.size(); k++) {
mgb_log_debug( mgb_log_error(
"input[%zu]'s minimum shape is: %s\n", k, "input[%zu]'s minimum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape( TensorRTOpr::dims2shape(
engine->getProfileDimensions( engine->getProfileDimensions(
j, k, k, j,
nvinfer1::OptProfileSelector::kMIN)) nvinfer1::OptProfileSelector::kMIN))
.to_string() .to_string()
.c_str()); .c_str());
mgb_log_debug( mgb_log_error(
"input[%zu]'s optimum shape is: %s\n", k, "input[%zu]'s optimum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape( TensorRTOpr::dims2shape(
engine->getProfileDimensions( engine->getProfileDimensions(
j, k, k, j,
nvinfer1::OptProfileSelector::kOPT)) nvinfer1::OptProfileSelector::kOPT))
.to_string() .to_string()
.c_str()); .c_str());
mgb_log_debug( mgb_log_error(
"input[%zu]'s maximum shape is: %s\n", k, "input[%zu]'s maximum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape( TensorRTOpr::dims2shape(
engine->getProfileDimensions( engine->getProfileDimensions(
j, k, k, j,
nvinfer1::OptProfileSelector::kMAX)) nvinfer1::OptProfileSelector::kMAX))
.to_string() .to_string()
.c_str()); .c_str());
...@@ -209,9 +260,15 @@ const intl::TensorRTUniquePtr<nvinfer1::IExecutionContext>& TensorRTManager:: ...@@ -209,9 +260,15 @@ const intl::TensorRTUniquePtr<nvinfer1::IExecutionContext>& TensorRTManager::
} }
#endif #endif
} }
return m_context;
} }
#if NV_TENSOR_RT_VERSION >= 6001
/*!
 * \brief query the execution context for the dimensions of one binding
 *
 * \param binding_idx binding index relative to the selected optimization
 *      profile; it is shifted by m_offset (profile index times bindings per
 *      profile, as computed in create_trt_context) before the lookup.
 * \return the dimensions reported by the context for the shifted index.
 *
 * Asserts that the execution context has already been created.
 */
nvinfer1::Dims TensorRTManager::get_binding_dimensions(int binding_idx) const {
    mgb_assert(m_context, "Please create_trt_context before get_binding_dimensions.");
    // translate the profile-relative index into the engine-wide binding index
    const int engine_binding_idx = m_offset + binding_idx;
    return m_context->getBindingDimensions(engine_binding_idx);
}
#endif
void TensorRTManager::exec( void TensorRTManager::exec(
cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check, cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check,
nvinfer1::ICudaEngine* engine, size_t batch, bool use_trt_profiler) { nvinfer1::ICudaEngine* engine, size_t batch, bool use_trt_profiler) {
...@@ -232,8 +289,8 @@ void TensorRTManager::exec( ...@@ -232,8 +289,8 @@ void TensorRTManager::exec(
for (auto&& i : opr->input()) { for (auto&& i : opr->input()) {
arr.push_back(i->shape()); arr.push_back(i->shape());
} }
create_trt_context(arr, engine); create_trt_context(comp_node, arr, engine);
m_trt_iobuf.resize(opr->input().size() + opr->output().size() - 1); m_trt_iobuf.resize(engine->getNbBindings());
bool is_trt_opr = false; bool is_trt_opr = false;
if (opr->same_type<TensorRTOpr>()) { if (opr->same_type<TensorRTOpr>()) {
is_trt_opr = true; is_trt_opr = true;
...@@ -250,10 +307,10 @@ void TensorRTManager::exec( ...@@ -250,10 +307,10 @@ void TensorRTManager::exec(
} }
} else { } else {
for (size_t i = 0; i < opr->input().size(); ++i) { for (size_t i = 0; i < opr->input().size(); ++i) {
m_trt_iobuf[i] = opr->input(i)->dev_tensor().raw_ptr(); m_trt_iobuf[i + m_offset] = opr->input(i)->dev_tensor().raw_ptr();
} }
for (size_t i = 0; i < opr->output().size() - 1; ++i) { for (size_t i = 0; i < opr->output().size() - 1; ++i) {
m_trt_iobuf[opr->input().size() + i] = m_trt_iobuf[opr->input().size() + i + m_offset] =
opr->output(i)->dev_tensor().raw_ptr(); opr->output(i)->dev_tensor().raw_ptr();
} }
} }
...@@ -265,6 +322,7 @@ void TensorRTManager::exec( ...@@ -265,6 +322,7 @@ void TensorRTManager::exec(
m_context->setDeviceMemory(workspace_ptr); m_context->setDeviceMemory(workspace_ptr);
m_device_workspace_memory_ptr = workspace_ptr; m_device_workspace_memory_ptr = workspace_ptr;
} }
auto&& env = mgb::CompNodeEnv::from_comp_node(comp_node); auto&& env = mgb::CompNodeEnv::from_comp_node(comp_node);
bool exec_success = false; bool exec_success = false;
......
...@@ -70,7 +70,13 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr( ...@@ -70,7 +70,13 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
inputs[0]->comp_node().to_string().c_str()); inputs[0]->comp_node().to_string().c_str());
size_t nr_input = 0; size_t nr_input = 0;
bool is_input = true; bool is_input = true;
for (int i = 0; i < m_engine->getNbBindings(); ++i) { #if NV_TENSOR_RT_VERSION >= 6001
auto profile_num = m_engine->getNbOptimizationProfiles();
#else
int profile_num = 1;
#endif
auto bindings_per_profile = m_engine->getNbBindings() / profile_num;
for (int i = 0; i < bindings_per_profile; ++i) {
if (m_engine->bindingIsInput(nr_input)) { if (m_engine->bindingIsInput(nr_input)) {
mgb_assert(is_input, "mixed input/output bindings"); mgb_assert(is_input, "mixed input/output bindings");
// nbDims == 3, means CHW, without batch // nbDims == 3, means CHW, without batch
...@@ -81,7 +87,7 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr( ...@@ -81,7 +87,7 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
is_input = false; is_input = false;
} }
} }
size_t nr_output = m_engine->getNbBindings() - nr_input; size_t nr_output = bindings_per_profile - nr_input;
mgb_assert( mgb_assert(
nr_input == inputs.size(), "inputs size not equal: expect=%zu got=%zu", nr_input == inputs.size(), "inputs size not equal: expect=%zu got=%zu",
nr_input, inputs.size()); nr_input, inputs.size());
...@@ -101,7 +107,7 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr( ...@@ -101,7 +107,7 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
void TensorRTRuntimeOpr::get_output_var_shape( void TensorRTRuntimeOpr::get_output_var_shape(
const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const { const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const {
auto batch = inp_shape.at(0)[0]; auto batch = inp_shape.at(0)[0];
auto&& context = m_manager.create_trt_context(inp_shape, m_engine.get()); m_manager.create_trt_context(this->comp_node(), inp_shape, m_engine.get());
auto get_mgb_shape = [&](int binding_idx) -> TensorShape { auto get_mgb_shape = [&](int binding_idx) -> TensorShape {
auto dims = m_engine->getBindingDimensions(binding_idx); auto dims = m_engine->getBindingDimensions(binding_idx);
#if NV_TENSOR_RT_VERSION >= 6001 #if NV_TENSOR_RT_VERSION >= 6001
...@@ -132,7 +138,7 @@ void TensorRTRuntimeOpr::get_output_var_shape( ...@@ -132,7 +138,7 @@ void TensorRTRuntimeOpr::get_output_var_shape(
} }
} }
} else { } else {
auto trt_infer_dims = context->getBindingDimensions(binding_idx); auto trt_infer_dims = m_manager.get_binding_dimensions(binding_idx);
for (int i = 0; i < dims.nbDims; i++) { for (int i = 0; i < dims.nbDims; i++) {
if (dims.d[i] == -1) { if (dims.d[i] == -1) {
shape[i] = trt_infer_dims.d[i]; shape[i] = trt_infer_dims.d[i];
......
...@@ -50,10 +50,15 @@ class TensorRTManager { ...@@ -50,10 +50,15 @@ class TensorRTManager {
std::vector<void*> m_trt_iobuf; std::vector<void*> m_trt_iobuf;
TensorRTUniquePtr<nvinfer1::IExecutionContext> m_context; TensorRTUniquePtr<nvinfer1::IExecutionContext> m_context;
void* m_device_workspace_memory_ptr; void* m_device_workspace_memory_ptr;
int m_offset;
public: public:
const TensorRTUniquePtr<nvinfer1::IExecutionContext>& create_trt_context( void create_trt_context(
const TensorShapeArray& inp_shape, nvinfer1::ICudaEngine* engine); mgb::CompNode cn, const TensorShapeArray& inp_shape,
nvinfer1::ICudaEngine* engine);
#if NV_TENSOR_RT_VERSION >= 6001
nvinfer1::Dims get_binding_dimensions(int binding_idx) const;
#endif
void exec( void exec(
cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check, cg::SingleCNOperatorNodeBase* opr, CompNode comp_node_check,
nvinfer1::ICudaEngine* engine, size_t batch = 1, nvinfer1::ICudaEngine* engine, size_t batch = 1,
......
...@@ -519,14 +519,24 @@ TensorRTUniquePtr<ICudaEngine> intl::DynamicShapeTensorRTNetwork::create_trt_net ...@@ -519,14 +519,24 @@ TensorRTUniquePtr<ICudaEngine> intl::DynamicShapeTensorRTNetwork::create_trt_net
data = network->addInput("data", DataType::kFLOAT, Dims4{-1, 23, -1, -1}); data = network->addInput("data", DataType::kFLOAT, Dims4{-1, 23, -1, -1});
nvinfer1::IBuilderConfig* config = builder->createBuilderConfig(); nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
profile->setDimensions( nvinfer1::IOptimizationProfile* profile1 = builder->createOptimizationProfile();
profile1->setDimensions(
"data", nvinfer1::OptProfileSelector::kMIN, Dims4(1, 23, 10, 10));
profile1->setDimensions(
"data", nvinfer1::OptProfileSelector::kOPT, Dims4(2, 23, 12, 12));
profile1->setDimensions(
"data", nvinfer1::OptProfileSelector::kMAX, Dims4(3, 23, 14, 14));
config->addOptimizationProfile(profile1);
nvinfer1::IOptimizationProfile* profile2 = builder->createOptimizationProfile();
profile2->setDimensions(
"data", nvinfer1::OptProfileSelector::kMIN, Dims4(3, 23, 16, 16)); "data", nvinfer1::OptProfileSelector::kMIN, Dims4(3, 23, 16, 16));
profile->setDimensions( profile2->setDimensions(
"data", nvinfer1::OptProfileSelector::kOPT, Dims4(4, 23, 24, 24)); "data", nvinfer1::OptProfileSelector::kOPT, Dims4(4, 23, 24, 24));
profile->setDimensions( profile2->setDimensions(
"data", nvinfer1::OptProfileSelector::kMAX, Dims4(5, 23, 28, 28)); "data", nvinfer1::OptProfileSelector::kMAX, Dims4(5, 23, 28, 28));
config->addOptimizationProfile(profile); config->addOptimizationProfile(profile2);
{ {
nvinfer1::TensorFormats formats = nvinfer1::TensorFormats formats =
......
...@@ -310,7 +310,13 @@ TEST(TestOprTensorRT, ICudaEngine) { ...@@ -310,7 +310,13 @@ TEST(TestOprTensorRT, ICudaEngine) {
#if NV_TENSOR_RT_VERSION >= 6001 #if NV_TENSOR_RT_VERSION >= 6001
TEST(TestOprTensorRT, RuntimeDynamicShape) { TEST(TestOprTensorRT, RuntimeDynamicShape) {
REQUIRE_GPU(1); REQUIRE_GPU(1);
intl::DynamicShapeTensorRTNetwork net1{5, 23, 26, 26}, net2{4, 23, 24, 24}; intl::DynamicShapeTensorRTNetwork net1{2, 23, 14, 14};
#if NV_TENSOR_RT_VERSION >= 7200
intl::DynamicShapeTensorRTNetwork net2{4, 23, 24, 24};
#else
intl::DynamicShapeTensorRTNetwork net2{3, 23, 10, 10};
#endif
auto make_trt = [](intl::DynamicShapeTensorRTNetwork& net) { auto make_trt = [](intl::DynamicShapeTensorRTNetwork& net) {
TensorRTUniquePtr<ICudaEngine> cuda_engine = net.create_trt_network(); TensorRTUniquePtr<ICudaEngine> cuda_engine = net.create_trt_network();
TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}}; TensorRTUniquePtr<IHostMemory> mem{cuda_engine->serialize(), {}};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册