Commit e2b79ea0 authored by: Megvii Engine Team

feat(mgb): reduce the number of trtruntimeopr create contexts

GitOrigin-RevId: 14e5d1769e951438d776db6276e5addd04f01093
Parent 6157d9cf
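
Note: the hunks below cache the TensorRT execution context inside `TensorRTManager` instead of rebuilding it on every shape-inference and execution pass. A minimal sketch of the caching pattern the commit adopts follows; `ExecutionContextCache` and `Context` are illustrative stand-ins, not MegEngine types:

```cpp
// Illustrative sketch only: the context is created once, reused on every
// later call, and dropped explicitly when it must be rebuilt (e.g. when a
// different optimization profile has to be selected).
#include <memory>

struct ExecutionContextCache {
    struct Context {};  // stand-in for nvinfer1::IExecutionContext
    std::unique_ptr<Context> m_context;

    Context* get_or_create() {
        if (!m_context)  // pay the creation cost only on first use
            m_context = std::make_unique<Context>();
        return m_context.get();
    }
    void clear() { m_context.reset(); }  // next get_or_create() rebuilds
};
```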
@@ -156,15 +156,20 @@ void TensorRTOpr::GpuAllocator::free(void* memory) {
void TensorRTManager::create_trt_context(
mgb::CompNode cn, const TensorShapeArray& inp_shape,
nvinfer1::ICudaEngine* engine) {
if (!m_context) {
bool has_no_context = (!m_context);
if (has_no_context) {
m_context = {engine->createExecutionContextWithoutDeviceMemory(), {}};
MGB_MARK_USED_VAR(cn);
}
MGB_MARK_USED_VAR(cn);
#if NV_TENSOR_RT_VERSION >= 6001
auto profile_num = engine->getNbOptimizationProfiles();
auto bindings_per_profile = engine->getNbBindings() / profile_num;
// choose nearest profile
int profile_idx = 0;
auto profile_num = engine->getNbOptimizationProfiles();
auto bindings_per_profile = engine->getNbBindings() / profile_num;
// choose nearest profile
#if NV_TENSOR_RT_VERSION >= 7200
bool has_select_profile = false;
if (has_no_context) {
has_select_profile = true;
int profile_idx = 0;
if (profile_num > 1) {
double dist = DBL_MAX;
for (int i = 0; i < profile_num; i++) {
@@ -207,59 +212,76 @@ void TensorRTManager::create_trt_context(
auto&& env = mgb::CompNodeEnv::from_comp_node(cn);
m_context->setOptimizationProfileAsync(profile_idx, env.cuda_env().stream);
}
#endif
m_offset = profile_idx * bindings_per_profile;
for (size_t i = m_offset; i < m_offset + inp_shape.size(); ++i) {
auto dims = m_context->getBindingDimensions(i);
for (int j = 0; j < dims.nbDims; j++) {
if (dims.d[j] == -1) {
dims.d[j] = inp_shape.at(i - m_offset)[j];
}
}
#endif
bool is_set_correct = true;
for (size_t i = m_offset; i < m_offset + inp_shape.size(); ++i) {
auto dims = m_context->getBindingDimensions(i);
auto dims_check = engine->getBindingDimensions(i);
for (int j = 0; j < dims.nbDims; j++) {
if (dims_check.d[j] == -1) {
dims.d[j] = inp_shape.at(i - m_offset)[j];
}
m_context->setBindingDimensions(m_offset, dims);
}
// check if input shape is set correctly
for (int i = m_offset + inp_shape.size(); i < m_offset + bindings_per_profile;
++i) {
auto dims = m_context->getBindingDimensions(i);
if (dims.nbDims == -1) {
for (int j = 0; j < profile_num; j++) {
mgb_log_error("TensorRT profile %d:\n", j);
for (size_t k = m_offset; k < m_offset + inp_shape.size(); k++) {
mgb_log_error(
"input[%zu]'s minimum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape(
engine->getProfileDimensions(
k, j,
nvinfer1::OptProfileSelector::kMIN))
.to_string()
.c_str());
mgb_log_error(
"input[%zu]'s optimum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape(
engine->getProfileDimensions(
k, j,
nvinfer1::OptProfileSelector::kOPT))
.to_string()
.c_str());
mgb_log_error(
"input[%zu]'s maximum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape(
engine->getProfileDimensions(
k, j,
nvinfer1::OptProfileSelector::kMAX))
.to_string()
.c_str());
}
is_set_correct &= m_context->setBindingDimensions(i, dims);
}
// check if input shape is set correctly
if (!is_set_correct) {
#if NV_TENSOR_RT_VERSION >= 7200
if (has_select_profile) {
#endif
for (size_t j = 0; j < inp_shape.size(); ++j) {
mgb_log_error(
"TensorRT input[%zu]'s shape is %s\n", j,
inp_shape.at(j).to_string().c_str());
}
mgb_log_error(
"The selected profile's idx is %d\n",
m_offset / bindings_per_profile);
for (int j = 0; j < profile_num; j++) {
mgb_log_error("TensorRT profile %d:\n", j);
for (size_t k = m_offset; k < m_offset + inp_shape.size(); k++) {
mgb_log_error(
"input[%zu]'s minimum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape(
engine->getProfileDimensions(
k, j, nvinfer1::OptProfileSelector::kMIN))
.to_string()
.c_str());
mgb_log_error(
"input[%zu]'s optimum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape(
engine->getProfileDimensions(
k, j, nvinfer1::OptProfileSelector::kOPT))
.to_string()
.c_str());
mgb_log_error(
"input[%zu]'s maximum shape is: %s\n", k - m_offset,
TensorRTOpr::dims2shape(
engine->getProfileDimensions(
k, j, nvinfer1::OptProfileSelector::kMAX))
.to_string()
.c_str());
}
mgb_throw(
MegBrainError,
"Invalid network output, this might be caused by inconsistent "
"input shapes.Correct input optimization profiles as above.");
}
mgb_throw(
MegBrainError,
"Invalid network output, this might be caused by "
"inconsistent "
"input shapes.Correct input optimization profiles as "
"above.");
#if NV_TENSOR_RT_VERSION >= 7200
} else {
// must clear the old context before creating a new one, because
// setOptimizationProfileAsync() must be called before calling
// setBindingDimensions()
clear_trt_context();
create_trt_context(cn, inp_shape, engine);
}
#endif
}
#endif
}
#if NV_TENSOR_RT_VERSION >= 6001
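
The TRT >= 7200 branch above encodes a documented TensorRT ordering rule: on a freshly created context, the optimization profile must be selected before any binding dimensions are set. A hedged sketch of that call order; `bind_inputs` and its parameters are assumed names, while the TensorRT calls themselves (`setOptimizationProfileAsync`, `setBindingDimensions`) are the real API used above:

```cpp
// Sketch of the required TensorRT call order (not MegEngine code): select
// the profile first, then fix every dynamic (-1) input axis; a false
// return from setBindingDimensions() means the shape falls outside the
// profile's [kMIN, kMAX] range, which is what the error path above logs.
#include <vector>
#include <NvInfer.h>
#include <cuda_runtime_api.h>

bool bind_inputs(
        nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* ctx,
        int profile_idx, cudaStream_t stream,
        const std::vector<nvinfer1::Dims>& input_dims) {
    // profile selection must precede setBindingDimensions() on a new context
    if (!ctx->setOptimizationProfileAsync(profile_idx, stream))
        return false;
    int bindings_per_profile =
            engine->getNbBindings() / engine->getNbOptimizationProfiles();
    int offset = profile_idx * bindings_per_profile;
    bool ok = true;
    for (size_t i = 0; i < input_dims.size(); ++i)
        ok &= ctx->setBindingDimensions(offset + static_cast<int>(i),
                                        input_dims[i]);
    return ok;
}
```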
@@ -284,12 +306,15 @@ void TensorRTManager::exec(
}
auto workspace_ptr = opr->output().back()->dev_tensor().raw_ptr();
bool should_reinit_device_memory =
!m_context || m_device_workspace_memory_ptr != workspace_ptr;
TensorShapeArray arr;
for (auto&& i : opr->input()) {
arr.push_back(i->shape());
!m_context || (m_device_workspace_memory_ptr != workspace_ptr) ||
(workspace_ptr == nullptr);
if (!m_context) {
TensorShapeArray arr;
for (auto&& i : opr->input()) {
arr.push_back(i->shape());
}
create_trt_context(comp_node, arr, engine);
}
create_trt_context(comp_node, arr, engine);
m_trt_iobuf.resize(engine->getNbBindings());
bool is_trt_opr = false;
if (opr->same_type<TensorRTOpr>()) {
......
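
In `exec()` the context is now created lazily, only when none is cached, and `should_reinit_device_memory` also fires when the workspace pointer changes. The consuming code is elided above; presumably it re-attaches the workspace via `setDeviceMemory()`, which is the standard TensorRT call for contexts created with `createExecutionContextWithoutDeviceMemory()`. A hedged sketch, with guard names mirroring the hunk:

```cpp
// Hedged sketch (the corresponding MegEngine lines are elided above):
// contexts created without device memory must be handed a workspace
// before enqueue; re-attach only when the context is fresh or the
// workspace allocation moved.
if (should_reinit_device_memory) {
    m_context->setDeviceMemory(workspace_ptr);      // TensorRT API call
    m_device_workspace_memory_ptr = workspace_ptr;  // remember the binding
}
```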
@@ -42,6 +42,7 @@ cg::OperatorNodeBase* opr_shallow_copy_tensor_rt_runtime_opr(
const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs,
const OperatorNodeConfig& config) {
auto&& opr = opr_.cast_final_safe<TensorRTRuntimeOpr>();
opr.clear_trt_context();
return TensorRTRuntimeOpr::make(
opr.trt_cuda_engine(), opr.trt_gpu_allocator(),
cg::to_symbol_var_array(inputs), config)
......
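
The single added call in the shallow-copy path is what the header comment later in this diff refers to: the source operator drops its cached context before the copy is constructed, so each operator lazily re-creates its own context instead of erroring out by building a second one against the shared engine. In outline (a condensed sketch of the hunk above, not new behaviour):

```cpp
// Sketch: clear the source's cached context, then build the copy
// against the same engine and allocator it already wraps.
opr.clear_trt_context();
auto copy = TensorRTRuntimeOpr::make(
        opr.trt_cuda_engine(), opr.trt_gpu_allocator(),
        cg::to_symbol_var_array(inputs), config);
```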
@@ -107,7 +107,6 @@ TensorRTRuntimeOpr::TensorRTRuntimeOpr(
void TensorRTRuntimeOpr::get_output_var_shape(
const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const {
auto batch = inp_shape.at(0)[0];
m_manager.clear_trt_context();
m_manager.create_trt_context(this->comp_node(), inp_shape, m_engine.get());
auto get_mgb_shape = [&](int binding_idx) -> TensorShape {
auto dims = m_engine->getBindingDimensions(binding_idx);
@@ -160,8 +159,6 @@ void TensorRTRuntimeOpr::get_output_var_shape(
out_shape[i] = get_mgb_shape(i + input().size());
}
out_shape.back() = {intl::workspace_size(m_engine.get())};
// must clear the context, otherwise it may cause an unknown error.
m_manager.clear_trt_context();
}
void TensorRTRuntimeOpr::add_input_layout_constraint() {
......
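
With the context now cached, `get_output_var_shape()` no longer clears it on entry and exit, so the same context serves both static shape inference and the later `exec()`. The body of `get_mgb_shape` is elided above; a hedged sketch of what such a `Dims` to `TensorShape` conversion looks like, following the `m_engine->getBindingDimensions()` line shown (illustrative, not the verbatim body):

```cpp
// Hedged sketch of a Dims -> TensorShape conversion like get_mgb_shape();
// dynamic axes are assumed to have been fixed by create_trt_context().
auto get_mgb_shape = [&](int binding_idx) -> TensorShape {
    auto dims = m_engine->getBindingDimensions(binding_idx);
    TensorShape shape;
    shape.ndim = dims.nbDims;
    for (int i = 0; i < dims.nbDims; ++i)
        shape[i] = dims.d[i];
    return shape;
};
```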
@@ -50,7 +50,7 @@ class TensorRTManager {
std::vector<void*> m_trt_iobuf;
TensorRTUniquePtr<nvinfer1::IExecutionContext> m_context;
void* m_device_workspace_memory_ptr;
int m_offset;
int m_offset = 0;
public:
void create_trt_context(
......
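
The header change swaps an uninitialized `m_offset` for a C++11 in-class default initializer. Since `create_trt_context()` now assigns `m_offset` only on the profile-selection path, the initializer guarantees a defined value when that path is skipped. A minimal illustration (`Manager` is a stand-in):

```cpp
// With a non-static data member initializer the member is zero on every
// constructor path, so a context-reuse path that skips profile selection
// reads 0 instead of an indeterminate value.
struct Manager {
    int m_offset = 0;  // was: int m_offset; (indeterminate until assigned)
};
```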
@@ -82,6 +82,9 @@ public:
return m_gpu_allocator;
}
//! used in shallow copy to avoid the create-context-twice error
void clear_trt_context() const { m_manager.clear_trt_context(); }
private:
// note: gpu allocator must be released after other trt objects
std::shared_ptr<TensorRTOpr::GpuAllocator> m_gpu_allocator;
......