diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index aff2f60551de93755af34ec742feaab08f32c8ca..175bc55dcff17e46aa47e1d2d187e3a8c8c4b43d 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -212,6 +212,7 @@ struct Argument {
                       bool);
   DECL_ARGUMENT_FIELD(tensorrt_use_calib_mode, TensorRtUseCalibMode, bool);
   DECL_ARGUMENT_FIELD(tensorrt_use_oss, TensorRtUseOSS, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_with_interleaved, TensorRtWithInterleaved, bool);
   DECL_ARGUMENT_FIELD(tensorrt_shape_range_info_path,
                       TensorRtShapeRangeInfoPath, std::string);
   DECL_ARGUMENT_FIELD(tensorrt_tuned_dynamic_shape, TensorRtTunedDynamicShape,
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index dcbbee97a772cc4e104c24ebf7e5a433da656e02..3abda782ab6cfa071c5f010ea427b86a46b3512c 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -108,6 +108,8 @@
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("use_calib_mode", new bool(use_calib_mode));
       pass->Set("use_oss", new bool(argument->tensorrt_use_oss()));
+      pass->Set("with_interleaved",
+                new bool(argument->tensorrt_with_interleaved()));
       pass->Set("precision_mode",
                 new AnalysisConfig::Precision(precision_mode));
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index a21118e23aa5cd43479d364d886bb486bc4b95bc..ef50df3084f8cea7e0a137cc86e24f7e3c17fdd7 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -369,6 +369,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           Get<int>("gpu_device_id"), min_input_shape, max_input_shape,
           opt_input_shape, disable_trt_plugin_fp16);
   trt_engine->SetUseOSS(Get<bool>("use_oss"));
+  trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
   trt_engine->SetDLACore(Get<int>("trt_dla_core"));
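
(Note: the new flag follows the same plumbing as the existing tensorrt_use_oss
option: AnalysisConfig::trt_with_interleaved_ is copied into the analysis
Argument, handed to tensorrt_subgraph_pass as the "with_interleaved" pass
attribute, and stored on the TensorRTEngine via SetWithInterleaved(), where op
converters can query it through with_interleaved(); see the slice converter
below.)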
"true" : "false"}); if (trt_use_dla_) { os.InsertRow({"tensorrt_dla_core", std::to_string(trt_dla_core_)}); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 929984f50a7b8d3f652a214551c03550fdf61e5d..2799fb9e174d3209143d4be3a95250fb2eb882e6 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -605,6 +605,7 @@ void AnalysisPredictor::PrepareArgument() { argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_); argument_.SetTensorRtUseCalibMode(config_.trt_use_calib_mode_); argument_.SetTensorRtUseOSS(config_.trt_use_oss_); + argument_.SetTensorRtWithInterleaved(config_.trt_with_interleaved_); argument_.SetMinInputShape(config_.min_input_shape_); argument_.SetMaxInputShape(config_.max_input_shape_); argument_.SetOptimInputShape(config_.optim_input_shape_); @@ -1603,5 +1604,11 @@ bool InternalUtils::RunWithExternalStream(paddle_infer::Predictor *p, #endif return false; } +void InternalUtils::UpdateConfigInterleaved(paddle_infer::Config *c, + bool with_interleaved) { +#ifdef PADDLE_WITH_CUDA + c->trt_with_interleaved_ = with_interleaved; +#endif +} } // namespace experimental } // namespace paddle_infer diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 77409f95b042eac630363e38bdb7994d5ba1096a..f65170daccb624bfe4af75b1ee32fd5500c59d99 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -796,6 +796,7 @@ struct PD_INFER_DECL AnalysisConfig { bool trt_use_static_engine_{false}; bool trt_use_calib_mode_{true}; bool trt_use_oss_{false}; + bool trt_with_interleaved_{false}; bool trt_use_dla_{false}; int trt_dla_core_{0}; std::map> min_input_shape_{}; @@ -883,6 +884,7 @@ struct PD_INFER_DECL AnalysisConfig { // So we release the memory when the predictor is set up. mutable bool is_valid_{true}; std::string opt_cache_dir_; + friend class paddle_infer::experimental::InternalUtils; }; } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index b137b7ba6f97e20cd0db5a188af0e259d22150ab..c129efe494b4fb36bc72d3c93e24951ba7fef322 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -405,3 +405,24 @@ PD_INFER_DECL std::shared_ptr MakeCipher( const std::string& config_file); } // namespace paddle + +// forward declation +using cudaStream_t = struct CUstream_st*; +using hipStream_t = struct ihipStream_t*; + +namespace paddle_infer { +class Predictor; +using Config = paddle::AnalysisConfig; +namespace experimental { +class PD_INFER_DECL InternalUtils { + public: + // Note: Can only be used under thread_local semantics. + static bool RunWithExternalStream(paddle_infer::Predictor* pred, + cudaStream_t stream); + static bool RunWithExternalStream(paddle_infer::Predictor* pred, + hipStream_t stream); + static void UpdateConfigInterleaved(paddle_infer::Config* c, + bool with_interleaved); +}; +} // namespace experimental +} // namespace paddle_infer diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index b2b9f2e40747855f211ed79bf053afbca41f55ee..65906a57f46cb60fa312851f2c11d69b53826c9d 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -41,27 +41,11 @@ limitations under the License. 
diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h
index b2b9f2e40747855f211ed79bf053afbca41f55ee..65906a57f46cb60fa312851f2c11d69b53826c9d 100644
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@@ -41,27 +41,11 @@ limitations under the License. */
 /// \since 2.0.0-beta
 ///
-// forward declation
-using cudaStream_t = struct CUstream_st*;
-using hipStream_t = struct ihipStream_t*;
-
 namespace paddle_infer {
 using PrecisionType = paddle::AnalysisConfig::Precision;
 using Config = paddle::AnalysisConfig;
-class Predictor;
-namespace experimental {
-class PD_INFER_DECL InternalUtils {
- public:
-  // Note: Can only be used under thread_local semantics.
-  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
-                                    cudaStream_t stream);
-  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
-                                    hipStream_t stream);
-};
-}  // namespace experimental
-
 ///
 /// \class Predictor
 ///
diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
index 71a2fa68f1749fee9e6271276e9e9af82f34f461..0e661651914741d1a7c69d9b9ee81d4f28b9553c 100644
--- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
@@ -45,7 +45,7 @@ class BatchNormOpConverter : public OpConverter {
     auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front());
     auto* Variance_v = scope.FindVar(op_desc.Input("Variance").front());
     const float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
-
+    auto output_name = op_desc.Output("Y").front();
     PADDLE_ENFORCE_NOT_NULL(
         Bias_v, platform::errors::NotFound(
@@ -145,6 +145,10 @@ class BatchNormOpConverter : public OpConverter {
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
       expand_layer->setReshapeDimensions(expand_shape);
       X = expand_layer->getOutput(0);
+      expand_layer->getOutput(0)->setName(
+          ("reshape_before_batchnorm_out: " + output_name).c_str());
+      expand_layer->setName(
+          ("BN_Shuffle: (Output: " + output_name + ")").c_str());
     }
 
     layer = TRT_ENGINE_ADD_LAYER(engine_, ScaleNd, *X,
@@ -152,12 +156,13 @@
                                  shift_weights.get(), scale_weights.get(),
                                  power_weights.get(), dynamic_shape_offset);
-    auto output_name = op_desc.Output("Y").front();
     engine_->SetWeights(op_desc.Input("Bias").front(),
                         std::move(combile_bias_tensor));
     engine_->SetWeights(op_desc.Input("Scale").front(),
                         std::move(combile_scale_tensor));
     if (x_dim.nbDims < 3 + dynamic_shape_offset) {
+      layer->getOutput(0)->setName("batch_norm_out");
+      layer->setName(("BN: ScaleNd: (Output: " + output_name + ")").c_str());
       nvinfer1::Dims squeeze_shape;
       squeeze_shape.nbDims = x_dim.nbDims;
       for (int i = 0; i < squeeze_shape.nbDims; i++) {
@@ -166,10 +171,12 @@
       squeeze_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
       squeeze_layer->setReshapeDimensions(squeeze_shape);
-      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+      RreplenishLayerAndOutput(squeeze_layer, "batchnorm_add_scale",
+                               {output_name}, test_mode);
+    } else {
+      RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name},
+                               test_mode);
     }
-    RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name},
-                             test_mode);
   }
 };
diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
index 7c5af43816c4481400e08f1c2808513daa0b63ad..33f732c19a8751073cf8fadfdccc15d15cfd1c20 100644
--- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
@@ -50,6 +50,7 @@ class ElementwiseWeightOpConverter : public OpConverter {
                           op_desc.Input("Y").front().c_str()));
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
     float* weight_data = nullptr;
+    auto output_name = op_desc.Output("Out")[0];
     weight_data =
         engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t, false);
     nvinfer1::Dims dims_x = X->getDimensions();
@@ -80,6 +81,10 @@ class ElementwiseWeightOpConverter : public OpConverter {
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
       expand_layer->setReshapeDimensions(expand_shape);
       X = expand_layer->getOutput(0);
+      expand_layer->getOutput(0)->setName(
+          ("elementwise_reshape_out: " + output_name).c_str());
+      expand_layer->setName(
+          ("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
     }
     if (op_type_ == "add") {
       nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
@@ -101,11 +106,12 @@ class ElementwiseWeightOpConverter : public OpConverter {
       squeeze_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
       squeeze_layer->setReshapeDimensions(squeeze_shape);
-      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+      RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
+                               {output_name}, test_mode);
+    } else {
+      RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
+                               {output_name}, test_mode);
     }
-    auto output_name = op_desc.Output("Out")[0];
-    RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, {output_name},
-                             test_mode);
     if (op_desc.HasAttr("enable_int8")) {
 #if IS_TRT_VERSION_GE(5000)
       CHECK(op_desc.HasAttr("X_scale"));
diff --git a/paddle/fluid/inference/tensorrt/convert/gather_op.cc b/paddle/fluid/inference/tensorrt/convert/gather_op.cc
index e7b82388b6ab8c1edafb479b7f2fc3705742084c..a98e7535de1b89c00a31d9ab2b4e9f3d7a820eed 100644
--- a/paddle/fluid/inference/tensorrt/convert/gather_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/gather_op.cc
@@ -56,6 +56,8 @@ class GatherOpConverter : public OpConverter {
     index_shape.d[0] = -1;
 
     reshape_layer->setReshapeDimensions(index_shape);
+    reshape_layer->setName(
+        ("Gather: Shuffle: (Output: " + output_name + ")").c_str());
 
     auto layer = TRT_ENGINE_ADD_LAYER(engine_, Gather, *input_tensor,
                                       *reshape_layer->getOutput(0), axis);
diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index 57a26aec6ebcb3d1350ec560927b76bf1988d64b..7e0c8bf1da17761510d050fd8fe4a08ee412ff69 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -144,28 +144,44 @@ class OpConverter {
     it->SetEngine(engine);
     (*it)(op, scope, test_mode);
 
-    bool has_out_scale = op_desc.HasAttr("out_threshold");
-    if (has_out_scale) {
-      float out_scale =
-          BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
-      std::string output_name = "";
-      if (op_desc.HasOutput("Output")) {
-        output_name = op_desc.Output("Output").front();
-      } else if (op_desc.HasOutput("Out")) {
-        output_name = op_desc.Output("Out").front();
-      } else if (op_desc.HasOutput("Y")) {
-        output_name = op_desc.Output("Y").front();
-      } else {
-        PADDLE_THROW(
-            platform::errors::NotFound("Op %s has out threshold but doesn't "
-                                       "have an output named \"Output\", "
-                                       "\"Out\" or \"Y\".",
-                                       op_desc.Type()));
+    size_t output_num = op_desc.OutputNames().size();
+    if (output_num == 1) {  // The number of outputs is 1
+      if (op_desc.HasAttr("out_threshold")) {
+        float out_scale =
+            BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
+        std::string output_name = "";
+        if (op_desc.HasOutput("Output")) {
+          output_name = op_desc.Output("Output").front();
+        } else if (op_desc.HasOutput("Out")) {
+          output_name = op_desc.Output("Out").front();
(op_desc.HasOutput("Y")) { + output_name = op_desc.Output("Y").front(); + } else { + PADDLE_THROW( + platform::errors::NotFound("Op %s has out threshold but doesn't " + "have an output named \"Output\", " + "\"Out\" or \"Y\".", + op_desc.Type())); + } + auto* output_itensor = engine->GetITensor(output_name); + engine->SetTensorDynamicRange(output_itensor, out_scale); + VLOG(1) << "Set out scale = " << out_scale << " for tensor " + << output_name << "."; + } + } else if (output_num > 1) { // The number of outputs greater than 1 + for (size_t i = 0; i < output_num; ++i) { + if (op_desc.HasAttr("out_" + std::to_string(i) + "_threshold")) { + float out_scale = BOOST_GET_CONST( + float, + op_desc.GetAttr("out_" + std::to_string(i) + "_threshold")); + std::string output_name = + op_desc.Output(op_desc.OutputNames()[i]).front(); + auto* output_itensor = engine->GetITensor(output_name); + engine->SetTensorDynamicRange(output_itensor, out_scale); + VLOG(1) << "Set out scale = " << out_scale << " for tensor " + << output_name << "."; + } } - auto* output_itensor = engine->GetITensor(output_name); - engine->SetTensorDynamicRange(output_itensor, out_scale); - VLOG(1) << "Set out scale = " << out_scale << " for tensor " - << output_name << "."; } } diff --git a/paddle/fluid/inference/tensorrt/convert/scale_op.cc b/paddle/fluid/inference/tensorrt/convert/scale_op.cc index b527f2db538086a3c2c70d994de85cff10875416..8b23a8161f5933ac3f36bfe1e9a745c74c88e282 100644 --- a/paddle/fluid/inference/tensorrt/convert/scale_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/scale_op.cc @@ -89,21 +89,34 @@ class ScaleOpConverter : public OpConverter { expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); expand_layer->setReshapeDimensions(expand_shape); input = expand_layer->getOutput(0); + expand_layer->getOutput(0)->setName( + ("before_reshape_out: " + out_name).c_str()); + expand_layer->setName( + ("Scale: before_reshape (Output: " + out_name + ")").c_str()); } if (bias_after_scale) { layer = TRT_ENGINE_ADD_LAYER( engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM, shift_weights.get(), scale_weights.get(), power_weights.get()); + layer->getOutput(0)->setName( + ("bias_after_scale_out: " + out_name).c_str()); + layer->setName(("Scale: scale (Output: " + out_name + ")").c_str()); } else { // add bias layer = TRT_ENGINE_ADD_LAYER( engine_, Scale, *(input), nvinfer1::ScaleMode::kUNIFORM, shift_weights.get(), power_weights.get(), power_weights.get()); + layer->getOutput(0)->setName( + ("bias_before_scale:bias_out: " + out_name).c_str()); + layer->setName(("Scale: scale_bias (Output: " + out_name + ")").c_str()); // mul scale layer = TRT_ENGINE_ADD_LAYER( engine_, Scale, *(layer->getOutput(0)), nvinfer1::ScaleMode::kUNIFORM, power_weights.get(), scale_weights.get(), power_weights.get()); + layer->getOutput(0)->setName( + ("bias_before_scale:scale_out: " + out_name).c_str()); + layer->setName(("Scale: scale_scale (Output: " + out_name + ")").c_str()); } PADDLE_ENFORCE_EQ(layer != nullptr, true, @@ -119,6 +132,9 @@ class ScaleOpConverter : public OpConverter { TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0))); squeeze_layer->setReshapeDimensions(squeeze_shape); layer = static_cast(squeeze_layer); + layer->getOutput(0)->setName(("after_reshape_out: " + out_name).c_str()); + layer->setName( + ("Scale: Shuffle_reshape (Output: " + out_name + ")").c_str()); } RreplenishLayerAndOutput(layer, "scale", {out_name}, test_mode); } diff --git a/paddle/fluid/inference/tensorrt/convert/slice_op.cc 
diff --git a/paddle/fluid/inference/tensorrt/convert/slice_op.cc b/paddle/fluid/inference/tensorrt/convert/slice_op.cc
index 7f270b1f390b7428aa40425ebfb2adb4d02620a8..2c08f0fe2bdedb832a9a072404c72fc05b125f66 100644
--- a/paddle/fluid/inference/tensorrt/convert/slice_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/slice_op.cc
@@ -30,10 +30,11 @@ class SliceOpConverter : public OpConverter {
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("Input")[0]);
+    auto output_name = op_desc.Output("Out")[0];
 
+    float out_scale = 1;
     if (op_desc.HasAttr("out_threshold")) {
-      float out_scale =
-          BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
+      out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
       engine_->SetTensorDynamicRange(input, out_scale);
     }
@@ -71,12 +72,22 @@
     nvinfer1::ILayer* layer = nullptr;
     if (engine_->with_dynamic_shape()) {
-#if IS_TRT_VERSION_GE(6000)
       if (engine_->use_oss() && engine_->with_ernie()) {
         std::vector<nvinfer1::ITensor*> plugin_inputs;
-        // plugin_inputs.emplace_back(trans_layer->getOutput(0));
-        plugin_inputs.emplace_back(input);
-
+        if (engine_->with_interleaved()) {
+          auto* shuffler_slice = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+          nvinfer1::Permutation transpose_embed{2, 1, 0, 3};
+          shuffler_slice->setSecondTranspose(transpose_embed);
+          engine_->SetTensorDynamicRange(shuffler_slice->getOutput(0),
+                                         out_scale);
+          shuffler_slice->setName(
+              ("SpecialSlice_interleaved: Shuffle: (Output: " + output_name +
+               ")")
+                  .c_str());
+          plugin_inputs.emplace_back(shuffler_slice->getOutput(0));
+        } else {
+          plugin_inputs.emplace_back(input);
+        }
         std::string pos_name;
         if (engine_->Has("ernie_pos_name")) {
           pos_name = engine_->Get<std::string>("ernie_pos_name");
@@ -99,11 +110,6 @@
             new plugin::SlicePluginDynamic(starts, ends, axes, with_fp16);
         layer = engine_->AddDynamicPlugin(&input, 1, plugin);
       }
-#else
-      PADDLE_THROW(platform::errors::Fatal(
-          "You are running the TRT Dynamic Shape mode, need to confirm that "
-          "your TRT version is no less than 6.0"));
-#endif
     } else {
       bool with_fp16 =
           engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
@@ -111,8 +117,6 @@
       plugin::SlicePlugin* plugin =
           new plugin::SlicePlugin(starts, ends, axes, with_fp16);
       layer = engine_->AddPlugin(&input, 1, plugin);
     }
-
-    auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "slice", {output_name}, test_mode);
   }
 };
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 7aaeb739de194aabff1f021b3c655104174c00aa..663534feda1a8ac1f1d3cf5e564cc8c30c58bb41 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -407,6 +407,9 @@
   void SetUseDLA(bool use_dla) { use_dla_ = use_dla; }
   void SetDLACore(int dla_core) { dla_core_ = dla_core; }
   void SetWithErnie(bool with_ernie) { with_ernie_ = with_ernie; }
+  void SetWithInterleaved(bool with_interleaved) {
+    with_interleaved_ = with_interleaved;
+  }
 
   void ClearWeights() {
     for (auto& weight_pair : weight_map) {
@@ -480,6 +483,7 @@
 
   bool use_oss() { return use_oss_; }
   bool with_ernie() { return with_ernie_; }
+  bool with_interleaved() { return with_interleaved_; }
   bool disable_trt_plugin_fp16() { return disable_trt_plugin_fp16_; }
   bool with_dynamic_shape() { return with_dynamic_shape_; }
   AnalysisConfig::Precision precision() { return precision_; }
@@ -612,6 +616,7 @@
   bool use_dla_{false};
   int dla_core_{0};
   bool with_ernie_{false};
+  bool with_interleaved_{false};
   nvinfer1::ILogger& logger_;
   // max data size for the buffers.