diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index aff2f60551de93755af34ec742feaab08f32c8ca..175bc55dcff17e46aa47e1d2d187e3a8c8c4b43d 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -212,6 +212,7 @@ struct Argument {
                       bool);
   DECL_ARGUMENT_FIELD(tensorrt_use_calib_mode, TensorRtUseCalibMode, bool);
   DECL_ARGUMENT_FIELD(tensorrt_use_oss, TensorRtUseOSS, bool);
+  DECL_ARGUMENT_FIELD(tensorrt_with_interleaved, TensorRtWithInterleaved, bool);
   DECL_ARGUMENT_FIELD(tensorrt_shape_range_info_path,
                       TensorRtShapeRangeInfoPath, std::string);
   DECL_ARGUMENT_FIELD(tensorrt_tuned_dynamic_shape, TensorRtTunedDynamicShape,
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index dcbbee97a772cc4e104c24ebf7e5a433da656e02..3abda782ab6cfa071c5f010ea427b86a46b3512c 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -108,6 +108,8 @@
       pass->Set("enable_int8", new bool(enable_int8));
       pass->Set("use_calib_mode", new bool(use_calib_mode));
       pass->Set("use_oss", new bool(argument->tensorrt_use_oss()));
+      pass->Set("with_interleaved",
+                new bool(argument->tensorrt_with_interleaved()));
       pass->Set("precision_mode",
                 new AnalysisConfig::Precision(precision_mode));
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index a21118e23aa5cd43479d364d886bb486bc4b95bc..ef50df3084f8cea7e0a137cc86e24f7e3c17fdd7 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -369,6 +369,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
           Get<int>("gpu_device_id"), min_input_shape, max_input_shape,
           opt_input_shape, disable_trt_plugin_fp16);
   trt_engine->SetUseOSS(Get<bool>("use_oss"));
+  trt_engine->SetWithInterleaved(Get<bool>("with_interleaved"));
   trt_engine->SetUseDLA(Get<bool>("trt_use_dla"));
   trt_engine->SetDLACore(Get<int>("trt_dla_core"));
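
(Note: the new flag follows the same plumbing as the existing tensorrt_use_oss
option: AnalysisConfig::trt_with_interleaved_ is copied into the analysis
Argument, handed to tensorrt_subgraph_pass as the "with_interleaved" pass
attribute, and stored on the TensorRTEngine via SetWithInterleaved(), where op
converters can query it through with_interleaved(); see the slice converter
below.)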
"true" : "false"}); if (trt_use_dla_) { os.InsertRow({"tensorrt_dla_core", std::to_string(trt_dla_core_)}); diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 929984f50a7b8d3f652a214551c03550fdf61e5d..2799fb9e174d3209143d4be3a95250fb2eb882e6 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -605,6 +605,7 @@ void AnalysisPredictor::PrepareArgument() { argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_); argument_.SetTensorRtUseCalibMode(config_.trt_use_calib_mode_); argument_.SetTensorRtUseOSS(config_.trt_use_oss_); + argument_.SetTensorRtWithInterleaved(config_.trt_with_interleaved_); argument_.SetMinInputShape(config_.min_input_shape_); argument_.SetMaxInputShape(config_.max_input_shape_); argument_.SetOptimInputShape(config_.optim_input_shape_); @@ -1603,5 +1604,11 @@ bool InternalUtils::RunWithExternalStream(paddle_infer::Predictor *p, #endif return false; } +void InternalUtils::UpdateConfigInterleaved(paddle_infer::Config *c, + bool with_interleaved) { +#ifdef PADDLE_WITH_CUDA + c->trt_with_interleaved_ = with_interleaved; +#endif +} } // namespace experimental } // namespace paddle_infer diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 77409f95b042eac630363e38bdb7994d5ba1096a..f65170daccb624bfe4af75b1ee32fd5500c59d99 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -796,6 +796,7 @@ struct PD_INFER_DECL AnalysisConfig { bool trt_use_static_engine_{false}; bool trt_use_calib_mode_{true}; bool trt_use_oss_{false}; + bool trt_with_interleaved_{false}; bool trt_use_dla_{false}; int trt_dla_core_{0}; std::map> min_input_shape_{}; @@ -883,6 +884,7 @@ struct PD_INFER_DECL AnalysisConfig { // So we release the memory when the predictor is set up. mutable bool is_valid_{true}; std::string opt_cache_dir_; + friend class paddle_infer::experimental::InternalUtils; }; } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index b137b7ba6f97e20cd0db5a188af0e259d22150ab..c129efe494b4fb36bc72d3c93e24951ba7fef322 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -405,3 +405,24 @@ PD_INFER_DECL std::shared_ptr MakeCipher( const std::string& config_file); } // namespace paddle + +// forward declation +using cudaStream_t = struct CUstream_st*; +using hipStream_t = struct ihipStream_t*; + +namespace paddle_infer { +class Predictor; +using Config = paddle::AnalysisConfig; +namespace experimental { +class PD_INFER_DECL InternalUtils { + public: + // Note: Can only be used under thread_local semantics. + static bool RunWithExternalStream(paddle_infer::Predictor* pred, + cudaStream_t stream); + static bool RunWithExternalStream(paddle_infer::Predictor* pred, + hipStream_t stream); + static void UpdateConfigInterleaved(paddle_infer::Config* c, + bool with_interleaved); +}; +} // namespace experimental +} // namespace paddle_infer diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index b2b9f2e40747855f211ed79bf053afbca41f55ee..65906a57f46cb60fa312851f2c11d69b53826c9d 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -41,27 +41,11 @@ limitations under the License. 
diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h
index b2b9f2e40747855f211ed79bf053afbca41f55ee..65906a57f46cb60fa312851f2c11d69b53826c9d 100644
--- a/paddle/fluid/inference/api/paddle_inference_api.h
+++ b/paddle/fluid/inference/api/paddle_inference_api.h
@@ -41,27 +41,11 @@ limitations under the License. */
 /// \since 2.0.0-beta
 ///
-// forward declation
-using cudaStream_t = struct CUstream_st*;
-using hipStream_t = struct ihipStream_t*;
-
 namespace paddle_infer {
 using PrecisionType = paddle::AnalysisConfig::Precision;
 using Config = paddle::AnalysisConfig;
-class Predictor;
-namespace experimental {
-class PD_INFER_DECL InternalUtils {
- public:
-  // Note: Can only be used under thread_local semantics.
-  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
-                                    cudaStream_t stream);
-  static bool RunWithExternalStream(paddle_infer::Predictor* pred,
-                                    hipStream_t stream);
-};
-}  // namespace experimental
-
 ///
 /// \class Predictor
 ///
diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
index 71a2fa68f1749fee9e6271276e9e9af82f34f461..0e661651914741d1a7c69d9b9ee81d4f28b9553c 100644
--- a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc
@@ -45,7 +45,7 @@ class BatchNormOpConverter : public OpConverter {
     auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front());
     auto* Variance_v = scope.FindVar(op_desc.Input("Variance").front());
     const float eps = BOOST_GET_CONST(float, op_desc.GetAttr("epsilon"));
-
+    auto output_name = op_desc.Output("Y").front();
     PADDLE_ENFORCE_NOT_NULL(
         Bias_v, platform::errors::NotFound(
@@ -145,6 +145,10 @@ class BatchNormOpConverter : public OpConverter {
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
       expand_layer->setReshapeDimensions(expand_shape);
       X = expand_layer->getOutput(0);
+      expand_layer->getOutput(0)->setName(
+          ("reshape_before_batchnorm_out: " + output_name).c_str());
+      expand_layer->setName(
+          ("BN_Shuffle: (Output: " + output_name + ")").c_str());
     }
 
     layer = TRT_ENGINE_ADD_LAYER(engine_, ScaleNd, *X,
@@ -152,12 +156,13 @@
                                  shift_weights.get(), scale_weights.get(),
                                  power_weights.get(), dynamic_shape_offset);
-    auto output_name = op_desc.Output("Y").front();
     engine_->SetWeights(op_desc.Input("Bias").front(),
                         std::move(combile_bias_tensor));
     engine_->SetWeights(op_desc.Input("Scale").front(),
                         std::move(combile_scale_tensor));
     if (x_dim.nbDims < 3 + dynamic_shape_offset) {
+      layer->getOutput(0)->setName("batch_norm_out");
+      layer->setName(("BN: ScaleNd: (Output: " + output_name + ")").c_str());
       nvinfer1::Dims squeeze_shape;
       squeeze_shape.nbDims = x_dim.nbDims;
       for (int i = 0; i < squeeze_shape.nbDims; i++) {
@@ -166,10 +171,12 @@
       squeeze_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
       squeeze_layer->setReshapeDimensions(squeeze_shape);
-      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+      RreplenishLayerAndOutput(squeeze_layer, "batchnorm_add_scale",
+                               {output_name}, test_mode);
+    } else {
+      RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name},
+                               test_mode);
     }
-    RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name},
-                             test_mode);
   }
 };
diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
index 7c5af43816c4481400e08f1c2808513daa0b63ad..33f732c19a8751073cf8fadfdccc15d15cfd1c20 100644
--- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
@@ -50,6 +50,7 @@ class ElementwiseWeightOpConverter : public OpConverter {
                           op_desc.Input("Y").front().c_str()));
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
     float* weight_data = nullptr;
+    auto output_name = op_desc.Output("Out")[0];
     weight_data =
         engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t, false);
     nvinfer1::Dims dims_x = X->getDimensions();
@@ -80,6 +81,10 @@ class ElementwiseWeightOpConverter : public OpConverter {
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
       expand_layer->setReshapeDimensions(expand_shape);
       X = expand_layer->getOutput(0);
+      expand_layer->getOutput(0)->setName(
+          ("elementwise_reshape_out: " + output_name).c_str());
+      expand_layer->setName(
+          ("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
     }
     if (op_type_ == "add") {
       nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
@@ -101,11 +106,12 @@ class ElementwiseWeightOpConverter : public OpConverter {
       squeeze_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
       squeeze_layer->setReshapeDimensions(squeeze_shape);
-      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+      RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
+                               {output_name}, test_mode);
+    } else {
+      RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
+                               {output_name}, test_mode);
     }
-    auto output_name = op_desc.Output("Out")[0];
-    RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, {output_name},
-                             test_mode);
     if (op_desc.HasAttr("enable_int8")) {
 #if IS_TRT_VERSION_GE(5000)
       CHECK(op_desc.HasAttr("X_scale"));
diff --git a/paddle/fluid/inference/tensorrt/convert/gather_op.cc b/paddle/fluid/inference/tensorrt/convert/gather_op.cc
index e7b82388b6ab8c1edafb479b7f2fc3705742084c..a98e7535de1b89c00a31d9ab2b4e9f3d7a820eed 100644
--- a/paddle/fluid/inference/tensorrt/convert/gather_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/gather_op.cc
@@ -56,6 +56,8 @@ class GatherOpConverter : public OpConverter {
     index_shape.d[0] = -1;
 
     reshape_layer->setReshapeDimensions(index_shape);
+    reshape_layer->setName(
+        ("Gather: Shuffle: (Output: " + output_name + ")").c_str());
 
     auto layer = TRT_ENGINE_ADD_LAYER(engine_, Gather, *input_tensor,
                                       *reshape_layer->getOutput(0), axis);
diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index 57a26aec6ebcb3d1350ec560927b76bf1988d64b..7e0c8bf1da17761510d050fd8fe4a08ee412ff69 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -144,28 +144,44 @@ class OpConverter {
     it->SetEngine(engine);
     (*it)(op, scope, test_mode);
 
-    bool has_out_scale = op_desc.HasAttr("out_threshold");
-    if (has_out_scale) {
-      float out_scale =
-          BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
-      std::string output_name = "";
-      if (op_desc.HasOutput("Output")) {
-        output_name = op_desc.Output("Output").front();
-      } else if (op_desc.HasOutput("Out")) {
-        output_name = op_desc.Output("Out").front();
-      } else if (op_desc.HasOutput("Y")) {
-        output_name = op_desc.Output("Y").front();
-      } else {
-        PADDLE_THROW(
-            platform::errors::NotFound("Op %s has out threshold but doesn't "
-                                       "have an output named \"Output\", "
-                                       "\"Out\" or \"Y\".",
-                                       op_desc.Type()));
+    size_t output_num = op_desc.OutputNames().size();
+    if (output_num == 1) {  // The number of outputs is 1
+      if (op_desc.HasAttr("out_threshold")) {
+        float out_scale =
+            BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
+        std::string output_name = "";
+        if (op_desc.HasOutput("Output")) {
+          output_name = op_desc.Output("Output").front();
+        } else if (op_desc.HasOutput("Out")) {
+          output_name = op_desc.Output("Out").front();
(op_desc.HasOutput("Y")) { + output_name = op_desc.Output("Y").front(); + } else { + PADDLE_THROW( + platform::errors::NotFound("Op %s has out threshold but doesn't " + "have an output named \"Output\", " + "\"Out\" or \"Y\".", + op_desc.Type())); + } + auto* output_itensor = engine->GetITensor(output_name); + engine->SetTensorDynamicRange(output_itensor, out_scale); + VLOG(1) << "Set out scale = " << out_scale << " for tensor " + << output_name << "."; + } + } else if (output_num > 1) { // The number of outputs greater than 1 + for (size_t i = 0; i < output_num; ++i) { + if (op_desc.HasAttr("out_" + std::to_string(i) + "_threshold")) { + float out_scale = BOOST_GET_CONST( + float, + op_desc.GetAttr("out_" + std::to_string(i) + "_threshold")); + std::string output_name = + op_desc.Output(op_desc.OutputNames()[i]).front(); + auto* output_itensor = engine->GetITensor(output_name); + engine->SetTensorDynamicRange(output_itensor, out_scale); + VLOG(1) << "Set out scale = " << out_scale << " for tensor " + << output_name << "."; + } } - auto* output_itensor = engine->GetITensor(output_name); - engine->SetTensorDynamicRange(output_itensor, out_scale); - VLOG(1) << "Set out scale = " << out_scale << " for tensor " - << output_name << "."; } } diff --git a/paddle/fluid/inference/tensorrt/convert/scale_op.cc b/paddle/fluid/inference/tensorrt/convert/scale_op.cc index b527f2db538086a3c2c70d994de85cff10875416..8b23a8161f5933ac3f36bfe1e9a745c74c88e282 100644 --- a/paddle/fluid/inference/tensorrt/convert/scale_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/scale_op.cc @@ -89,21 +89,34 @@ class ScaleOpConverter : public OpConverter { expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); expand_layer->setReshapeDimensions(expand_shape); input = expand_layer->getOutput(0); + expand_layer->getOutput(0)->setName( + ("before_reshape_out: " + out_name).c_str()); + expand_layer->setName( + ("Scale: before_reshape (Output: " + out_name + ")").c_str()); } if (bias_after_scale) { layer = TRT_ENGINE_ADD_LAYER( engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM, shift_weights.get(), scale_weights.get(), power_weights.get()); + layer->getOutput(0)->setName( + ("bias_after_scale_out: " + out_name).c_str()); + layer->setName(("Scale: scale (Output: " + out_name + ")").c_str()); } else { // add bias layer = TRT_ENGINE_ADD_LAYER( engine_, Scale, *(input), nvinfer1::ScaleMode::kUNIFORM, shift_weights.get(), power_weights.get(), power_weights.get()); + layer->getOutput(0)->setName( + ("bias_before_scale:bias_out: " + out_name).c_str()); + layer->setName(("Scale: scale_bias (Output: " + out_name + ")").c_str()); // mul scale layer = TRT_ENGINE_ADD_LAYER( engine_, Scale, *(layer->getOutput(0)), nvinfer1::ScaleMode::kUNIFORM, power_weights.get(), scale_weights.get(), power_weights.get()); + layer->getOutput(0)->setName( + ("bias_before_scale:scale_out: " + out_name).c_str()); + layer->setName(("Scale: scale_scale (Output: " + out_name + ")").c_str()); } PADDLE_ENFORCE_EQ(layer != nullptr, true, @@ -119,6 +132,9 @@ class ScaleOpConverter : public OpConverter { TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0))); squeeze_layer->setReshapeDimensions(squeeze_shape); layer = static_cast(squeeze_layer); + layer->getOutput(0)->setName(("after_reshape_out: " + out_name).c_str()); + layer->setName( + ("Scale: Shuffle_reshape (Output: " + out_name + ")").c_str()); } RreplenishLayerAndOutput(layer, "scale", {out_name}, test_mode); } diff --git a/paddle/fluid/inference/tensorrt/convert/slice_op.cc 
diff --git a/paddle/fluid/inference/tensorrt/convert/slice_op.cc b/paddle/fluid/inference/tensorrt/convert/slice_op.cc
index 7f270b1f390b7428aa40425ebfb2adb4d02620a8..2c08f0fe2bdedb832a9a072404c72fc05b125f66 100644
--- a/paddle/fluid/inference/tensorrt/convert/slice_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/slice_op.cc
@@ -30,10 +30,11 @@ class SliceOpConverter : public OpConverter {
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("Input")[0]);
+    auto output_name = op_desc.Output("Out")[0];
 
+    float out_scale = 1;
     if (op_desc.HasAttr("out_threshold")) {
-      float out_scale =
-          BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
+      out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
       engine_->SetTensorDynamicRange(input, out_scale);
     }
@@ -71,12 +72,22 @@
     nvinfer1::ILayer* layer = nullptr;
     if (engine_->with_dynamic_shape()) {
-#if IS_TRT_VERSION_GE(6000)
       if (engine_->use_oss() && engine_->with_ernie()) {
         std::vector<nvinfer1::ITensor*> plugin_inputs;
-        // plugin_inputs.emplace_back(trans_layer->getOutput(0));
-        plugin_inputs.emplace_back(input);
-
+        if (engine_->with_interleaved()) {
+          auto* shuffler_slice = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+          nvinfer1::Permutation transpose_embed{2, 1, 0, 3};
+          shuffler_slice->setSecondTranspose(transpose_embed);
+          engine_->SetTensorDynamicRange(shuffler_slice->getOutput(0),
+                                         out_scale);
+          shuffler_slice->setName(
+              ("SpecialSlice_interleaved: Shuffle: (Output: " + output_name +
+               ")")
+                  .c_str());
+          plugin_inputs.emplace_back(shuffler_slice->getOutput(0));
+        } else {
+          plugin_inputs.emplace_back(input);
+        }
         std::string pos_name;
         if (engine_->Has("ernie_pos_name")) {
           pos_name = engine_->Get<std::string>("ernie_pos_name");
@@ -99,11 +110,6 @@
             new plugin::SlicePluginDynamic(starts, ends, axes, with_fp16);
         layer = engine_->AddDynamicPlugin(&input, 1, plugin);
       }
-#else
-      PADDLE_THROW(platform::errors::Fatal(
-          "You are running the TRT Dynamic Shape mode, need to confirm that "
-          "your TRT version is no less than 6.0"));
-#endif
     } else {
       bool with_fp16 =
           engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
@@ -111,8 +117,6 @@
       plugin::SlicePlugin* plugin =
           new plugin::SlicePlugin(starts, ends, axes, with_fp16);
       layer = engine_->AddPlugin(&input, 1, plugin);
     }
-
-    auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "slice", {output_name}, test_mode);
   }
 };
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 7aaeb739de194aabff1f021b3c655104174c00aa..663534feda1a8ac1f1d3cf5e564cc8c30c58bb41 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -407,6 +407,9 @@
   void SetUseDLA(bool use_dla) { use_dla_ = use_dla; }
   void SetDLACore(int dla_core) { dla_core_ = dla_core; }
   void SetWithErnie(bool with_ernie) { with_ernie_ = with_ernie; }
+  void SetWithInterleaved(bool with_interleaved) {
+    with_interleaved_ = with_interleaved;
+  }
 
   void ClearWeights() {
     for (auto& weight_pair : weight_map) {
@@ -480,6 +483,7 @@
 
   bool use_oss() { return use_oss_; }
   bool with_ernie() { return with_ernie_; }
+  bool with_interleaved() { return with_interleaved_; }
   bool disable_trt_plugin_fp16() { return disable_trt_plugin_fp16_; }
   bool with_dynamic_shape() { return with_dynamic_shape_; }
   AnalysisConfig::Precision precision() { return precision_; }
@@ -612,6 +616,7 @@
   bool use_dla_{false};
   int dla_core_{0};
   bool with_ernie_{false};
+  bool with_interleaved_{false};
   nvinfer1::ILogger& logger_;
   // max data size for the buffers.