diff --git a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
index 8225840a69d7cf6ee053632791aa4227b8de4ec9..1aa12ded2611b2ca61e3f59c4f59880ef4ffaad7 100644
--- a/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
@@ -107,11 +107,7 @@ void AttentionLSTMFusePass::FindWhileOp(Graph* graph) const {
   gpd.mutable_pattern()->NewNode(
       [&](Node* n) { return fused_external_ops.count(n->id()); }, "while");
 
-  if (!graph->Has(kGraphvizMarkedNodeAttr)) {
-    graph->Set(kGraphvizMarkedNodeAttr, new GraphVizPass::marked_nodes_t);
-  }
-  auto& marked_nodes =
-      graph->Get<GraphVizPass::marked_nodes_t>(kGraphvizMarkedNodeAttr);
+  auto& marked_nodes = GetMarkedNodes(graph);
 
   auto handle = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
diff --git a/paddle/fluid/framework/ir/graph_viz_pass.cc b/paddle/fluid/framework/ir/graph_viz_pass.cc
index eed4ab38f5be749dcd0739e0d0c2dd043a93a949..8403d9075e3bf6a9856976ad9e37f345cc777fb5 100644
--- a/paddle/fluid/framework/ir/graph_viz_pass.cc
+++ b/paddle/fluid/framework/ir/graph_viz_pass.cc
@@ -14,6 +14,8 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
 
+#include
+#include
 #include
 
 #include "paddle/fluid/framework/ir/graph_helper.h"
diff --git a/paddle/fluid/framework/ir/graph_viz_pass.h b/paddle/fluid/framework/ir/graph_viz_pass.h
index 118c1bc6f3c78e6ec05c897939343df83b1c3851..0d3b32a7c1a78407a7f4f5a7e830276c8247c5df 100644
--- a/paddle/fluid/framework/ir/graph_viz_pass.h
+++ b/paddle/fluid/framework/ir/graph_viz_pass.h
@@ -14,12 +14,7 @@ limitations under the License. */
 
 #pragma once
 
-#include
-#include
-#include
-#include
 #include
-#include
 
 #include "paddle/fluid/framework/ir/graph.h"
 #include "paddle/fluid/framework/ir/pass.h"
diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc
index 4aadb34d7b354e99eb5a5717f91e55fd91d823dc..afa653dec906981254535e6e56a7770aa1fb31c0 100644
--- a/paddle/fluid/inference/analysis/analyzer.cc
+++ b/paddle/fluid/inference/analysis/analyzer.cc
@@ -15,7 +15,6 @@
 #include "paddle/fluid/inference/analysis/analyzer.h"
 
 #include
-#include
 
 #include "paddle/fluid/inference/analysis/passes/passes.h"
 #include "paddle/fluid/string/pretty_log.h"
diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h
index 95a985158e6783dfa12615779e36b3283e979631..3c053283666748a33a2dfa8167fe126d8ab61219 100644
--- a/paddle/fluid/inference/analysis/analyzer.h
+++ b/paddle/fluid/inference/analysis/analyzer.h
@@ -35,12 +35,10 @@ limitations under the License. */
  * phase in the inference service.
  */
 
-#include
 #include
 
 #include "gflags/gflags.h"
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/inference/analysis/flags.h"
 
 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/inference/analysis/device.h b/paddle/fluid/inference/analysis/device.h
deleted file mode 100644
index 585c9923291e5f9cb6e50dbc4bcd28c374191048..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/analysis/device.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#pragma once
-
-namespace paddle {
-namespace inference {
-namespace analysis {
-
-enum class Device { CPU, GPU };
-
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/dot_tester.cc b/paddle/fluid/inference/analysis/dot_tester.cc
deleted file mode 100644
index 0b669093a1f32c52d559c49c2396314afeb360aa..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/analysis/dot_tester.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include
-
-#include
-
-#include "paddle/fluid/inference/analysis/dot.h"
-
-namespace paddle {
-namespace inference {
-namespace analysis {
-
-class DotTester : public ::testing::Test {
- protected:
-  void SetUp() override {
-    std::vector<Dot::Attr> attrs({{"title", "hello"}});
-    dot.reset(new Dot(attrs));
-    dot->AddNode("a", {Dot::Attr{"shape", "box"}, Dot::Attr("color", "blue")});
-    dot->AddNode("b", {});
-    dot->AddNode("c", {});
-    dot->AddEdge("a", "b", {});
-    dot->AddEdge("b", "c", {});
-    dot->AddEdge("a", "c", {});
-  }
-
-  std::unique_ptr<Dot> dot;
-};
-
-TEST_F(DotTester, Build) {
-  auto codes = dot->Build();
-  // Output the DOT language code; the generated code is too long to compare
-  // as a string.
-  //
-  // The output is
-  //
-  // digraph G {
-  //   title="hello"
-  //   node_1
-  //   node_2
-  //   node_0[label="a" shape="box" color="blue"]
-  //   node_0->node_1
-  //   node_1->node_2
-  //   node_0->node_2
-  // } // end G
-  LOG(INFO) << '\n' << codes;
-}
-
-}  // namespace analysis
-}  // namespace inference
-}  // namespace paddle
diff --git a/paddle/fluid/inference/analysis/flags.h b/paddle/fluid/inference/analysis/flags.h
deleted file mode 100644
index ea0406128b53bfdf04ccb97d2eba2c1c236c845c..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/analysis/flags.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#pragma once
-
-#include "gflags/gflags.h"
-
-// TODO(Superjomn) add a definition flag like PADDLE_WITH_TENSORRT and hide this
-// flag if not available.
-DECLARE_bool(IA_enable_tensorrt_subgraph_engine);
-DECLARE_string(IA_graphviz_log_root);
-DECLARE_string(IA_output_storage_path);
-DECLARE_bool(IA_enable_ir);
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index e9733a07ec8b56af5b4574915bd085eaf7f4c486..9d39b6e6118271de8f39be99e53e06a7f855b2bc 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -28,6 +28,7 @@
 #include "paddle/fluid/framework/ir/subgraph_detector.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/inference/analysis/helper.h"
+#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
 #include "paddle/fluid/inference/analysis/passes/convert_to_mixed_precision.h"
 #include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
index a79c41f6a075f19e50d7410170136bc0903a6cde..b720f2c6b9f10c34a525431bf68760bb83dfcd19 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h
@@ -21,7 +21,6 @@
 
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 
 namespace paddle {
diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
index 2f9914ac54a79b105b98d7ef96e3b71254ba2dee..f93606892b6464d0269ad5f9bbe2630b20903089 100644
--- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc
@@ -14,16 +14,9 @@
 
 #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h"
 
-#include
-#include
-
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/inference/analysis/ir_pass_manager.h"
 
-#ifdef PADDLE_WITH_MKLDNN
-#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
-#endif
-
 namespace paddle {
 namespace inference {
 namespace analysis {
diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
index 98ee592245660533986a9893bbca603186d0fd78..fca431b5d7779838b940209099eaab0d6e3e707e 100644
--- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
+++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h
@@ -33,10 +33,6 @@ class IrAnalysisPass : public AnalysisPass {
 
   void CollectFusionStatis(Argument* argument);
 
-  void ReadCalibrationInfo(
-      Argument* argument,
-      std::unordered_map<std::string, std::vector<float>>* var_quant_scales);
-
   std::string repr() const override;
 };
diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
index 3f13fd639aa554a43ef3ad22a87c42c5bfa1ab9c..60d94eae7a6e03395d43bb648076bc5e3c2f6a37 100644
--- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc
@@ -14,7 +14,6 @@
 
 #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h"
 
-#include
 #include
 
 #include "paddle/fluid/framework/executor.h"
diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
index 54b7cd85ff2bb4eb04928103a35ae9dad68e1668..65e149925e742924ff49c03a07afabb79dd50736 100644
--- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc
@@ -17,15 +17,15 @@
 #include
 #include
 #include
+#include
 
-#include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/framework/framework.pb.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
-#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/tensor_util.h"
-#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/phi/common/data_type.h"
+#include "paddle/fluid/platform/place.h"
+#include "paddle/phi/core/dense_tensor.h"
 
 DEFINE_bool(
     custom_model_save_cpu,
diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
index 9db17abc24d2a26caa2f0bea2a626cb7e1b5049b..ee29af1c13308b8af63fa94313ffa5747a30efd6 100644
--- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
+++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h
@@ -15,12 +15,9 @@
 #pragma once
 
 #include
-#include
 
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
-#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/fluid/platform/place.h"
 
 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
old mode 100755
new mode 100644
index 40a8c5ce66a2a5b7c5f54784abdcbdc2c9e3e531..a9478cde66ded90b5ecfaae8eb7aaa47ea530acc
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc
@@ -15,7 +15,9 @@
 #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
 
 #include
+#include
 #include
+#include
 
 #include "glog/logging.h"
 #include "paddle/fluid/framework/ir/graph_helper.h"
diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
index 5dcd8b1059ebc2690a38ab7e2d201161345d762d..9ce804019744c7a884a11836703ac56b1c0ba7d8 100644
--- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
+++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h
@@ -15,12 +15,9 @@
 #pragma once
 #include
 #include
-#include
 #include
-#include
 
 #include "paddle/fluid/inference/analysis/analysis_pass.h"
-#include "paddle/phi/backends/dynload/port.h"
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/inference/api/CMakeLists.txt b/paddle/fluid/inference/api/CMakeLists.txt
index f07e5406a002b2f241f128141a763458efb242e4..b681e56d3b9f9e96aebf4d457bf2d3edcc87a453 100755
--- a/paddle/fluid/inference/api/CMakeLists.txt
+++ b/paddle/fluid/inference/api/CMakeLists.txt
@@ -1,18 +1,3 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
 if(APPLE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=pessimizing-move")
 endif()
diff --git a/paddle/fluid/inference/api/README.md b/paddle/fluid/inference/api/README.md
deleted file mode 100644
index 990b061c8f92b01a593241217b24b5b3204c9121..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/api/README.md
+++ /dev/null
@@ -1,17 +0,0 @@
-# Embed Paddle Inference in Your Application
-
-Paddle inference offers APIs in the `C` and `C++` languages.
-
-You can easily deploy a model trained by Paddle by following the steps below:
-
-1. Optimize the native model;
-2. Write some code for deployment.
-
-## The APIs
-
-All the released APIs are located in the `paddle_inference_api.h` header file.
-The stable APIs are wrapped by `namespace paddle`; the unstable APIs are protected by `namespace paddle::contrib`.
-
-## Write some code
-
-Read `paddle_inference_api.h` for more information.
diff --git a/paddle/fluid/inference/api/high_level_api.md b/paddle/fluid/inference/api/high_level_api.md
deleted file mode 100644
index ca22767a1b8895f365633541c86d24182a3268b1..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/api/high_level_api.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# Inference High-level APIs
-This document describes the high-level inference APIs; one can use them to deploy a Paddle model for an application quickly.
-
-The APIs are described in `paddle_inference_api.h`, just one header file, and two libraries `libpaddle_inference.so` and `libpaddle_inference_io.so` are needed for a deployment.
-
-## PaddleTensor
-We provide the `PaddleTensor` data structure to give a general tensor interface.
-
-The definition is
-
-```c++
-struct PaddleTensor {
-  std::string name;  // variable name.
-  std::vector<int> shape;
-  PaddleBuf data;  // blob of data.
-  PaddleDType dtype;
-};
-```
-
-The data is stored in contiguous memory in a `PaddleBuf`, and a `PaddleDType` specifies the tensor's data type.
-The `name` field is used to specify the name of an input variable,
-which is important when there are multiple inputs and one needs to distinguish which variable to set.
-
-## engine
-The inference API has two different underlying engines
-
-- the native engine
-- the tensorrt engine
-
-The native engine, which consists of the native operators and framework, takes a native Paddle model
-as input, and supports any model trained by Paddle.
-
-```c++
-enum class PaddleEngineKind {
-  kNative = 0,        // Use the native Fluid facility.
-  kAutoMixedTensorRT  // Automatically mixing TensorRT with the Fluid ops.
-};
-```
-
-## PaddlePredictor and how to create one
-The main interface is `PaddlePredictor`; it has the following methods
-
-- `bool Run(const std::vector<PaddleTensor>& inputs, std::vector<PaddleTensor>* output_data)`
-  - takes inputs and outputs `output_data`.
-- `Clone` to clone a predictor from an existing one, with the model parameters shared.
-
-There is a factory method to help create a predictor, and the user takes the ownership of this object.
-
-```c++
-template <typename ConfigT>
-std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
-```
-
-By specifying the engine kind and config, one can get a specific implementation.
-
-## Reference
-
-- [paddle_inference_api.h](./paddle_inference_api.h)
-- [some demos](./demo_ci)
diff --git a/paddle/fluid/inference/api/high_level_api_cn.md b/paddle/fluid/inference/api/high_level_api_cn.md
deleted file mode 100644
index 6fb4a55f200ebc8c34de4b3266f7021e2e97c312..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/api/high_level_api_cn.md
+++ /dev/null
@@ -1,84 +0,0 @@
-# Paddle Inference API
-
-To make inference deployment simpler and more convenient, Fluid provides a set of high-level APIs that hide the differing optimized implementations underneath.
-
-The inference library contains:
-
-- the header file `paddle_inference_api.h`, which defines all the interfaces
-- the library files `libpaddle_inference.so/.a` (Linux/Mac) and `libpaddle_inference.lib/paddle_inference.dll` (Windows)
-
-The API concepts are introduced in detail below.
-
-## PaddleTensor
-
-`PaddleTensor` defines the most basic input/output data format for inference. Its definition is
-
-```c++
-struct PaddleTensor {
-  std::string name;  // variable name.
-  std::vector<int> shape;
-  PaddleBuf data;  // blob of data.
-  PaddleDType dtype;
-};
-```
-
-- `name` specifies the name of the model variable that the input data corresponds to (unused for now, but it will be enabled once arbitrary targets are supported)
-- `shape` is the shape of the tensor
-- `data` holds the data in contiguous memory inside a `PaddleBuf`, which can either wrap external data or `malloc` memory of its own; see the related definitions in the header file for details
-- `dtype` is the data type of the tensor
-
-## engine
-
-The high-level API is backed by several optimized implementations, which we call engines. There are currently two engines:
-
-- the native engine, composed of Paddle's native forward operators, which naturally supports every model trained with Paddle
-- the TensorRT mixed engine, which supports [TensorRT](https://developer.nvidia.com/tensorrt) via subgraphs; it supports all Paddle models and automatically carves parts of the computation out as subgraphs to be accelerated by TensorRT (WIP)
-
-It is implemented as
-
-```c++
-enum class PaddleEngineKind {
-  kNative = 0,        // Use the native Fluid facility.
-  kAutoMixedTensorRT  // Automatically mixing TensorRT with the Fluid ops.
-};
-```
-
-## Inference deployment workflow
-
-Overall, there are the following steps:
-
-1. create a `PaddlePredictor` with a suitable config
-2. create the input `PaddleTensor`s and pass them into the `PaddlePredictor`
-3. fetch the output `PaddleTensor`s and read the results out
-
-The following walks through a simple model in full; some detail code is elided.
-
-```c++
-#include "paddle_inference_api.h"
-
-// Create a config and adjust the relevant settings.
-paddle::NativeConfig config;
-config.model_dir = "xxx";
-config.use_gpu = false;
-// Create a native PaddlePredictor.
-auto predictor =
-    paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);
-// Create the input tensor.
-int64_t data[4] = {1, 2, 3, 4};
-paddle::PaddleTensor tensor{.name = "",
-                            .shape = std::vector<int>({4, 1}),
-                            .data = paddle::PaddleBuf(data, sizeof(data)),
-                            .dtype = paddle::PaddleDType::INT64};
-// Create the output tensors; their memory can be reused.
-std::vector<paddle::PaddleTensor> outputs;
-// Run inference.
-CHECK(predictor->Run({tensor}, &outputs));
-// Fetch the outputs ...
-```
-
-At build time, just link against `libpaddle_inference.a/.so` (Linux/Mac) or `libpaddle_inference.lib/paddle_inference.dll` (Windows).
-
-## Detailed code references
-
-- [inference demos](./demo_ci)
-- [complex single-/multi-threaded examples](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/inference/api/test_api_impl.cc)
diff --git a/paddle/fluid/inference/engine.h b/paddle/fluid/inference/engine.h
deleted file mode 100644
index e29162cf5b23bacafcf2e5ef600a96ed4518c360..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/engine.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include
-
-namespace paddle {
-namespace inference {
-
-struct Buffer;
-enum class DeviceType { UNK = -1, CPU, GPU };
-
-/*
- * EngineBase is the base class of all inference engines. An inference engine
- * takes a Paddle program as input and outputs the result in fluid Tensor
- * format. It can be used to optimize the performance of computation
- * sub-blocks, for example, by breaking the original block down into
- * sub-blocks and executing each sub-block in a different engine.
- *
- * For example:
- * At inference time, most of the ResNet50 model can be put into a subgraph
- * and run on a TensorRT engine.
- *
- * There are several engines such as TensorRT and other frameworks, so
- * EngineBase is put forward to give a unified interface for all the
- * different engine implementations.
- */
-class EngineBase {
- public:
-  using DescType = ::paddle::framework::proto::BlockDesc;
-
-  // Build the model and do some preparation, for example, in TensorRT, run
-  // createInferBuilder, buildCudaEngine.
-  virtual void Build(const DescType& paddle_model) = 0;
-
-  // Execute the engine, that will run the inference network.
-  virtual void Execute(int batch_size) = 0;
-
-  virtual ~EngineBase() {}
-};  // class EngineBase
-
-struct Buffer {
-  void* buffer{nullptr};  // buffer should be allocated only once.
-  size_t max_size;        // buffer allocated space.
-  size_t size;            // data size.
-  DeviceType device{DeviceType::UNK};  // tells which device this buffer is on.
-};
-
-}  // namespace inference
-}  // namespace paddle
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index d09c344ca1643a50997c0c69130b8c600f78c60c..a52d6b1c39df35ed9317debd5d77c3f6560b89d1 100755
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -99,7 +99,6 @@ list(
   merge_layernorm_op.cc
   skip_merge_layernorm_op.cc
   generic_and_custom_plugin_creater.cc
-  fused_lookup_tables_op.cc
   lookup_table_op.cc
   elementwiseadd_transpose_op.cc
   skip_groupnorm_act_op.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/fused_lookup_tables_op.cc b/paddle/fluid/inference/tensorrt/convert/fused_lookup_tables_op.cc
deleted file mode 100644
index aaeea2d725809eac1c26ec51b9c79d1f5d251d26..0000000000000000000000000000000000000000
--- a/paddle/fluid/inference/tensorrt/convert/fused_lookup_tables_op.cc
+++ /dev/null
@@ -1,115 +0,0 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
*/ - -#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/inference/tensorrt/convert/utils.h" -#include "paddle/fluid/inference/tensorrt/plugin/lookup_table.h" - -namespace paddle { -namespace inference { -namespace tensorrt { - -class FusedLookupTablesOpConverter : public OpConverter { - public: - void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, - bool test_mode) override { - if (!engine_->with_dynamic_shape()) { - PADDLE_THROW( - platform::errors::Fatal("lookup_table_op must with dynamic shape")); - } - - framework::OpDesc op_desc(op, nullptr); - auto ids_name = op_desc.Input("Ids").front(); - auto w_name = op_desc.Input("W").front(); - auto output_name = op_desc.Output("Out").front(); - bool enable_int8 = op_desc.HasAttr("enable_int8"); - std::vector plugin_inputs; - - auto ids_dims = engine_->GetITensor(ids_name)->getDimensions(); - if (ids_dims.d[ids_dims.nbDims - 1] == 1) { - nvinfer1::Dims new_ids_dims; - new_ids_dims.nbDims = ids_dims.nbDims - 1; - for (int i = 0; i < ids_dims.nbDims - 1; i++) { - new_ids_dims.d[i] = 0; - } - auto* reshape_layer = TRT_ENGINE_ADD_LAYER( - engine_, Shuffle, *(engine_->GetITensor(ids_name))); - reshape_layer->setReshapeDimensions(new_ids_dims); - reshape_layer->setName( - ("lookup_table: Shuffle (Output: " + output_name + ")").c_str()); - plugin_inputs.push_back(reshape_layer->getOutput(0)); - } else { - plugin_inputs.push_back(engine_->GetITensor(ids_name)); - } - - TensorRTEngine::Weight weight; - auto* w_var = scope.FindVar(w_name); - auto* w_tensor = w_var->GetMutable(); - auto w_dims = w_tensor->dims(); - weight = engine_->GetTrtWeight(w_name, *w_tensor); - auto weight_size = phi::product(w_dims); - int output_fp16; - if (engine_->precision() == AnalysisConfig::Precision::kFloat32) { - output_fp16 = 0; - } else { - output_fp16 = 1; - } - - int32_t weight_width = static_cast(w_dims[1]); - - std::vector fields; - fields.emplace_back("lookup_table_weight", - weight.get().values, - GetPluginFieldType(weight.get().type), - static_cast(weight_size)); - fields.emplace_back("lookup_table_weight_width", - &weight_width, - nvinfer1::PluginFieldType::kINT32, - 1); - fields.emplace_back( - "output_fp16", &output_fp16, nvinfer1::PluginFieldType::kINT32, 1); - nvinfer1::PluginFieldCollection* plugin_ptr = - static_cast( - malloc(sizeof(*plugin_ptr) + - fields.size() * sizeof(nvinfer1::PluginField))); - plugin_ptr->nbFields = static_cast(fields.size()); - plugin_ptr->fields = fields.data(); - auto creator = - GetPluginRegistry()->getPluginCreator("LookupTablePluginDynamic", "1"); - auto plugin_obj = - creator->createPlugin("LookupTablePluginDynamic", plugin_ptr); - - auto plugin_layer = engine_->network()->addPluginV2( - plugin_inputs.data(), plugin_inputs.size(), *plugin_obj); - - plugin_layer->setName( - ("lookup_table: (Output: " + output_name + ")").c_str()); - engine_->SetITensor(output_name, plugin_layer->getOutput(0)); - free(plugin_ptr); - if (enable_int8) { - float out_scale = - PADDLE_GET_CONST(float, op_desc.GetAttr("out_threshold")); - engine_->SetTensorDynamicRange(plugin_layer->getOutput(0), out_scale); - } - } -}; - -} // namespace tensorrt -} // namespace inference -} // namespace paddle - -// NOTE(liuyuanle): We will remove the implementation here later. Ref to -// tensorrt/convert/lookup_table_op.cc. 
-// REGISTER_TRT_OP_CONVERTER(lookup_table, FusedLookupTablesOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/prelu_op.cc b/paddle/fluid/inference/tensorrt/convert/prelu_op.cc index 80a2ac46f44dc31c858e8386b878264a99648f37..2d2a804f394fd80915d265f461d620f1333a2d38 100644 --- a/paddle/fluid/inference/tensorrt/convert/prelu_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/prelu_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" -#include "paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index 3f4df6058657326fe18364db7ddf28d1d7a0531b..a0d146c9b6dad415e67a7b0f542483f25e3dac45 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -31,7 +31,6 @@ limitations under the License. */ #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/inference/api/paddle_analysis_config.h" -#include "paddle/fluid/inference/engine.h" #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" #include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h" diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index e72ead0bbadb6e02932f195cfbb1fb0b037158ec..b1df5a733623e0c4ae3f9040691dcd74e00f024c 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -4,7 +4,6 @@ list( trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu - prelu_op_plugin.cu gelu_op_plugin.cu pool_op_plugin.cu swish_op_plugin.cu @@ -40,7 +39,6 @@ list( preln_groupnorm_act_op_plugin.cu elementwiseadd_transpose_op_plugin.cu generic_plugin.cu - lookup_table.cu many_emb_layernorm_plugin.cu many_emb_layernorm_kernel.cu) @@ -57,11 +55,7 @@ endif() nv_library( tensorrt_plugin SRCS ${TRT_FILES} - DEPS enforce - tensorrt_engine - prelu - tensor - bert_encoder_functor + DEPS enforce tensorrt_engine tensor bert_encoder_functor tensorrt_dynamic_shape_infermeta_factory tensorrt_plugin_arg_mapping_context) diff --git a/paddle/fluid/inference/tensorrt/plugin/lookup_table.cu b/paddle/fluid/inference/tensorrt/plugin/lookup_table.cu deleted file mode 100644 index 31d599bd2a67cdc1b72e7ad1a2a8c7e04029844e..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/lookup_table.cu +++ /dev/null @@ -1,348 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once -#include "paddle/fluid/inference/tensorrt/plugin/lookup_table.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -char const* PLUGINVERSION{"1"}; -char const* LOOKUPTABLEPLUGINNAME{"LookupTablePluginDynamic"}; - -template -__global__ void lookup_table_kernel(int weight_height, - int32_t const* inputIds, - T const* wordEmb, - int32_t const wordSize, - T* output) { - // 1. lookup word and token of the block - // blockIdx.x = position in the sequence - // blockIdx.y = batch - // gridDim.x = S - // gridDim.y = B - __shared__ int wordId; - int32_t const seqPos = blockIdx.x + blockIdx.y * gridDim.x; - if (threadIdx.x == 0) { - wordId = inputIds[seqPos]; - } - __syncthreads(); - - // 2. load word embeddings and add them toghether - // offset into embeddings is given by wordId * hidden_size - int32_t const woffset = wordId * weight_height; - // the output offset is given by b * (S*hidden_size) + s * hidden_size - int32_t const outOffset = seqPos * weight_height; - if (wordId >= 0 && wordId < wordSize) { - for (int it = threadIdx.x; it < weight_height; it += TPB) { - T const w(wordEmb[woffset + it]); - output[outOffset + it] = w; - } - } else { - printf( - "Error!!!!!!(LookupTablePlugin): ID cannot be lookup " - "table: ID < 0 or ID > max "); - return; - } -} - -template -int lookup_table(cudaStream_t stream, - int weight_height, - int B, - int S, - int32_t const* inputIds, - T const* wordEmb, - int32_t const wordSize, - T* output) { - constexpr int tpb = 256; - dim3 const grid(S, B, 1); - dim3 const block(tpb, 1, 1); - lookup_table_kernel<<>>( - weight_height, inputIds, wordEmb, wordSize, output); - return 0; -} - -// Static class fields initialization -nvinfer1::PluginFieldCollection LookupTablePluginDynamicCreator::mFC{}; -std::vector - LookupTablePluginDynamicCreator::mPluginAttributes; - -LookupTablePluginDynamic::LookupTablePluginDynamic( - nvinfer1::DataType const type, - void* weight_dev, - int32_t weight_size, - int32_t width) - : mType(type), - mWeightDev(weight_dev), - mWeightSize(weight_size), - mWeightWidth(width) {} - -LookupTablePluginDynamic::LookupTablePluginDynamic(void const* data, - size_t length) { - // Deserialize in the same order as serialization - deserialize_value(&data, &length, &mType); - deserialize_value(&data, &length, &mWeightSize); - deserialize_value(&data, &length, &mWeightWidth); - char const* d = static_cast(data); - cudaMalloc(&mWeightDev, mWeightSize * getElementSize(mType)); - cudaMemcpy(mWeightDev, - d, - mWeightSize * getElementSize(mType), - cudaMemcpyHostToDevice); -} - -// IPluginV2DynamicExt Methods -nvinfer1::IPluginV2DynamicExt* LookupTablePluginDynamic::clone() - const noexcept { - auto p = new LookupTablePluginDynamic( - mType, mWeightDev, mWeightSize, mWeightWidth); - p->setPluginNamespace(mNamespace.c_str()); - return p; -} - -nvinfer1::DimsExprs LookupTablePluginDynamic::getOutputDimensions( - int32_t outputIndex, - nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, - nvinfer1::IExprBuilder& exprBuilder) noexcept { - nvinfer1::DimsExprs ret; - ret.nbDims = inputs[0].nbDims + 1; - for (int i = 0; i < inputs[0].nbDims; ++i) { - ret.d[i] = inputs[0].d[i]; - } - ret.d[inputs[0].nbDims] = exprBuilder.constant(mWeightWidth); - return ret; -} - -bool LookupTablePluginDynamic::supportsFormatCombination( - int32_t pos, - nvinfer1::PluginTensorDesc const* inOut, - int32_t nbInputs, - int32_t nbOutputs) noexcept { - nvinfer1::PluginTensorDesc const& desc = inOut[pos]; - if (desc.format != 
nvinfer1::TensorFormat::kLINEAR) { - return false; - } - if (pos == 0) { - return desc.type == nvinfer1::DataType::kINT32; - } - if (pos == 1) { - if (mType == nvinfer1::DataType::kFLOAT) { - return desc.type == nvinfer1::DataType::kFLOAT; - } else { - return desc.type == nvinfer1::DataType::kHALF; - } - } -} - -void LookupTablePluginDynamic::configurePlugin( - nvinfer1::DynamicPluginTensorDesc const* inputs, - int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* outputs, - int32_t nbOutputs) noexcept {} - -size_t LookupTablePluginDynamic::getWorkspaceSize( - nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, - int32_t nbOutputs) const noexcept { - return 0; -} - -int32_t LookupTablePluginDynamic::enqueue( - nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, - void const* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) noexcept { - int32_t const batchSize = inputDesc->dims.d[0]; - int32_t S; - if (inputDesc->dims.nbDims == 1) { - S = 1; - } else { - S = inputDesc->dims.d[1]; - } - int32_t mWeightHeight = mWeightSize / mWeightWidth; - int32_t status = STATUS_FAILURE; - auto const inputIds = static_cast(inputs[0]); - if (mType == nvinfer1::DataType::kFLOAT) { - auto output = static_cast(outputs[0]); - auto const Weight = static_cast(mWeightDev); - status = lookup_table(stream, - static_cast(mWeightWidth), - batchSize, - S, - inputIds, - Weight, - mWeightHeight, - output); - } else if (mType == nvinfer1::DataType::kHALF) { - auto output = static_cast(outputs[0]); - auto const Weight = static_cast(mWeightDev); - status = lookup_table(stream, - static_cast(mWeightWidth), - batchSize, - S, - inputIds, - Weight, - mWeightHeight, - output); - } - return status; -} - -// IPluginV2Ext Methods -nvinfer1::DataType LookupTablePluginDynamic::getOutputDataType( - int32_t index, - nvinfer1::DataType const* inputTypes, - int32_t nbInputs) const noexcept { - if (index == 0) { - assert(mType == nvinfer1::DataType::kHALF || - mType == nvinfer1::DataType::kFLOAT); - return mType; - } -} - -// IPluginV2 Methods -char const* LookupTablePluginDynamic::getPluginType() const noexcept { - return LOOKUPTABLEPLUGINNAME; -} - -char const* LookupTablePluginDynamic::getPluginVersion() const noexcept { - return PLUGINVERSION; -} - -int32_t LookupTablePluginDynamic::getNbOutputs() const noexcept { return 1; } - -int32_t LookupTablePluginDynamic::initialize() noexcept { return 0; } - -void LookupTablePluginDynamic::terminate() noexcept { cudaFree(mWeightDev); } - -size_t LookupTablePluginDynamic::getSerializationSize() const noexcept { - size_t const wordSize = getElementSize(mType); - return sizeof(mType) // - + sizeof(mWeightSize) // - + sizeof(mWeightWidth) // - + wordSize * mWeightSize; // -} - -void LookupTablePluginDynamic::serialize(void* buffer) const noexcept { - serialize_value(&buffer, mType); - serialize_value(&buffer, mWeightSize); - serialize_value(&buffer, mWeightWidth); - char* d = static_cast(buffer); - size_t const wordSize = getElementSize(mType); - serFromDev(&d, static_cast(mWeightDev), mWeightSize * wordSize); -} - -void LookupTablePluginDynamic::destroy() noexcept { - // This gets called when the network containing plugin is destroyed - delete this; -} - -void LookupTablePluginDynamic::setPluginNamespace( - char const* libNamespace) noexcept { - mNamespace = libNamespace; -} - -char const* LookupTablePluginDynamic::getPluginNamespace() const noexcept { - return 
mNamespace.c_str(); -} - -LookupTablePluginDynamicCreator::LookupTablePluginDynamicCreator() {} - -char const* LookupTablePluginDynamicCreator::getPluginName() const noexcept { - return LOOKUPTABLEPLUGINNAME; -} - -char const* LookupTablePluginDynamicCreator::getPluginVersion() const noexcept { - return PLUGINVERSION; -} - -nvinfer1::PluginFieldCollection const* -LookupTablePluginDynamicCreator::getFieldNames() noexcept { - return &mFC; -} - -bool initializeFields(nvinfer1::PluginFieldCollection const* fc, - nvinfer1::Weights* weight, - int32_t& mWeightWidth) { // NOLINT - bool output_fp16 = false; - for (int32_t i = 0; i < fc->nbFields; i++) { - std::string field_name(fc->fields[i].name); - if (field_name.compare("lookup_table_weight") == 0) { - weight->values = fc->fields[i].data; - weight->count = fc->fields[i].length; - weight->type = fieldTypeToDataType(fc->fields[i].type); - } - if (field_name.compare("lookup_table_weight_width") == 0) { - assert(fc->fields[i].type == nvinfer1::PluginFieldType::kINT32); - mWeightWidth = const_cast( - static_cast(fc->fields[i].data))[0]; // NOLINT - } - if (field_name.compare("output_fp16") == 0) { - assert(fc->fields[i].type == nvinfer1::PluginFieldType::kINT32); - output_fp16 = static_cast(fc->fields[i].data)[0] != 0; - } - } - return output_fp16; -} - -nvinfer1::IPluginV2* LookupTablePluginDynamicCreator::createPlugin( - char const* name, const nvinfer1::PluginFieldCollection* fc) noexcept { - nvinfer1::Weights weight; - int32_t mWeightWidth; - bool output_fp16 = initializeFields(fc, &weight, mWeightWidth); - nvinfer1::DataType type; - if (output_fp16) { - type = nvinfer1::DataType::kHALF; - } else { - type = nvinfer1::DataType::kFLOAT; - } - WeightsWithOwnership mWeight; - mWeight.convertAndCopy(weight, type); - void* cudaMem{nullptr}; - cudaMalloc(&cudaMem, getWeightsSize(mWeight, type)); - cudaMemcpy(cudaMem, - mWeight.values, - getWeightsSize(mWeight, type), - cudaMemcpyHostToDevice); - LookupTablePluginDynamic* p = - new LookupTablePluginDynamic(type, cudaMem, mWeight.count, mWeightWidth); - return p; -} - -nvinfer1::IPluginV2* LookupTablePluginDynamicCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept { - return new LookupTablePluginDynamic(serialData, serialLength); -} - -void LookupTablePluginDynamicCreator::setPluginNamespace( - char const* libNamespace) noexcept { - mNamespace = libNamespace; -} - -char const* LookupTablePluginDynamicCreator::getPluginNamespace() - const noexcept { - return mNamespace.c_str(); -} - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/lookup_table.h b/paddle/fluid/inference/tensorrt/plugin/lookup_table.h deleted file mode 100644 index 84f6282d4f239e3c4240438bb01539fe00bc3f39..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/lookup_table.h +++ /dev/null @@ -1,126 +0,0 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include -#include "NvInferPlugin.h" -#include "NvInferRuntime.h" -#include "paddle/fluid/inference/tensorrt/plugin/common/bertCommon.h" -#include "paddle/fluid/inference/tensorrt/plugin/common/serialize.h" -#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" -#include "paddle/fluid/platform/enforce.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -class LookupTablePluginDynamic : public nvinfer1::IPluginV2DynamicExt { - public: - LookupTablePluginDynamic(nvinfer1::DataType const type, - void* weight_dev, - int32_t weight_size, - int32_t width); - - LookupTablePluginDynamic(void const* data, size_t length); - - // It doesn't make sense to make EmbLayerNormVarSeqlenPlugin without - // arguments, so we delete default constructor. - LookupTablePluginDynamic() = delete; - - // IPluginV2DynamicExt Methods - bool supportsFormatCombination(int32_t pos, - nvinfer1::PluginTensorDesc const* inOut, - int32_t nbInputs, - int32_t nbOutputs) noexcept override; - size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, - int32_t nbOutputs) const noexcept override; - - // IPluginV2Ext Methods - nvinfer1::DataType getOutputDataType( - int32_t index, - nvinfer1::DataType const* inputTypes, - int32_t nbInputs) const noexcept override; - - // IPluginV2 Methods - nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; - nvinfer1::DimsExprs getOutputDimensions( - int32_t outputIndex, - const nvinfer1::DimsExprs* inputs, - int32_t nbInputs, - nvinfer1::IExprBuilder& exprBuilder) noexcept override; - void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, - int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, - int32_t nbOutputs) noexcept override; - char const* getPluginType() const noexcept override; - int32_t getNbOutputs() const noexcept override; - size_t getSerializationSize() const noexcept override; - void serialize(void* buffer) const noexcept override; - void destroy() noexcept override; - char const* getPluginNamespace() const noexcept override; - void setPluginNamespace(char const* pluginNamespace) noexcept override; - int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, - void const* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) noexcept override; - - int32_t initialize() noexcept override; - void terminate() noexcept override; - char const* getPluginVersion() const noexcept override; - - protected: - std::string mNamespace; - nvinfer1::DataType mType; - void* mWeightDev{nullptr}; - int32_t mWeightSize; - int32_t mWeightWidth; -}; - -class LookupTablePluginDynamicCreator : public nvinfer1::IPluginCreator { - public: - LookupTablePluginDynamicCreator(); - - char const* getPluginName() const noexcept override; - - const nvinfer1::PluginFieldCollection* getFieldNames() noexcept override; - - void setPluginNamespace(char const* pluginNamespace) noexcept override; - - char const* getPluginNamespace() const noexcept override; - - nvinfer1::IPluginV2* createPlugin( - char const* name, - const nvinfer1::PluginFieldCollection* fc) noexcept override; - char const* getPluginVersion() const noexcept override; - nvinfer1::IPluginV2* deserializePlugin(char const* name, - void const* serialData, - size_t serialLength) noexcept override; - - protected: 
- static nvinfer1::PluginFieldCollection mFC; - static std::vector mPluginAttributes; - std::string mNamespace; -}; - -REGISTER_TRT_PLUGIN_V2(LookupTablePluginDynamicCreator); - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu deleted file mode 100644 index 85d9b71d34b3c6360ee98194b87e6a80a2140a84..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu +++ /dev/null @@ -1,224 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -#include -#include - -#include "glog/logging.h" -#include "paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h" -#include "paddle/fluid/operators/math/prelu.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -int PReluPlugin::initialize() TRT_NOEXCEPT { - cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size()); - cudaMemcpy(p_gpu_weight_, - weight_.data(), - weight_.size() * sizeof(float), - cudaMemcpyHostToDevice); - return 0; -} - -void PReluPlugin::terminate() TRT_NOEXCEPT { - if (p_gpu_weight_) { - cudaFree(p_gpu_weight_); - p_gpu_weight_ = nullptr; - } -} - -nvinfer1::Dims PReluPlugin::getOutputDimensions(int index, - const nvinfer1::Dims *inputDims, - int nbInputs) TRT_NOEXCEPT { - assert(nbInputs == 1); - assert(index < this->getNbOutputs()); - nvinfer1::Dims const &input_dims = inputDims[0]; - nvinfer1::Dims output_dims = input_dims; - return output_dims; -} - -int PReluPlugin::enqueue(int batch_size, - const void *const *inputs, -#if IS_TRT_VERSION_LT(8000) - void **outputs, - void *workspace, - cudaStream_t stream) { -#else - void *const *outputs, - void *workspace, - cudaStream_t stream) TRT_NOEXCEPT { -#endif - // input dims is CHW. 
- const auto &input_dims = this->getInputDims(0); - const float *input = reinterpret_cast(inputs[0]); - // const float *alpha = reinterpret_cast(alpha_.get().values); - const float *alpha = p_gpu_weight_; - float *const output = reinterpret_cast(outputs)[0]; - int numel = 1; - for (int i = 0; i < input_dims.nbDims; i++) { - numel *= input_dims.d[i]; - } - - if (mode_ == "channel") { - bool channel_last = data_format_ == "NHWC"; - operators::math::PreluChannelWiseDirectCUDAFunctor - prelu_channel_wise; - prelu_channel_wise(stream, - input, - alpha, - output, - input_dims.d[0], - input_dims.d[1], - channel_last, - numel); - } else if (mode_ == "element") { - operators::math::PreluElementWiseDirectCUDAFunctor - prelu_element_wise; - prelu_element_wise(stream, input, alpha, output, input_dims.d[0], numel); - } else { - operators::math::PreluScalarDirectCUDAFunctor prelu_scalar; - prelu_scalar(stream, input, alpha, output, numel); - } - return cudaGetLastError() != cudaSuccess; -} - -#if IS_TRT_VERSION_GE(6000) - -void PReluPluginDynamic::terminate() TRT_NOEXCEPT { - if (p_gpu_weight_) { - cudaFree(p_gpu_weight_); - } -} - -int PReluPluginDynamic::initialize() TRT_NOEXCEPT { - cudaMalloc(&p_gpu_weight_, sizeof(float) * weight_.size()); - cudaMemcpy(p_gpu_weight_, - weight_.data(), - weight_.size() * sizeof(float), - cudaMemcpyHostToDevice); - return 0; -} - -PReluPluginDynamic::PReluPluginDynamic(void const *serialData, - size_t serialLength) { - DeserializeValue(&serialData, &serialLength, &weight_); - const char *prelu_mode; - DeserializeValue(&serialData, &serialLength, &prelu_mode); - mode_ = std::string(prelu_mode); -} - -size_t PReluPluginDynamic::getSerializationSize() const TRT_NOEXCEPT { - return SerializedSize(mode_.c_str()) + SerializedSize(weight_); -} - -void PReluPluginDynamic::serialize(void *buffer) const TRT_NOEXCEPT { - SerializeValue(&buffer, weight_); - SerializeValue(&buffer, mode_.c_str()); -} - -nvinfer1::DimsExprs PReluPluginDynamic::getOutputDimensions( - int output_index, - const nvinfer1::DimsExprs *inputs, - int nb_inputs, - nvinfer1::IExprBuilder &expr_builder) TRT_NOEXCEPT { - return inputs[0]; -} - -bool PReluPluginDynamic::supportsFormatCombination( - int pos, - const nvinfer1::PluginTensorDesc *in_out, - int nb_inputs, - int nb_outputs) TRT_NOEXCEPT { - PADDLE_ENFORCE_NOT_NULL( - in_out, - platform::errors::InvalidArgument( - "The input of swish plugin shoule not be nullptr.")); - - PADDLE_ENFORCE_LT( - pos, - nb_inputs + nb_outputs, - platform::errors::InvalidArgument("The pos(%d) should be less than the " - "num(%d) of the input and the output.", - pos, - nb_inputs + nb_outputs)); - (in_out && pos < (nb_inputs + nb_outputs)); - - return ((in_out[pos].type == nvinfer1::DataType::kFLOAT) && - in_out[pos].format == nvinfer1::PluginFormat::kLINEAR); -} - -nvinfer1::DataType PReluPluginDynamic::getOutputDataType( - int index, - const nvinfer1::DataType *input_types, - int nb_inputs) const TRT_NOEXCEPT { - PADDLE_ENFORCE_EQ(index, - 0, - platform::errors::InvalidArgument( - "The PRelu Plugin only has one input, so the " - "index value should be 0, but get %d.", - index)); - PADDLE_ENFORCE_EQ((input_types[0] == nvinfer1::DataType::kFLOAT), - true, - platform::errors::InvalidArgument( - "The input type should be half or float")); - return input_types[0]; -} - -int PReluPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, - const nvinfer1::PluginTensorDesc *output_desc, - const void *const *inputs, - void *const *outputs, - void *workspace, - 
cudaStream_t stream) TRT_NOEXCEPT { - auto input_dims = input_desc[0].dims; - const float *alpha = p_gpu_weight_; - const float *input = static_cast(inputs[0]); - float *output = static_cast(outputs[0]); - int numel = 1; - for (int i = 0; i < input_dims.nbDims; i++) { - numel *= input_dims.d[i]; - } - - if (mode_ == "channel") { - bool channel_last = data_format_ == "NHWC"; - operators::math::PreluChannelWiseDirectCUDAFunctor - prelu_channel_wise; - prelu_channel_wise(stream, - input, - alpha, - output, - input_dims.d[0], - input_dims.d[1], - channel_last, - numel); - } else if (mode_ == "element") { - operators::math::PreluElementWiseDirectCUDAFunctor - prelu_element_wise; - prelu_element_wise(stream, input, alpha, output, input_dims.d[0], numel); - } else { - operators::math::PreluScalarDirectCUDAFunctor prelu_scalar; - prelu_scalar(stream, input, alpha, output, numel); - } - return cudaGetLastError() != cudaSuccess; -} -#endif - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h deleted file mode 100644 index 921f0091f333ba09167c1628991a5b6234915c27..0000000000000000000000000000000000000000 --- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include - -#include "paddle/fluid/framework/tensor.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/inference/tensorrt/engine.h" -#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" - -namespace paddle { -namespace inference { -namespace tensorrt { -namespace plugin { - -class PReluPlugin : public PluginTensorRT { - std::vector weight_; - float* p_gpu_weight_; - std::string mode_; - std::string data_format_; - - public: - size_t getSerializationSize() const TRT_NOEXCEPT override { - return getBaseSerializationSize() + SerializedSize(mode_.c_str()) + - SerializedSize(data_format_.c_str()) + SerializedSize(weight_); - } - - // TRT will call this func when we need to serialize the configuration of - // tensorrt. - // It should not be called by users. - void serialize(void* buffer) const TRT_NOEXCEPT override { - serializeBase(buffer); - SerializeValue(&buffer, weight_); - SerializeValue(&buffer, mode_.c_str()); - SerializeValue(&buffer, data_format_.c_str()); - } - - PReluPlugin(const float* weight, - const int weight_num, - std::string const& mode, - std::string const& data_format) - : mode_(mode), data_format_(data_format) { - weight_.resize(weight_num); - std::copy(weight, weight + weight_num, weight_.data()); - } - - // It was used for tensorrt deserialization. - // It should not be called by users. 
- PReluPlugin(void const* serialData, size_t serialLength) { - deserializeBase(serialData, serialLength); - DeserializeValue(&serialData, &serialLength, &weight_); - const char* prelu_mode; - DeserializeValue(&serialData, &serialLength, &prelu_mode); - mode_ = std::string(prelu_mode); - const char* prelu_data_format; - DeserializeValue(&serialData, &serialLength, &prelu_data_format); - data_format_ = std::string(prelu_data_format); - } - ~PReluPlugin() {} - int initialize() TRT_NOEXCEPT override; - void terminate() TRT_NOEXCEPT override; - - PReluPlugin* clone() const TRT_NOEXCEPT override { - auto* ptr = - new PReluPlugin(weight_.data(), weight_.size(), mode_, data_format_); - ptr->p_gpu_weight_ = p_gpu_weight_; - return ptr; - } - - const char* getPluginType() const TRT_NOEXCEPT override { - return "prelu_plugin"; - } - int getNbOutputs() const TRT_NOEXCEPT override { return 1; } - nvinfer1::Dims getOutputDimensions(int index, - const nvinfer1::Dims* inputs, - int nbInputDims) TRT_NOEXCEPT override; -#if IS_TRT_VERSION_LT(8000) - int enqueue(int batchSize, - const void* const* inputs, - void** outputs, -#else - int enqueue(int batchSize, - const void* const* inputs, - void* const* outputs, -#endif - void* workspace, - cudaStream_t stream) TRT_NOEXCEPT override; -}; - -class PReluPluginCreator : public TensorRTPluginCreator { - public: - const char* getPluginName() const TRT_NOEXCEPT override { - return "prelu_plugin"; - } - - const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) - TRT_NOEXCEPT override { - return new PReluPlugin(serial_data, serial_length); - } -}; -REGISTER_TRT_PLUGIN_V2(PReluPluginCreator); - -#if IS_TRT_VERSION_GE(6000) -class PReluPluginDynamic : public DynamicPluginTensorRT { - public: - PReluPluginDynamic(const float* weight, - const int weight_num, - std::string const& mode, - std::string const& data_format) - : mode_(mode), data_format_(data_format) { - weight_.resize(weight_num); - std::copy(weight, weight + weight_num, weight_.data()); - } - - PReluPluginDynamic(void const* serialData, size_t serialLength); - ~PReluPluginDynamic() {} - nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { - auto ptr = new PReluPluginDynamic( - weight_.data(), weight_.size(), mode_, data_format_); - ptr->p_gpu_weight_ = p_gpu_weight_; - return ptr; - } - - const char* getPluginType() const TRT_NOEXCEPT override { - return "prelu_plugin_dynamic"; - } - int getNbOutputs() const TRT_NOEXCEPT override { return 1; } - int initialize() TRT_NOEXCEPT override; - void terminate() TRT_NOEXCEPT override; - - size_t getSerializationSize() const TRT_NOEXCEPT override; - void serialize(void* buffer) const TRT_NOEXCEPT override; - - nvinfer1::DimsExprs getOutputDimensions(int output_index, - const nvinfer1::DimsExprs* inputs, - int nb_inputs, - nvinfer1::IExprBuilder& expr_builder) - TRT_NOEXCEPT override; - - bool supportsFormatCombination(int pos, - const nvinfer1::PluginTensorDesc* inOut, - int nbInputs, - int nbOutputs) TRT_NOEXCEPT override; - - void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, - int nbInputs, - const nvinfer1::DynamicPluginTensorDesc* out, - int nbOutputs) TRT_NOEXCEPT override {} - - size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, - int nbInputs, - const nvinfer1::PluginTensorDesc* outputs, - int nbOutputs) const TRT_NOEXCEPT override { - return 0; - } - - int enqueue(const 
nvinfer1::PluginTensorDesc* inputDesc, - const nvinfer1::PluginTensorDesc* outputDesc, - const void* const* inputs, - void* const* outputs, - void* workspace, - cudaStream_t stream) TRT_NOEXCEPT override; - nvinfer1::DataType getOutputDataType(int index, - const nvinfer1::DataType* inputTypes, - int nbInputs) const - TRT_NOEXCEPT override; - - void destroy() TRT_NOEXCEPT override { delete this; } - - private: - std::vector weight_; - float* p_gpu_weight_; - std::string mode_; - std::string data_format_; -}; -#endif - -class PReluPluginDynamicCreator : public TensorRTPluginCreator { - public: - const char* getPluginName() const TRT_NOEXCEPT override { - return "prelu_plugin_dynamic"; - } - - const char* getPluginVersion() const TRT_NOEXCEPT override { return "1"; } - - nvinfer1::IPluginV2* deserializePlugin(const char* name, - const void* serial_data, - size_t serial_length) - TRT_NOEXCEPT override { - return new PReluPluginDynamic(serial_data, serial_length); - } -}; -REGISTER_TRT_PLUGIN_V2(PReluPluginDynamicCreator); - -} // namespace plugin -} // namespace tensorrt -} // namespace inference -} // namespace paddle
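
The `attention_lstm_fuse_pass.cc` hunk above replaces five lines of inline attribute bookkeeping with a single `GetMarkedNodes(graph)` call, but the helper's definition is not part of this patch. A minimal sketch consistent with the deleted inline code (the name `GetMarkedNodes` comes from the hunk; the rest is a hedged reconstruction, presumably living in `graph_viz_pass.h`):

```c++
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"

namespace paddle {
namespace framework {
namespace ir {

// Lazily attach the marked-node set to the graph on first use, then return it.
// This mirrors the inline logic removed from AttentionLSTMFusePass::FindWhileOp().
inline GraphVizPass::marked_nodes_t& GetMarkedNodes(Graph* graph) {
  if (!graph->Has(kGraphvizMarkedNodeAttr)) {
    graph->Set(kGraphvizMarkedNodeAttr, new GraphVizPass::marked_nodes_t);
  }
  return graph->Get<GraphVizPass::marked_nodes_t>(kGraphvizMarkedNodeAttr);
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle
```

Centralizing the lazy initialization in one helper keeps each fuse pass from repeating the `Has`/`Set` dance and from hard-coding the attribute's value type.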
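The deleted `fused_lookup_tables_op.cc` converter and its `LookupTablePluginDynamic` plugin are superseded by `tensorrt/convert/lookup_table_op.cc`, per the NOTE at the end of the deleted converter. That file is not shown in this patch, but the underlying operation is just a gather along axis 0 of the weight matrix, which TensorRT can express natively. A hypothetical sketch of the native-layer equivalent (names are illustrative, not the actual converter code):

```c++
#include "NvInfer.h"

namespace {

// output[i, :] = weight[ids[i], :]. An embedding lookup is a gather on
// axis 0 of the weight matrix, so no custom plugin is required.
nvinfer1::ITensor* AddLookupTable(nvinfer1::INetworkDefinition* network,
                                  nvinfer1::ITensor* weight,
                                  nvinfer1::ITensor* ids) {
  nvinfer1::IGatherLayer* gather =
      network->addGather(*weight, *ids, /*axis=*/0);
  return gather->getOutput(0);
}

}  // namespace
```

A native layer also serializes with the engine, which removes the manual `serialize`/`deserializePlugin` logic and the `cudaMalloc`/`cudaMemcpy` weight juggling visible in the deleted plugin.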
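Likewise, `prelu_op_plugin.cu/.h` can be deleted because `prelu_op.cc` no longer references the plugin; presumably the converter now relies on TensorRT's built-in `IParametricReLULayer` (available since TensorRT 6), which computes the same `y = x > 0 ? x : slope * x`. A hypothetical sketch of that mapping, not the converter's actual code:

```c++
#include "NvInfer.h"

namespace {

// y = x > 0 ? x : slope * x, with `slope` broadcastable to the shape of `x`.
nvinfer1::ITensor* AddPRelu(nvinfer1::INetworkDefinition* network,
                            nvinfer1::ITensor* input,
                            nvinfer1::ITensor* slope) {
  nvinfer1::IParametricReLULayer* layer =
      network->addParametricReLU(*input, *slope);
  return layer->getOutput(0);
}

}  // namespace
```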