diff --git a/tools/infer_prune_patches/jitcode.h.patch b/tools/infer_prune_patches/jitcode.h.patch
index 9022b459db51c9b1f1036fc8cda2bb58ebb08352..a123939a4955b7a23ce412cd90571de3c5a195c5 100644
--- a/tools/infer_prune_patches/jitcode.h.patch
+++ b/tools/infer_prune_patches/jitcode.h.patch
@@ -1,15 +1,15 @@
 diff --git a/paddle/fluid/operators/jit/gen/jitcode.h b/paddle/fluid/operators/jit/gen/jitcode.h
-index 23650c8efc..24466e4327 100644
+index d71497275d..cb56e1d949 100644
 --- a/paddle/fluid/operators/jit/gen/jitcode.h
 +++ b/paddle/fluid/operators/jit/gen/jitcode.h
-@@ -97,8 +97,8 @@ class JitCode : public GenBase, public Xbyak::CodeGenerator {
+@@ -102,8 +102,8 @@ class JitCode : public GenBase, public Xbyak::CodeGenerator {
     }
     ret();
   }
 -  void L(const char* label) { Xbyak::CodeGenerator::L(label); }
 -  void L(Xbyak::Label& label) { Xbyak::CodeGenerator::L(label); }  // NOLINT
-+  void L(const char* label) { }
-+  void L(Xbyak::Label& label) { }  // NOLINT
++  void L(const char* label) { }
++  void L(Xbyak::Label& label) { }  // NOLINT
   // Enhanced vector extension
-  Xbyak::Address EVEX_compress_addr(Xbyak::Reg64 base, int offt,
-                                    bool bcast = false) {
+  Xbyak::Address EVEX_compress_addr(Xbyak::Reg64 base,
+                                    int offt,
diff --git a/tools/infer_prune_patches/op_registry.h.patch b/tools/infer_prune_patches/op_registry.h.patch
index a1d2a66347cc4c6c0199dbf4bedb2ca0cd487fe1..fb41c98de50f456d638f4af067f5da9971b2ed3b 100644
--- a/tools/infer_prune_patches/op_registry.h.patch
+++ b/tools/infer_prune_patches/op_registry.h.patch
@@ -1,10 +1,10 @@
 diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
-index a1f07f9f25..179df3b981 100644
+index d38efbff31..f5bef776d6 100644
 --- a/paddle/fluid/framework/op_registry.h
 +++ b/paddle/fluid/framework/op_registry.h
-@@ -178,9 +178,8 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelType...> {
-     RegisterKernelClass<PlaceType, T>(
-         op_type, library_type, customized_type_value,
+@@ -186,9 +186,8 @@ struct OpKernelRegistrarFunctor<PlaceType, false, I, KernelType...> {
+         library_type,
+         customized_type_value,
 -        [op_type](const framework::ExecutionContext& ctx) {
 +        [](const framework::ExecutionContext& ctx) {
@@ -13,12 +13,14 @@ index a1f07f9f25..179df3b981 100644
          });
    constexpr auto size = std::tuple_size<std::tuple<KernelType...>>::value;
    OpKernelRegistrarFunctor<PlaceType, I + 1 == size, I + 1, KernelType...>
-@@ -240,13 +239,8 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
      RegisterKernelClass<PlaceType, Functor>(
--        op_type, library_type, customized_type_value,
+-        op_type,
+-        library_type,
+-        customized_type_value,
 -
 -        [op_type](const framework::ExecutionContext& ctx) {
 -          Functor()(ctx);
@@ -29,7 +31,7 @@ index a1f07f9f25..179df3b981 100644
    constexpr auto size = std::tuple_size<std::tuple<DataTypeAndKernelType...>>::value;
-@@ -275,7 +269,7 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
diff --git a/tools/infer_prune_patches/tensorrt_subgraph_pass.cc.patch b/tools/infer_prune_patches/tensorrt_subgraph_pass.cc.patch
--- a/tools/infer_prune_patches/tensorrt_subgraph_pass.cc.patch
+++ b/tools/infer_prune_patches/tensorrt_subgraph_pass.cc.patch
      (graph->Has(framework::ir::kEmbEltwiseLayernormPass) &&
       graph->Has(framework::ir::kMultiheadMatmulPass));
@@ -10,7 +10,7 @@ index 394ce7799e..8edbef50be 100644
      if (use_static_engine) {
        trt_engine_serialized_data = GetTrtEngineSerializedData(
            Get<std::string>("model_opt_cache_dir"), engine_key);
-@@ -399,6 +400,19 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
+@@ -427,6 +428,19 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
          LOG(INFO) << "Load TRT Optimized Info from "
                    << GetTrtEngineSerializedPath(
                           Get<std::string>("model_opt_cache_dir"), engine_key);
@@ -30,16 +30,18 @@ index 394ce7799e..8edbef50be 100644
          return;
        }
      }
-@@ -411,12 +425,25 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
+@@ -439,7 +453,6 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
    auto *scope = param_scope();
    framework::BlockDesc block_desc_temp(nullptr, block_desc.Proto());
 -  std::unordered_set<std::string> param_set(params.begin(), params.end());
   inference::Singleton<inference::tensorrt::OpConverter>::Global()
       .ConvertBlockToTRTEngine(
-          &block_desc_temp, *scope,
-          std::vector<std::string>(input_names.begin(), input_names.end()),
-          param_set, output_mapping, trt_engine);
+          &block_desc_temp,
+@@ -449,6 +462,21 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
+          output_mapping,
+          trt_engine);
+
 +  const auto* root_scope{scope};
 +  for (;root_scope->parent();) {
 +    root_scope = root_scope->parent();
 +  }
 +  for (auto& name : param_set) {
 +    root_scope->FindVar(name)->Clear();
 +  }
 +  for (int dev_id = 0; dev_id < platform::GetGPUDeviceCount();
 +       ++dev_id) {
 +    memory::Release(platform::CUDAPlace(dev_id));
 +  }
 +  memory::Release(platform::CPUPlace());
-
++  if (use_static_engine) {
     nvinfer1::IHostMemory *serialized_engine_data = trt_engine->Serialize();
     trt_engine_serialized_data =
-@@ -431,6 +458,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
+@@ -462,6 +490,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
               << GetTrtEngineSerializedPath(
                      Get<std::string>("model_opt_cache_dir"), engine_key);
      }
diff --git a/tools/infer_prune_patches/thread_local_allocator.cc.patch b/tools/infer_prune_patches/thread_local_allocator.cc.patch
index 6a4486aae9457cd20bbbabfbeaf0a3bed37ff422..b95480ad91dc77ff994b364e983866fe8f1001a7 100644
--- a/tools/infer_prune_patches/thread_local_allocator.cc.patch
+++ b/tools/infer_prune_patches/thread_local_allocator.cc.patch
@@ -1,8 +1,8 @@
 diff --git a/paddle/fluid/memory/allocation/thread_local_allocator.cc b/paddle/fluid/memory/allocation/thread_local_allocator.cc
-index f125670a59..f858a30301 100644
+index 875e57cfd4..b111ada3ab 100644
 --- a/paddle/fluid/memory/allocation/thread_local_allocator.cc
 +++ b/paddle/fluid/memory/allocation/thread_local_allocator.cc
-@@ -13,18 +13,62 @@
+@@ -13,19 +13,62 @@
  // limitations under the License.

  #include "paddle/fluid/memory/allocation/thread_local_allocator.h"
@@ -64,12 +64,13 @@ index f125670a59..f858a30301 100644
 -    buddy_allocator_.reset(new memory::detail::BuddyAllocator(
 -        std::unique_ptr<memory::detail::SystemAllocator>(
 -            new memory::detail::GPUAllocator(place_.device)),
--        platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()));
+-        platform::GpuMinChunkSize(),
+-        platform::GpuMaxChunkSize()));
 +    direct_allocator_.reset(new DirectAllocator{place_});
    } else {
      PADDLE_THROW(platform::errors::Unavailable(
          "Thread local allocator only supports CUDAPlace now."));
-@@ -59,7 +103,7 @@ ThreadLocalCUDAAllocatorPool::ThreadLocalCUDAAllocatorPool()
+@@ -61,7 +104,7 @@ ThreadLocalCUDAAllocatorPool::ThreadLocalCUDAAllocatorPool()
 ThreadLocalAllocation* ThreadLocalAllocatorImpl::AllocateImpl(size_t size) {
    VLOG(10) << "ThreadLocalAllocatorImpl::AllocateImpl " << size;
 -  auto ptr = buddy_allocator_->Alloc(size);
 +  auto ptr = direct_allocator_->Alloc(size);
    auto* tl_allocation = new ThreadLocalAllocation(ptr, size, place_);
    tl_allocation->SetThreadLocalAllocatorImpl(shared_from_this());
    return tl_allocation;
-@@ -67,12 +111,12 @@ ThreadLocalAllocation* ThreadLocalAllocatorImpl::AllocateImpl(size_t size) {
+@@ -69,12 +112,12 @@ ThreadLocalAllocation* ThreadLocalAllocatorImpl::AllocateImpl(size_t size) {
 void ThreadLocalAllocatorImpl::FreeImpl(ThreadLocalAllocation* allocation) {
    VLOG(10) << "ThreadLocalAllocatorImpl::FreeImpl " << allocation;