diff --git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc
index 75ab747794f014ec1b9c8f84978426a6329b4cf4..fda588db4d82a745b581750a4f45f2344ab439ce 100644
--- a/paddle/fluid/framework/data_type.cc
+++ b/paddle/fluid/framework/data_type.cc
@@ -109,8 +109,8 @@ size_t SizeOfType(proto::VarType::Type type) {
 }
 
 // Now only supports promotion of complex type
-bool NeedPromoteTypes(const proto::VarType::Type a,
-                      const proto::VarType::Type b) {
+inline bool NeedPromoteTypes(const proto::VarType::Type& a,
+                             const proto::VarType::Type& b) {
   return (IsComplexType(a) || IsComplexType(b));
 }
 
diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h
index 124f2a86e94233b9c2374f88014f67c331151112..81a7f6a41bf3ab00da08b7f90ce6c866494b43d1 100644
--- a/paddle/fluid/framework/data_type.h
+++ b/paddle/fluid/framework/data_type.h
@@ -200,7 +200,7 @@ inline std::ostream& operator<<(std::ostream& out,
   return out;
 }
 
-extern inline bool IsComplexType(const proto::VarType::Type type) {
+extern inline bool IsComplexType(const proto::VarType::Type& type) {
   return (type == proto::VarType::COMPLEX64 ||
           type == proto::VarType::COMPLEX128);
 }
diff --git a/paddle/fluid/framework/op_registry.cc b/paddle/fluid/framework/op_registry.cc
index d69edef7840f5d875c3f799e99af0812bb154d66..d14254b7355c9ca0b550090ce44e64f5b7b3602a 100644
--- a/paddle/fluid/framework/op_registry.cc
+++ b/paddle/fluid/framework/op_registry.cc
@@ -21,13 +21,21 @@ namespace framework {
 
 std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
     const std::string& type, const VariableNameMap& inputs,
-    const VariableNameMap& outputs, AttributeMap attrs, bool attr_check) {
+    const VariableNameMap& outputs, const AttributeMap& attrs,
+    bool attr_check) {
   auto& info = OpInfoMap::Instance().Get(type);
-  if (attr_check && info.Checker() != nullptr) {
-    info.Checker()->Check(&attrs);
-  }
-  auto op = info.Creator()(type, inputs, outputs, attrs);
-  return std::unique_ptr<OperatorBase>(op);
+  const bool run_checker = attr_check && info.Checker() != nullptr;
+  if (!run_checker) {
+    // Nothing to validate: build the operator from the caller's map as-is.
+    return std::unique_ptr<OperatorBase>(
+        info.Creator()(type, inputs, outputs, attrs));
+  }
+  // Check() takes a mutable map, so hand it a private copy and construct the
+  // operator from that copy; the caller's map is left untouched.
+  AttributeMap checked_attrs = attrs;
+  info.Checker()->Check(&checked_attrs);
+  return std::unique_ptr<OperatorBase>(
+      info.Creator()(type, inputs, outputs, checked_attrs));
 }
 
 static VariableNameMap ConvertOpDescVarsToVarNameMap(
diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
index eb40a49b4066a7a8c8e9c142a310b815fd73da20..a1f07f9f2520e9e6b87ce63eab3b63236cb499d2 100644
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
@@ -129,7 +129,7 @@ class OpRegistry {
   static std::unique_ptr<OperatorBase> CreateOp(const std::string& type,
                                                 const VariableNameMap& inputs,
                                                 const VariableNameMap& outputs,
-                                                AttributeMap attrs,
+                                                const AttributeMap& attrs,
                                                 bool attr_check = true);
 
   static std::unique_ptr<OperatorBase> CreateOp(const proto::OpDesc& op_desc);
diff --git a/paddle/fluid/framework/phi_utils.cc b/paddle/fluid/framework/phi_utils.cc
index fe7c56827612cafd843eac8eccfd1e902d39950d..a20015a1e5927a8bf1352c1620227e72603c1fc0 100644
--- a/paddle/fluid/framework/phi_utils.cc
+++ b/paddle/fluid/framework/phi_utils.cc
@@ -81,19 +81,24 @@ OpKernelType TransPhiKernelKeyToOpKernelType(const phi::KernelKey& kernel_key) {
 phi::KernelKey TransOpKernelTypeToPhiKernelKey(
     const OpKernelType& kernel_type) {
   phi::Backend backend = phi::TransToPhiBackend(kernel_type.place_);
-  if (kernel_type.library_type_ == LibraryType::kMKLDNN) {
-    backend = phi::Backend::MKLDNN;
-  } else if (kernel_type.library_type_ == LibraryType::kCUDNN) {
-    backend = phi::Backend::GPUDNN;
-  } else if (kernel_type.library_type_ == LibraryType::kKP) {
-    backend = phi::Backend::KPS;
-  } else {
-    // do nothing
+  // A library type of MKLDNN, CUDNN or KP replaces the backend derived from
+  // the place; every other library type keeps the place-derived backend.
+  switch (kernel_type.library_type_) {
+    case LibraryType::kMKLDNN:
+      backend = phi::Backend::MKLDNN;
+      break;
+    case LibraryType::kCUDNN:
+      backend = phi::Backend::GPUDNN;
+      break;
+    case LibraryType::kKP:
+      backend = phi::Backend::KPS;
+      break;
+    default:
+      break;
   }
-  paddle::experimental::DataLayout layout = kernel_type.data_layout_;
-  paddle::experimental::DataType dtype =
-      paddle::framework::TransToPhiDataType(kernel_type.data_type_);
-  return phi::KernelKey(backend, layout, dtype);
+  const auto& layout = kernel_type.data_layout_;
+  const auto dtype = framework::TransToPhiDataType(kernel_type.data_type_);
+  return phi::KernelKey(backend, layout, dtype);
 }
 
 phi::KernelKey FallBackToCpu(const OpKernelType& expected_kernel_key,
diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 038ea575247d54d6c4abf159e7bec96adefa8bcf..e928cbb6548397d9a784ec95785e07ac6f239d7d 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -459,7 +459,10 @@ static void OpBaseRunImpl(const framework::OperatorBase& op,
                           const framework::AttributeMap& attrs,
                           const framework::AttributeMap& default_attrs,
                           const platform::Place& place) {
-  auto* op_kernel = dynamic_cast<const framework::OperatorWithKernel*>(&op);
+  // Keep the checked downcast: a static_cast of &op can never yield nullptr,
+  // which would turn the PADDLE_ENFORCE_NOT_NULL below into dead code and make
+  // an operator without a kernel undefined behaviour instead of an error.
+  auto* op_kernel = dynamic_cast<const framework::OperatorWithKernel*>(&op);
   PADDLE_ENFORCE_NOT_NULL(
       op_kernel, platform::errors::PermissionDenied(
                      "Only support operator with kernel in Dygraph mode."));
diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc
index 1fef559f21e12b82136b32469dc77ae2abf95238..bf69f6cf5ac9d722710c45f7d21c9651d5a5d4b2 100644
--- a/paddle/fluid/imperative/prepared_operator.cc
+++ b/paddle/fluid/imperative/prepared_operator.cc
@@ -40,6 +40,13 @@ static const phi::Kernel empty_kernel;
 static const framework::RuntimeContext empty_ctx({}, {});
 static const framework::Scope empty_scope;
 
+const phi::KernelFactory& PreparedOp::phi_kernel_factory =
+    phi::KernelFactory::Instance();
+const phi::OpUtilsMap& PreparedOp::phi_op_utils_map =
+    phi::OpUtilsMap::Instance();
+const phi::DefaultKernelSignatureMap& PreparedOp::default_phi_kernel_sig_map =
+    phi::DefaultKernelSignatureMap::Instance();
+
 const std::shared_ptr<VariableWrapper>& GetVariableWrapper(
     const std::shared_ptr<paddle::imperative::VarBase>& var) {
   return var->SharedVar();
@@ -139,12 +146,14 @@ PreparedOp::PreparedOp(const framework::OperatorBase& op,
       phi_kernel_(phi_kernel) {}
 
 template <typename VarType>
-PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
-                       const NameVarMap<VarType>& outs,
-                       const framework::OperatorWithKernel& op,
-                       const platform::Place& place,
-                       const framework::AttributeMap& attrs,
-                       const framework::AttributeMap& default_attrs) {
+PreparedOp PrepareImpl(
+    const NameVarMap<VarType>& ins, const NameVarMap<VarType>& outs,
+    const framework::OperatorWithKernel& op, const platform::Place& place,
+    const framework::AttributeMap& attrs,
+    const framework::AttributeMap& default_attrs,
+    const phi::KernelFactory& phi_kernel_factory,
+    const phi::OpUtilsMap& phi_op_utils_map,
+    const phi::DefaultKernelSignatureMap& default_phi_kernel_sig_map) {
   platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
   auto* dev_ctx = pool.Get(place);
 
@@ -184,15 +193,15 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
 
   bool has_phi_kernel = false;
 
-  const auto* arg_map_fn =
-      phi::OpUtilsMap::Instance().GetArgumentMappingFn(op.Type());
+  const auto* arg_map_fn = phi_op_utils_map.GetArgumentMappingFn(op.Type());
+
   if (arg_map_fn) {
     has_phi_kernel = true;
     kernel_signature = (*arg_map_fn)(
         framework::ExecutionArgumentMappingContext(dygraph_exe_ctx));
   } else {
     default_kernel_signature =
-        phi::DefaultKernelSignatureMap::Instance().GetNullable(op.Type());
+        default_phi_kernel_sig_map.GetNullable(op.Type());
     if (default_kernel_signature) {
       has_phi_kernel = true;
       kernel_signature = *default_kernel_signature;
@@ -228,8 +237,7 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
                 << ", using_kernel_key:" << expected_kernel_key;
         phi::KernelKey try_pt_kernel_key =
             TransOpKernelTypeToPhiKernelKey(expected_kernel_key);
-        if (!phi::KernelFactory::Instance().HasKernel(pt_kernel_name,
-                                                      try_pt_kernel_key)) {
+        if (!phi_kernel_factory.HasKernel(pt_kernel_name, try_pt_kernel_key)) {
           expected_kernel_key.library_type_ = expected_kernel_key_library_type;
           VLOG(3) << "modify XPU KP kernel: " << op.Type() << " is failed "
                   << expected_kernel_key;
@@ -239,8 +247,8 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
 #endif
 
     pt_kernel_key = TransOpKernelTypeToPhiKernelKey(expected_kernel_key);
-    auto& phi_kernel = phi::KernelFactory::Instance().SelectKernel(
-        pt_kernel_name, pt_kernel_key);
+    auto& phi_kernel =
+        phi_kernel_factory.SelectKernel(pt_kernel_name, pt_kernel_key);
 
     if (phi_kernel.IsValid()
 #if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP)
@@ -295,11 +303,11 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
       || (is_xpu_unsupport && !is_xpu_kp_support)
 #endif
           ) {
-    if (phi::KernelFactory::Instance().HasCompatiblePhiKernel(op.Type())) {
+    if (has_phi_kernel) {
       auto pt_cpu_kernel_key =
           FallBackToCpu(expected_kernel_key, pt_kernel_key, op);
-      auto& pt_cpu_kernel = phi::KernelFactory::Instance().SelectKernel(
-          pt_kernel_name, pt_cpu_kernel_key);
+      auto& pt_cpu_kernel =
+          phi_kernel_factory.SelectKernel(pt_kernel_name, pt_cpu_kernel_key);
       if (pt_cpu_kernel.IsValid()) {
         VLOG(6) << "Dynamic mode PrepareImpl - kernel name: " << pt_kernel_name
                 << " | kernel key: " << pt_cpu_kernel_key
@@ -408,7 +416,9 @@ PreparedOp PreparedOp::Prepare(const NameVarMap<VarBase>& ins,
                                const platform::Place& place,
                                const framework::AttributeMap& attrs,
                                const framework::AttributeMap& default_attrs) {
-  return PrepareImpl<VarBase>(ins, outs, op, place, attrs, default_attrs);
+  return PrepareImpl<VarBase>(ins, outs, op, place, attrs, default_attrs,
+                              phi_kernel_factory, phi_op_utils_map,
+                              default_phi_kernel_sig_map);
 }
 
 PreparedOp PreparedOp::Prepare(const NameVarMap<VariableWrapper>& ins,
@@ -417,8 +427,9 @@ PreparedOp PreparedOp::Prepare(const NameVarMap<VariableWrapper>& ins,
                                const platform::Place& place,
                                const framework::AttributeMap& attrs,
                                const framework::AttributeMap& default_attrs) {
-  return PrepareImpl<VariableWrapper>(ins, outs, op, place, attrs,
-                                      default_attrs);
+  return PrepareImpl<VariableWrapper>(
+      ins, outs, op, place, attrs, default_attrs, phi_kernel_factory,
+      phi_op_utils_map, default_phi_kernel_sig_map);
 }
 
 PreparedOp PreparedOp::Prepare(const NameVarMap<egr::EagerVariable>& ins,
@@ -427,8 +438,9 @@ PreparedOp PreparedOp::Prepare(const NameVarMap<egr::EagerVariable>& ins,
                                const platform::Place& place,
                                const framework::AttributeMap& attrs,
                                const framework::AttributeMap& default_attrs) {
-  return PrepareImpl<egr::EagerVariable>(ins, outs, op, place, attrs,
-                                         default_attrs);
+  return PrepareImpl<egr::EagerVariable>(
+      ins, outs, op, place, attrs, default_attrs, phi_kernel_factory,
+      phi_op_utils_map, default_phi_kernel_sig_map);
 }
 template <typename VarType>
 static void PreparedOpRunImpl(
@@ -441,7 +453,6 @@ static void PreparedOpRunImpl(
     const NameVarMap<VarType>& outs, const framework::AttributeMap& attrs,
     const framework::AttributeMap& default_attrs) {
   // TODO(zjl): remove scope in dygraph
-  framework::Scope scope;
 
   {
     platform::RecordEvent record_event("infer_shape",
@@ -458,8 +469,8 @@ static void PreparedOpRunImpl(
                                        platform::TracerEventType::OperatorInner,
                                        1, platform::EventRole::kInnerOp);
 
-    func(DygraphExecutionContext<VarType>(op, scope, *dev_ctx, ctx, ins, outs,
-                                          attrs, default_attrs));
+    func(DygraphExecutionContext<VarType>(op, empty_scope, *dev_ctx, ctx, ins,
+                                          outs, attrs, default_attrs));
   }
 
   if (FLAGS_check_nan_inf) {
@@ -503,7 +514,7 @@ static void PreparedOpRunPtImpl(
     const NameVarMap<VarType>& outs, const framework::AttributeMap& attrs,
     const framework::AttributeMap& default_attrs) {
   {
-    platform::RecordEvent record_event(op.Type() + "::infer_shape",
+    platform::RecordEvent record_event("infer_shape",
                                        platform::TracerEventType::OperatorInner,
                                        1, platform::EventRole::kInnerOp);
     DygraphInferShapeContext<VarType> infer_shape_ctx(
@@ -513,7 +524,7 @@ static void PreparedOpRunPtImpl(
   }
 
   {
-    platform::RecordEvent record_event(op.Type() + "::compute",
+    platform::RecordEvent record_event("compute",
                                        platform::TracerEventType::OperatorInner,
                                        1, platform::EventRole::kInnerOp);
 
diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h
index 5c7f337dc6cf4290d9a5ad43f8ee98f28d29a97e..c6a535c5990d621c146e67c892ba163d05fb4008 100644
--- a/paddle/fluid/imperative/prepared_operator.h
+++ b/paddle/fluid/imperative/prepared_operator.h
@@ -214,6 +214,10 @@ class PreparedOp {
   const phi::KernelSignature* default_kernel_signature_;
   phi::KernelSignature kernel_signature_;
   const phi::Kernel& phi_kernel_;
+
+  static const phi::KernelFactory& phi_kernel_factory;
+  static const phi::OpUtilsMap& phi_op_utils_map;
+  static const phi::DefaultKernelSignatureMap& default_phi_kernel_sig_map;
 };
 
 const inline framework::Attribute& GetAttr(
diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc
index 47274f8a31efb811642ce2d5fe9b4e1d7a452e5a..6c31b025507f8a19b59c346555d3d4c99a4a976d 100644
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -192,7 +192,7 @@ void Tracer::TraceOpImpl(const std::string& type,
                          paddle::framework::AttributeMap* passed_default_attrs_,
                          bool use_default_attr_map) {
   platform::RecordEvent op_type_record_event(
-      type + " trace_op", platform::TracerEventType::Operator, 1);
+      "trace_op", platform::TracerEventType::Operator, 1);
   platform::ScopedFlushDenormal flush;
   VLOG(1) << "Trace Op: " << type;
   if (FLAGS_use_mkldnn) {
diff --git a/paddle/phi/core/compat/convert_utils.cc b/paddle/phi/core/compat/convert_utils.cc
index 4fa11ac7860ef534e2e48973723407c4e318dfd2..9dd3bbd59a19b42c597fe245b86e610d5df46649 100644
--- a/paddle/phi/core/compat/convert_utils.cc
+++ b/paddle/phi/core/compat/convert_utils.cc
@@ -28,25 +28,30 @@ namespace phi {
 
 Backend TransToPhiBackend(const phi::Place& place) {
   auto allocation_type = place.GetType();
-  if (allocation_type == phi::AllocationType::CPU) {
-    return Backend::CPU;
-  } else if (allocation_type == phi::AllocationType::GPU) {
-    return Backend::GPU;
-  } else if (allocation_type == phi::AllocationType::GPUPINNED) {
-    return Backend::GPU;
-  } else if (allocation_type == phi::AllocationType::XPU) {
-    return Backend::XPU;
-  } else if (allocation_type == phi::AllocationType::NPU) {
-    return Backend::NPU;
-  } else if (allocation_type == phi::AllocationType::IPU) {
-    return Backend::IPU;
-  } else if (allocation_type == phi::AllocationType::CUSTOM) {
-    return static_cast<Backend>(
-        static_cast<size_t>(Backend::NUM_BACKENDS) +
-        GetOrRegisterGlobalDeviceTypeId(place.GetDeviceType()));
-  } else {
-    PADDLE_THROW(phi::errors::InvalidArgument(
-        "Unsupported transform %s to phi Backend.", place));
+  switch (allocation_type) {
+    case AllocationType::CPU:
+      return Backend::CPU;
+    // GPU and GPUPINNED places both resolve to the GPU backend.
+    case AllocationType::GPU:
+    case AllocationType::GPUPINNED:
+      return Backend::GPU;
+    case AllocationType::XPU:
+      return Backend::XPU;
+    case AllocationType::NPU:
+      return Backend::NPU;
+    case AllocationType::IPU:
+      return Backend::IPU;
+    case AllocationType::CUSTOM: {
+      // Custom devices are numbered after the built-in backends, offset by
+      // the id looked up (or registered) for the place's device type.
+      const auto custom_id =
+          GetOrRegisterGlobalDeviceTypeId(place.GetDeviceType());
+      return static_cast<Backend>(
+          static_cast<size_t>(Backend::NUM_BACKENDS) + custom_id);
+    }
+    default:
+      PADDLE_THROW(phi::errors::InvalidArgument(
+          "Unsupported transform %s to phi Backend.", place));
   }
 }
 
diff --git a/paddle/phi/core/dense_tensor.cc b/paddle/phi/core/dense_tensor.cc
index 1bfe29bc9d3ba211681e18b749eb3b801abc019f..53746d3c04717526ab4587bce9385e8787a0ea4e 100644
--- a/paddle/phi/core/dense_tensor.cc
+++ b/paddle/phi/core/dense_tensor.cc
@@ -129,7 +129,6 @@ void* DenseTensor::AllocateFrom(Allocator* allocator,
 
 template <typename T>
 const T* DenseTensor::data() const {
-  check_memory_size();
   PADDLE_ENFORCE_EQ(
       dtype(),
       paddle::experimental::CppTypeToDataType<T>::Type(),
@@ -141,13 +140,13 @@ const T* DenseTensor::data() const {
 
 template <typename T>
 T* DenseTensor::data() {
-  check_memory_size();
+  T* ret = static_cast<T*>(data());
   PADDLE_ENFORCE(
       (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
       phi::errors::InvalidArgument(
           "The type of data we are trying to retrieve does not match the "
           "type of data currently contained in the container."));
-  return static_cast<T*>(data());
+  return ret;
 }
 
 void* DenseTensor::data() {
diff --git a/paddle/phi/core/kernel_utils.h b/paddle/phi/core/kernel_utils.h
index ddc58f512bf14fe6afa7d351e2e06531763fe323..f548d1da2d4e7ed382a27bccc2c3f1aac35c5d93 100644
--- a/paddle/phi/core/kernel_utils.h
+++ b/paddle/phi/core/kernel_utils.h
@@ -75,7 +75,7 @@ namespace phi {
                     "Kernel's Input should appear before Attributes."); \
       static_assert(out_idx == 0,                                       \
                     "Kernel's Input should appear before Outputs.");    \
-      const std::pair<int, int> range = ctx->InputRangeAt(in_idx);      \
+      const std::pair<int, int>& range = ctx->InputRangeAt(in_idx);     \
       const tensor_type& arg = ctx->InputAt<tensor_type>(range.first);  \
       KernelCallHelper<Tail...>::                                       \
           template Compute<dev_ctx_idx, in_idx + 1, attr_idx, out_idx>( \
@@ -96,7 +96,7 @@ namespace phi {
                     "Kernel's Input should appear before Attributes.");    \
       static_assert(out_idx == 0,                                          \
                     "Kernel's Input should appear before Outputs.");       \
-      const std::pair<int, int> range = ctx->InputRangeAt(in_idx);         \
+      const std::pair<int, int>& range = ctx->InputRangeAt(in_idx);        \
       auto arg = ctx->OptionalInputAt<tensor_type>(range.first);           \
       KernelCallHelper<Tail...>::                                          \
           template Compute<dev_ctx_idx, in_idx + 1, attr_idx, out_idx>(    \
@@ -117,7 +117,7 @@ namespace phi {
                     "Kernel's Input should appear before Attributes.");      \
       static_assert(out_idx == 0,                                            \
                     "Kernel's Input should appear before Outputs.");         \
-      const std::pair<int, int> range = ctx->InputRangeAt(in_idx);           \
+      const std::pair<int, int>& range = ctx->InputRangeAt(in_idx);          \
       std::vector<const tensor_type*> arg = std::move(                       \
           ctx->InputsBetween<tensor_type>(range.first, range.second));       \
       KernelCallHelper<Tail...>::                                            \
@@ -141,7 +141,7 @@ namespace phi {
                     "Kernel's Input should appear before Attributes.");       \
       static_assert(out_idx == 0,                                             \
                     "Kernel's Input should appear before Outputs.");          \
-      const std::pair<int, int> range = ctx->InputRangeAt(in_idx);            \
+      const std::pair<int, int>& range = ctx->InputRangeAt(in_idx);           \
       paddle::optional<const std::vector<const tensor_type*>> arg =           \
           ctx->OptionalInputsBetween<tensor_type>(range.first, range.second); \
       KernelCallHelper<Tail...>::                                             \
@@ -195,7 +195,7 @@ namespace phi {
               int out_idx,                                               \
               typename... PreviousArgs>                                  \
     static void Compute(KernelContext* ctx, PreviousArgs&... pargs) {    \
-      const std::pair<int, int> range = ctx->OutputRangeAt(out_idx);     \
+      const std::pair<int, int>& range = ctx->OutputRangeAt(out_idx);    \
       tensor_type* arg = ctx->MutableOutputAt<tensor_type>(range.first); \
       KernelCallHelper<Tail...>::                                        \
           template Compute<dev_ctx_idx, in_idx, attr_idx, out_idx + 1>(  \
@@ -212,7 +212,7 @@ namespace phi {
               int out_idx,                                                    \
               typename... PreviousArgs>                                       \
     static void Compute(KernelContext* ctx, PreviousArgs&... pargs) {         \
-      const std::pair<int, int> range = ctx->OutputRangeAt(out_idx);          \
+      const std::pair<int, int>& range = ctx->OutputRangeAt(out_idx);         \
       std::vector<tensor_type*> arg = std::move(                              \
           ctx->MutableOutputBetween<tensor_type>(range.first, range.second)); \
       KernelCallHelper<Tail...>::                                             \
diff --git a/paddle/phi/kernels/funcs/broadcast_function.h b/paddle/phi/kernels/funcs/broadcast_function.h
index 7634c2462738b2b3bdb622e851723aef23045dfd..d6b9f0935a24cfede25417a8b4fc072125c6426e 100644
--- a/paddle/phi/kernels/funcs/broadcast_function.h
+++ b/paddle/phi/kernels/funcs/broadcast_function.h
@@ -554,6 +554,7 @@ void BroadcastKernel(const KPDevice &ctx,
                      int axis,
                      Functor func) {
   std::vector<int> dims_size;
+  dims_size.reserve(ins.size());
   bool no_broadcast_flag = true;
   for (auto *in : ins) {
     no_broadcast_flag &= ins[0]->dims() == in->dims();
diff --git a/paddle/phi/kernels/gpu/elementwise_kernel.cu b/paddle/phi/kernels/gpu/elementwise_kernel.cu
index 73964d31a34eebaa257095839900f82c127d28ce..531e5c913112f41db493527b82a8d0cca21fad0b 100644
--- a/paddle/phi/kernels/gpu/elementwise_kernel.cu
+++ b/paddle/phi/kernels/gpu/elementwise_kernel.cu
@@ -28,7 +28,9 @@ namespace phi {
                        int axis,                                     \
                        DenseTensor* out) {                           \
     std::vector<const DenseTensor*> inputs;                          \
+    inputs.reserve(2);                                               \
     std::vector<DenseTensor*> outputs;                               \
+    outputs.reserve(1);                                              \
     inputs.emplace_back(&x);                                         \
     inputs.emplace_back(&y);                                         \
     outputs.emplace_back(out);                                       \