Unverified commit 469a0392, authored by Ruibin Cheung and committed by GitHub

[clang-tidy] enable modernize-use-emplace (#55799)

* [clang-tidy] enable modernize-use-emplace

* Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into modernize_use_emplace
Parent 1e4f627d
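
For background, the modernize-use-emplace check flags calls that construct a temporary object only to copy or move it into a container, and rewrites them to forward the constructor arguments directly so the element is built in place. A minimal illustrative sketch of the pattern this commit applies throughout (not code from the commit itself):

#include <string>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<std::string, int>> v;
  // Flagged by modernize-use-emplace: builds a temporary pair, then moves it.
  v.push_back(std::make_pair(std::string("key"), 1));
  // Suggested fix: construct the pair in place from the arguments.
  v.emplace_back("key", 1);
  return 0;
}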
......@@ -180,7 +180,7 @@ modernize-redundant-void-arg,
-modernize-shrink-to-fit,
-modernize-unary-static-assert,
-modernize-use-bool-literals,
-modernize-use-emplace,
modernize-use-emplace,
-modernize-use-equals-default,
-modernize-use-equals-delete,
-modernize-use-noexcept,
......
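In a .clang-tidy Checks list, a leading '-' disables the named check, so the hunk above enables modernize-use-emplace simply by dropping that prefix. A hypothetical minimal .clang-tidy with the same effect for just this one check (an illustration, not the repository's actual file):

Checks: '-*,modernize-use-emplace'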
......@@ -203,8 +203,7 @@ GetAxesDimsMappingPair(const std::vector<std::string>& tensor_axes,
std::vector<std::pair<std::string, std::vector<int64_t>>> res;
size_t ntensor = specs.size();
for (size_t i = 0; i < ntensor; ++i) {
res.emplace_back(std::pair<std::string, std::vector<int64_t>>(
tensor_axes[i], specs[i].dims_mapping()));
res.emplace_back(tensor_axes[i], specs[i].dims_mapping());
}
return res;
}
......
......@@ -609,8 +609,8 @@ void EagerReducer::InitializeDenseGroups(
p_group->length_.push_back(size);
// for concat operator
p_group->origin_shapes_.push_back(IntArray(tensor.shape()));
p_group->dense_tensors_.push_back(phi::DenseTensor());
p_group->origin_shapes_.emplace_back(tensor.shape());
p_group->dense_tensors_.emplace_back();
const auto &dtype = tensor.dtype();
const auto &inner_place = tensor.impl()->place();
......
......@@ -114,7 +114,7 @@ void FeatureNode::recover_from_buffer(char* buffer) {
memcpy(str, buffer, feat_len);
buffer += feat_len;
str[feat_len] = '\0';
feature.push_back(std::string(str));
feature.push_back(str); // NOLINT
}
}
} // namespace distributed
......
......@@ -88,7 +88,7 @@ int32_t SSDSparseTable::PullSparse(float* pull_values,
_real_local_shard_num);
for (size_t i = 0; i < num; ++i) {
int shard_id = (keys[i] % _sparse_table_shard_num) % _avg_local_shard_num;
task_keys[shard_id].push_back({keys[i], i});
task_keys[shard_id].emplace_back(keys[i], i);
}
std::atomic<uint32_t> missed_keys{0};
......@@ -202,8 +202,8 @@ int32_t SSDSparseTable::PullSparsePtr(int shard_id,
auto itr = local_shard.find(key);
if (itr == local_shard.end()) {
cur_ctx->batch_index.push_back(i);
cur_ctx->batch_keys.push_back(rocksdb::Slice(
(char*)&(pull_keys[i]), sizeof(uint64_t))); // NOLINT
cur_ctx->batch_keys.emplace_back(
reinterpret_cast<const char*>(&(pull_keys[i])), sizeof(uint64_t));
if (cur_ctx->batch_keys.size() == 1024) {
cur_ctx->batch_values.resize(cur_ctx->batch_keys.size());
cur_ctx->status.resize(cur_ctx->batch_keys.size());
......@@ -334,7 +334,7 @@ int32_t SSDSparseTable::PushSparse(const uint64_t* keys,
_real_local_shard_num);
for (size_t i = 0; i < num; ++i) {
int shard_id = (keys[i] % _sparse_table_shard_num) % _avg_local_shard_num;
task_keys[shard_id].push_back({keys[i], i});
task_keys[shard_id].emplace_back(keys[i], i);
}
for (int shard_id = 0; shard_id < _real_local_shard_num; ++shard_id) {
tasks[shard_id] =
......@@ -440,7 +440,7 @@ int32_t SSDSparseTable::PushSparse(const uint64_t* keys,
_real_local_shard_num);
for (size_t i = 0; i < num; ++i) {
int shard_id = (keys[i] % _sparse_table_shard_num) % _avg_local_shard_num;
task_keys[shard_id].push_back({keys[i], i});
task_keys[shard_id].emplace_back(keys[i], i);
}
for (int shard_id = 0; shard_id < _real_local_shard_num; ++shard_id) {
tasks[shard_id] =
......@@ -1658,11 +1658,10 @@ int32_t SSDSparseTable::LoadWithString(
// ssd or mem
if (_value_accesor->SaveSSD(data_buffer_ptr)) {
tmp_key.emplace_back(key);
ssd_keys.emplace_back(std::make_pair(
reinterpret_cast<char*>(&tmp_key.back()), sizeof(uint64_t)));
ssd_values.emplace_back(
std::make_pair(reinterpret_cast<char*>(data_buffer_ptr),
value_size * sizeof(float)));
ssd_keys.emplace_back(reinterpret_cast<char*>(&tmp_key.back()),
sizeof(uint64_t));
ssd_values.emplace_back(reinterpret_cast<char*>(data_buffer_ptr),
value_size * sizeof(float));
data_buffer_ptr += feature_value_size;
if (static_cast<int>(ssd_keys.size()) ==
FLAGS_pserver_load_batch_size) {
......
......@@ -297,7 +297,7 @@ TEST(Attribute, ProtoAttrToAttribute_scalars) {
std::vector<paddle::experimental::Scalar> scalars;
for (int i = 0; i < 10; i++) {
scalars.push_back(paddle::experimental::Scalar(i));
scalars.emplace_back(i);
}
std::vector<paddle::framework::proto::Scalar> proto_scalars;
proto_scalars.reserve(scalars.size());
......
......@@ -1084,13 +1084,13 @@ void MultiSlotInMemoryDataFeed::Init(
feed_vec_.resize(use_slots_.size());
const int kEstimatedFeasignNumPerSlot = 5; // Magic Number
for (size_t i = 0; i < all_slot_num; i++) {
batch_float_feasigns_.push_back(std::vector<float>());
batch_uint64_feasigns_.push_back(std::vector<uint64_t>());
batch_float_feasigns_.emplace_back();
batch_uint64_feasigns_.emplace_back();
batch_float_feasigns_[i].reserve(default_batch_size_ *
kEstimatedFeasignNumPerSlot);
batch_uint64_feasigns_[i].reserve(default_batch_size_ *
kEstimatedFeasignNumPerSlot);
offset_.push_back(std::vector<size_t>());
offset_.emplace_back();
offset_[i].reserve(default_batch_size_ +
1); // Each lod info will prepend a zero
}
......@@ -1224,7 +1224,7 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstanceFromPipe(Record* instance) {
}
FeatureFeasign f;
f.float_feasign_ = feasign;
instance->float_feasigns_.push_back(FeatureItem(f, idx));
instance->float_feasigns_.emplace_back(f, idx);
}
} else if (all_slots_type_[i][0] == 'u') { // uint64
for (int j = 0; j < num; ++j) {
......@@ -1236,7 +1236,7 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstanceFromPipe(Record* instance) {
}
FeatureFeasign f;
f.uint64_feasign_ = feasign;
instance->uint64_feasigns_.push_back(FeatureItem(f, idx));
instance->uint64_feasigns_.emplace_back(f, idx);
}
}
pos = endptr - str;
......@@ -1297,7 +1297,7 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstance(Record* instance) {
}
FeatureFeasign f;
f.float_feasign_ = feasign;
instance->float_feasigns_.push_back(FeatureItem(f, idx));
instance->float_feasigns_.emplace_back(f, idx);
}
} else if (all_slots_type_[i][0] == 'u') { // uint64
for (int j = 0; j < num; ++j) {
......@@ -1307,7 +1307,7 @@ bool MultiSlotInMemoryDataFeed::ParseOneInstance(Record* instance) {
}
FeatureFeasign f;
f.uint64_feasign_ = feasign;
instance->uint64_feasigns_.push_back(FeatureItem(f, idx));
instance->uint64_feasigns_.emplace_back(f, idx);
}
}
pos = endptr - str;
......@@ -2093,13 +2093,13 @@ void SlotRecordInMemoryDataFeed::Init(const DataFeedDesc& data_feed_desc) {
feed_vec_.resize(used_slots_info_.size());
const int kEstimatedFeasignNumPerSlot = 5; // Magic Number
for (size_t i = 0; i < all_slot_num; i++) {
batch_float_feasigns_.push_back(std::vector<float>());
batch_uint64_feasigns_.push_back(std::vector<uint64_t>());
batch_float_feasigns_.emplace_back();
batch_uint64_feasigns_.emplace_back();
batch_float_feasigns_[i].reserve(default_batch_size_ *
kEstimatedFeasignNumPerSlot);
batch_uint64_feasigns_[i].reserve(default_batch_size_ *
kEstimatedFeasignNumPerSlot);
offset_.push_back(std::vector<size_t>());
offset_.emplace_back();
offset_[i].reserve(default_batch_size_ +
1); // Each lod info will prepend a zero
}
......
......@@ -510,8 +510,8 @@ void DatasetImpl<T>::LoadIntoMemory() {
#endif
} else {
for (int64_t i = 0; i < thread_num_; ++i) {
load_threads.push_back(std::thread(
&paddle::framework::DataFeed::LoadIntoMemory, readers_[i].get()));
load_threads.emplace_back(&paddle::framework::DataFeed::LoadIntoMemory,
readers_[i].get());
}
for (std::thread& t : load_threads) {
t.join();
......@@ -534,16 +534,16 @@ void DatasetImpl<T>::PreLoadIntoMemory() {
CHECK(static_cast<size_t>(preload_thread_num_) == preload_readers_.size());
preload_threads_.clear();
for (int64_t i = 0; i < preload_thread_num_; ++i) {
preload_threads_.push_back(
std::thread(&paddle::framework::DataFeed::LoadIntoMemory,
preload_readers_[i].get()));
preload_threads_.emplace_back(
&paddle::framework::DataFeed::LoadIntoMemory,
preload_readers_[i].get());
}
} else {
CHECK(static_cast<size_t>(thread_num_) == readers_.size());
preload_threads_.clear();
for (int64_t i = 0; i < thread_num_; ++i) {
preload_threads_.push_back(std::thread(
&paddle::framework::DataFeed::LoadIntoMemory, readers_[i].get()));
preload_threads_.emplace_back(
&paddle::framework::DataFeed::LoadIntoMemory, readers_[i].get());
}
}
VLOG(3) << "DatasetImpl<T>::PreLoadIntoMemory() end";
......@@ -849,7 +849,7 @@ void MultiSlotDataset::GlobalShuffle(int thread_num) {
}
VLOG(3) << "start global shuffle threads, num = " << thread_num;
for (int i = 0; i < thread_num; ++i) {
global_shuffle_threads.push_back(std::thread(global_shuffle_func));
global_shuffle_threads.emplace_back(global_shuffle_func);
}
for (std::thread& t : global_shuffle_threads) {
t.join();
......@@ -1618,7 +1618,7 @@ void MultiSlotDataset::GetRandomData(
for (auto slot : slots_to_replace) {
auto range = rand_rec.feas_.equal_range(slot);
for (auto it = range.first; it != range.second; ++it) {
new_rec.uint64_feasigns_.push_back({it->second, it->first});
new_rec.uint64_feasigns_.emplace_back(it->second, it->first);
debug_push_cnt += 1;
}
}
......
......@@ -88,7 +88,7 @@ void DistMultiTrainer::InitDumpEnv() {
}
}
for (int i = 0; i < dump_thread_num_; i++) {
dump_thread_.push_back(std::thread([this, i] { DumpWork(i); }));
dump_thread_.emplace_back([this, i] { DumpWork(i); });
}
}
......@@ -131,11 +131,10 @@ void DistMultiTrainer::InitOtherEnv(const ProgramDesc &main_program) {
void DistMultiTrainer::Run() {
for (int thidx = 0; thidx < thread_num_; ++thidx) {
if (!debug_) {
threads_.push_back(
std::thread(&DeviceWorker::TrainFiles, workers_[thidx].get()));
threads_.emplace_back(&DeviceWorker::TrainFiles, workers_[thidx].get());
} else {
threads_.push_back(std::thread(&DeviceWorker::TrainFilesWithProfiler,
workers_[thidx].get()));
threads_.emplace_back(&DeviceWorker::TrainFilesWithProfiler,
workers_[thidx].get());
}
}
}
......
......@@ -107,14 +107,14 @@ void DownpourWorker::Initialize(const TrainerDesc& desc) {
uint64_t dest_table = copy_table_config_.dest_sparse_tables(i);
VLOG(3) << "copy_sparse_tables_ push back " << src_table << "->"
<< dest_table;
copy_sparse_tables_.push_back(std::make_pair(src_table, dest_table));
copy_sparse_tables_.emplace_back(src_table, dest_table);
}
for (int i = 0; i < copy_table_config_.src_dense_tables_size(); ++i) {
uint64_t src_table = copy_table_config_.src_dense_tables(i);
uint64_t dest_table = copy_table_config_.dest_dense_tables(i);
VLOG(3) << "copy_dense_tables_ push back " << src_table << "->"
<< dest_table;
copy_dense_tables_.push_back(std::make_pair(src_table, dest_table));
copy_dense_tables_.emplace_back(src_table, dest_table);
}
for (auto& m : copy_table_config_.table_denpendency_map()) {
if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
......
......@@ -168,14 +168,14 @@ void DownpourWorkerOpt::Initialize(const TrainerDesc& desc) {
uint64_t dest_table = copy_table_config_.dest_sparse_tables(i);
VLOG(3) << "copy_sparse_tables_ push back " << src_table << "->"
<< dest_table;
copy_sparse_tables_.push_back(std::make_pair(src_table, dest_table));
copy_sparse_tables_.emplace_back(src_table, dest_table);
}
for (int i = 0; i < copy_table_config_.src_dense_tables_size(); ++i) {
uint64_t src_table = copy_table_config_.src_dense_tables(i);
uint64_t dest_table = copy_table_config_.dest_dense_tables(i);
VLOG(3) << "copy_dense_tables_ push back " << src_table << "->"
<< dest_table;
copy_dense_tables_.push_back(std::make_pair(src_table, dest_table));
copy_dense_tables_.emplace_back(src_table, dest_table);
}
for (auto& m : copy_table_config_.table_denpendency_map()) {
if (sparse_key_names_.find(m.key()) != sparse_key_names_.end()) {
......
......@@ -179,7 +179,7 @@ std::vector<std::string> localfs_list(const std::string& path) {
std::vector<std::string> list;
while (reader.getline(&*pipe)) {
list.push_back(reader.get());
list.emplace_back(reader.get());
}
return list;
......
......@@ -174,8 +174,8 @@ int EmbeddingEltwiseLayerNormFusePass::BuildFusion(
return;
}
std::vector<std::pair<Node*, Node*>> ins;
ins.push_back(std::make_pair(lookup_table1_x, lookup_table1_w));
ins.push_back(std::make_pair(lookup_table2_x, lookup_table2_w));
ins.emplace_back(lookup_table1_x, lookup_table1_w);
ins.emplace_back(lookup_table2_x, lookup_table2_w);
start_pattern_in_nodes.push_back(ins);
start_pattern_out_node.push_back(eltwise_add_out);
......@@ -294,7 +294,7 @@ int EmbeddingEltwiseLayerNormFusePass::BuildFusion(
for (size_t k = 0; k < end_pattern_elt_out.size(); ++k) {
if (tmp == end_pattern_elt_out[k]) {
fusion_ids.push_back(std::make_pair(i, std::make_pair(k, js)));
fusion_ids.emplace_back(i, std::make_pair(k, js));
break;
}
}
......
......@@ -107,7 +107,7 @@ void MultiTrainer::InitDumpEnv() {
}
}
for (int i = 0; i < dump_thread_num_; i++) {
dump_thread_.push_back(std::thread([this, i] { DumpWork(i); }));
dump_thread_.emplace_back([this, i] { DumpWork(i); });
}
}
......
......@@ -174,31 +174,31 @@ int StatisticsEngine::Init(const platform::NodeTrees& trees) {
void StatisticsEngine::InitStdEvents() {
name2idx_["Total"] = names_.size();
names_.push_back("Total");
names_.emplace_back("Total");
name2idx_["PythonEnd"] = names_.size();
names_.push_back("PythonEnd");
names_.emplace_back("PythonEnd");
name2idx_["CplusplusEnd"] = names_.size();
names_.push_back("CplusplusEnd");
names_.emplace_back("CplusplusEnd");
name2idx_["RunOp"] = names_.size();
names_.push_back("RunOp");
names_.emplace_back("RunOp");
name2idx_["LaunchKernel"] = names_.size();
names_.push_back("LaunchKernel");
names_.emplace_back("LaunchKernel");
name2idx_["OpCompute"] = names_.size();
names_.push_back("OpCompute");
names_.emplace_back("OpCompute");
name2idx_["OpInfershape"] = names_.size();
names_.push_back("OpInfershape");
names_.emplace_back("OpInfershape");
name2idx_["DataTransform"] = names_.size();
names_.push_back("DataTransform");
names_.emplace_back("DataTransform");
name2idx_["GarbageCollect"] = names_.size();
names_.push_back("GarbageCollect");
names_.emplace_back("GarbageCollect");
name2idx_["CalcNextOp"] = names_.size();
names_.push_back("CalcNextOp");
names_.emplace_back("CalcNextOp");
name2idx_["AllocateDeviceMem"] = names_.size();
names_.push_back("AllocateDeviceMem");
names_.emplace_back("AllocateDeviceMem");
name2idx_["FreeDeviceMem"] = names_.size();
names_.push_back("FreeDeviceMem");
names_.emplace_back("FreeDeviceMem");
name2idx_["ThreadpoolAddTask"] = names_.size();
names_.push_back("ThreadpoolAddTask");
names_.emplace_back("ThreadpoolAddTask");
size_t n = names_.size();
filters_.resize(n);
......
......@@ -299,8 +299,8 @@ std::shared_ptr<OperatorBase> TransferLayout(const std::string& var_name,
VLOG(3) << "Create Variable " << *new_var_name
<< " locally, which pointer is " << ptr << "Variable Type "
<< var_type;
var_scope->MutableDataTransferAddedVars().push_back(
std::make_pair(*new_var_name, var_type));
var_scope->MutableDataTransferAddedVars().emplace_back(*new_var_name,
var_type);
var_scope->AddVar(*new_var_name, nullptr);
// 2. Construct VariableNameMap
......@@ -347,8 +347,8 @@ std::shared_ptr<OperatorBase> TransferDtype(const std::string& var_name,
VLOG(3) << "Create Variable " << *new_var_name
<< " locally, which pointer is " << ptr << "Variable Type "
<< var_type;
var_scope->MutableDataTransferAddedVars().push_back(
std::make_pair(*new_var_name, var_type));
var_scope->MutableDataTransferAddedVars().emplace_back(*new_var_name,
var_type);
var_scope->AddVar(*new_var_name, nullptr);
// 2. Construct VariableNameMap
......@@ -398,8 +398,8 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
VLOG(3) << "Create Variable " << *new_var_name
<< " locally, which pointer is " << ptr << "Variable Type "
<< var_type;
var_scope->MutableDataTransferAddedVars().push_back(
std::make_pair(*new_var_name, var_type));
var_scope->MutableDataTransferAddedVars().emplace_back(*new_var_name,
var_type);
var_scope->AddVar(*new_var_name, nullptr);
// 2. Construct VariableNameMap
......
......@@ -61,9 +61,9 @@ const proto::OpDef& GetOpDef(const std::string& op_name) {
}
if (op_def.type() != op_name) {
LOG(WARNING) << op_name << ".pbtxt has error type :" << op_def.type();
ops_definition.emplace(std::make_pair(op_name, proto::OpDef()));
ops_definition.emplace(op_name, proto::OpDef());
} else {
ops_definition.emplace(std::make_pair(op_name, std::move(op_def)));
ops_definition.emplace(op_name, std::move(op_def));
}
}
}
......
......@@ -30,7 +30,7 @@ TEST(OpDesc, SetScalarsAttr) {
std::vector<paddle::experimental::Scalar> scalars;
for (int i = 0; i < 4; i++) {
scalars.push_back(paddle::experimental::Scalar(i));
scalars.emplace_back(i);
}
opdesc.SetPlainAttr("scalars", scalars);
ASSERT_EQ(opdesc.GetAttrType("scalars"), paddle::framework::proto::SCALARS);
......
......@@ -64,8 +64,7 @@ void PipelineTrainer::InitDumpEnv() {
// TODO(sandyhouse): should make it as a config
dump_thread_num_ = 1;
for (int i = 0; i < dump_thread_num_; i++) {
dump_thread_.push_back(
std::thread(std::bind(&TrainerBase::DumpWork, this, i)));
dump_thread_.emplace_back(std::bind(&TrainerBase::DumpWork, this, i));
}
}
......
......@@ -377,7 +377,7 @@ void Reducer::InitializeDenseGroups(
p_group->length_.push_back(size);
// for concat operator
p_group->dense_tensors_.push_back(phi::DenseTensor());
p_group->dense_tensors_.emplace_back();
// check the dtype and place, it must be same.
const auto &dtype = var->DataType();
......
......@@ -2338,7 +2338,7 @@ void AnalysisPredictor::StatisticShapeRangeInfo() {
auto ShapeMaxFreq =
[](const std::map<int32_t, int32_t> &m) -> int32_t {
std::vector<std::pair<int32_t, int32_t>> counter;
for (auto &it : m) counter.push_back(it);
for (auto &it : m) counter.emplace_back(it);
std::sort(counter.begin(),
counter.end(),
[](std::pair<int32_t, int32_t> &a,
......
......@@ -373,7 +373,7 @@ CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
std::vector<std::string> flags;
if (config.fraction_of_gpu_memory >= 0.0f ||
config.fraction_of_gpu_memory <= 0.95f) {
flags.push_back("dummpy");
flags.emplace_back("dummpy");
std::string flag = "--fraction_of_gpu_memory_to_use=" +
num2str<float>(config.fraction_of_gpu_memory);
flags.push_back(flag);
......
......@@ -393,7 +393,7 @@ void CpuPassStrategy::EnableMKLDNN() {
void CpuPassStrategy::EnableMkldnnQuantizer() {
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_quantizer_) {
passes_.push_back("cpu_quantize_placement_pass");
passes_.emplace_back("cpu_quantize_placement_pass");
}
use_mkldnn_quantizer_ = true;
#else
......@@ -404,12 +404,12 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_bfloat16_) {
passes_.push_back("fc_mkldnn_pass");
passes_.push_back("fc_act_mkldnn_fuse_pass");
passes_.emplace_back("fc_mkldnn_pass");
passes_.emplace_back("fc_act_mkldnn_fuse_pass");
passes_.push_back("cpu_bfloat16_placement_pass");
passes_.push_back("cpu_bfloat16_pass");
passes_.push_back("cpu_quantize_squash_pass");
passes_.emplace_back("cpu_bfloat16_placement_pass");
passes_.emplace_back("cpu_bfloat16_pass");
passes_.emplace_back("cpu_quantize_squash_pass");
}
use_mkldnn_bfloat16_ = true;
#else
......@@ -421,60 +421,60 @@ void CpuPassStrategy::EnableMkldnnInt8() {
#ifdef PADDLE_WITH_MKLDNN
if (!use_mkldnn_int8_) {
passes_.clear();
passes_.push_back("simplify_with_basic_ops_pass");
passes_.push_back("quant_dequant_mkldnn_pass");
passes_.push_back("mkldnn_placement_pass");
passes_.push_back("constant_folding_pass");
passes_.push_back("squeeze2_transpose2_onednn_fuse_pass");
passes_.push_back("layer_norm_fuse_pass");
passes_.push_back("attention_lstm_fuse_pass");
passes_.push_back("seqconv_eltadd_relu_fuse_pass");
passes_.push_back("fc_lstm_fuse_pass");
passes_.push_back("mul_lstm_fuse_pass");
passes_.push_back("fc_gru_fuse_pass");
passes_.push_back("mul_gru_fuse_pass");
passes_.push_back("multi_gru_fuse_pass");
passes_.push_back("multi_gru_seq_fuse_pass");
passes_.push_back("seq_concat_fc_fuse_pass");
passes_.push_back("gpu_cpu_squeeze2_matmul_fuse_pass");
passes_.push_back("gpu_cpu_reshape2_matmul_fuse_pass");
passes_.push_back("gpu_cpu_flatten2_matmul_fuse_pass");
passes_.push_back("matmul_v2_scale_fuse_pass");
passes_.push_back("squared_mat_sub_fuse_pass");
passes_.push_back("is_test_pass");
passes_.push_back("gpu_cpu_map_matmul_v2_to_mul_pass");
passes_.push_back("gpu_cpu_map_matmul_v2_to_matmul_pass");
passes_.push_back("matmul_scale_fuse_pass");
passes_.push_back("gpu_cpu_map_matmul_to_mul_pass");
passes_.push_back("repeated_fc_relu_fuse_pass");
passes_.push_back("depthwise_conv_mkldnn_pass");
passes_.push_back("conv_bn_fuse_pass");
passes_.push_back("conv_eltwiseadd_bn_fuse_pass");
passes_.push_back("conv_affine_channel_mkldnn_fuse_pass");
passes_.push_back("conv_transpose_bn_fuse_pass");
passes_.push_back("conv_transpose_eltwiseadd_bn_fuse_pass");
passes_.push_back("conv_bias_mkldnn_fuse_pass");
passes_.push_back("conv_transpose_bias_mkldnn_fuse_pass");
passes_.push_back("conv_elementwise_add_mkldnn_fuse_pass");
passes_.push_back("conv_activation_mkldnn_fuse_pass");
passes_.push_back("fc_fuse_pass");
passes_.push_back("repeated_fc_relu_fuse_pass");
passes_.push_back("fc_mkldnn_pass");
passes_.push_back("fc_act_mkldnn_fuse_pass");
passes_.push_back("matmul_transpose_reshape_mkldnn_fuse_pass");
passes_.push_back("batch_norm_act_fuse_pass");
passes_.push_back("softplus_activation_onednn_fuse_pass");
passes_.push_back("compute_propagate_scales_mkldnn_pass");
passes_.push_back("scale_matmul_fuse_pass");
passes_.push_back("reshape_transpose_matmul_mkldnn_fuse_pass");
passes_.push_back("matmul_elementwise_add_mkldnn_fuse_pass");
passes_.push_back("operator_scale_onednn_fuse_pass");
passes_.push_back("operator_unsqueeze2_onednn_fuse_pass");
passes_.push_back("operator_reshape2_onednn_fuse_pass");
passes_.push_back("cpu_quantize_placement_pass");
passes_.push_back("cpu_quantize_pass");
passes_.push_back("cpu_quantize_squash_pass");
passes_.push_back("quant_transpose2_dequant_onednn_fuse_pass");
passes_.emplace_back("simplify_with_basic_ops_pass");
passes_.emplace_back("quant_dequant_mkldnn_pass");
passes_.emplace_back("mkldnn_placement_pass");
passes_.emplace_back("constant_folding_pass");
passes_.emplace_back("squeeze2_transpose2_onednn_fuse_pass");
passes_.emplace_back("layer_norm_fuse_pass");
passes_.emplace_back("attention_lstm_fuse_pass");
passes_.emplace_back("seqconv_eltadd_relu_fuse_pass");
passes_.emplace_back("fc_lstm_fuse_pass");
passes_.emplace_back("mul_lstm_fuse_pass");
passes_.emplace_back("fc_gru_fuse_pass");
passes_.emplace_back("mul_gru_fuse_pass");
passes_.emplace_back("multi_gru_fuse_pass");
passes_.emplace_back("multi_gru_seq_fuse_pass");
passes_.emplace_back("seq_concat_fc_fuse_pass");
passes_.emplace_back("gpu_cpu_squeeze2_matmul_fuse_pass");
passes_.emplace_back("gpu_cpu_reshape2_matmul_fuse_pass");
passes_.emplace_back("gpu_cpu_flatten2_matmul_fuse_pass");
passes_.emplace_back("matmul_v2_scale_fuse_pass");
passes_.emplace_back("squared_mat_sub_fuse_pass");
passes_.emplace_back("is_test_pass");
passes_.emplace_back("gpu_cpu_map_matmul_v2_to_mul_pass");
passes_.emplace_back("gpu_cpu_map_matmul_v2_to_matmul_pass");
passes_.emplace_back("matmul_scale_fuse_pass");
passes_.emplace_back("gpu_cpu_map_matmul_to_mul_pass");
passes_.emplace_back("repeated_fc_relu_fuse_pass");
passes_.emplace_back("depthwise_conv_mkldnn_pass");
passes_.emplace_back("conv_bn_fuse_pass");
passes_.emplace_back("conv_eltwiseadd_bn_fuse_pass");
passes_.emplace_back("conv_affine_channel_mkldnn_fuse_pass");
passes_.emplace_back("conv_transpose_bn_fuse_pass");
passes_.emplace_back("conv_transpose_eltwiseadd_bn_fuse_pass");
passes_.emplace_back("conv_bias_mkldnn_fuse_pass");
passes_.emplace_back("conv_transpose_bias_mkldnn_fuse_pass");
passes_.emplace_back("conv_elementwise_add_mkldnn_fuse_pass");
passes_.emplace_back("conv_activation_mkldnn_fuse_pass");
passes_.emplace_back("fc_fuse_pass");
passes_.emplace_back("repeated_fc_relu_fuse_pass");
passes_.emplace_back("fc_mkldnn_pass");
passes_.emplace_back("fc_act_mkldnn_fuse_pass");
passes_.emplace_back("matmul_transpose_reshape_mkldnn_fuse_pass");
passes_.emplace_back("batch_norm_act_fuse_pass");
passes_.emplace_back("softplus_activation_onednn_fuse_pass");
passes_.emplace_back("compute_propagate_scales_mkldnn_pass");
passes_.emplace_back("scale_matmul_fuse_pass");
passes_.emplace_back("reshape_transpose_matmul_mkldnn_fuse_pass");
passes_.emplace_back("matmul_elementwise_add_mkldnn_fuse_pass");
passes_.emplace_back("operator_scale_onednn_fuse_pass");
passes_.emplace_back("operator_unsqueeze2_onednn_fuse_pass");
passes_.emplace_back("operator_reshape2_onednn_fuse_pass");
passes_.emplace_back("cpu_quantize_placement_pass");
passes_.emplace_back("cpu_quantize_pass");
passes_.emplace_back("cpu_quantize_squash_pass");
passes_.emplace_back("quant_transpose2_dequant_onednn_fuse_pass");
}
use_mkldnn_int8_ = true;
#else
......
......@@ -122,7 +122,7 @@ static std::vector<ExprWrapper> DimsExprs2VecExprWrapper(
) {
std::vector<ExprWrapper> x_dims_wrap;
for (int i = 0; i < x_dims.nbDims; i++) {
x_dims_wrap.push_back(ExprWrapper(x_dims.d[i], &expr_builder));
x_dims_wrap.emplace_back(x_dims.d[i], &expr_builder);
}
return x_dims_wrap;
}
......@@ -643,7 +643,7 @@ nvinfer1::DimsExprs Conv2dFusionInferMeta(
std::vector<ExprWrapper> paddings_wrap;
for (size_t i = 0; i < paddings.size(); ++i) {
paddings_wrap.emplace_back(ExprWrapper(paddings[i], &expr_builder));
paddings_wrap.emplace_back(paddings[i], &expr_builder);
}
UpdatePaddingAndDilation(&paddings_wrap,
......
......@@ -92,7 +92,7 @@ void TablePrinter::InsertRow(const std::vector<std::string>& row) {
size_t max_height = 0;
for (size_t i = 0; i < row.size(); ++i) {
table_row.emplace_back(std::vector<std::string>());
table_row.emplace_back();
std::stringstream ss(row[i]);
std::string line;
size_t max_width = 0;
......@@ -113,7 +113,7 @@ void TablePrinter::InsertRow(const std::vector<std::string>& row) {
void TablePrinter::InsetDivider() {
heights_.emplace_back(1);
data_.emplace_back(std::vector<std::vector<std::string>>());
data_.emplace_back();
}
void TablePrinter::CalcLayout() {
......
......@@ -260,7 +260,8 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
op_item->dyn_cast<paddle::dialect::OpYamlInfoInterface>();
std::unique_ptr<OpYamlInfoParser> op_info_parser;
if (op_info_interface) {
op_info_parser.reset(new OpYamlInfoParser(op_info_interface.GetOpInfo()));
op_info_parser =
std::make_unique<OpYamlInfoParser>(op_info_interface.GetOpInfo());
}
std::string kernel_fn_str;
......@@ -328,7 +329,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
ctx,
phi::TransToPhiPlace(kernel_key.backend()),
result_type.dyn_cast<dialect::SelectedRowsType>());
op_output_types.push_back(allocated_selected_rows_dtype);
op_output_types.emplace_back(allocated_selected_rows_dtype);
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"Result type only support DenseTensorType and VectorType"));
......@@ -343,7 +344,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
for (size_t i = 0; i < op_item->num_operands(); ++i) {
auto cur_in = op_item->operand_source(i);
if (!cur_in) {
vec_inputs.push_back(ir::OpResult());
vec_inputs.emplace_back();
continue;
}
PADDLE_ENFORCE_EQ(map_value_pair.count(cur_in),
......
......@@ -388,7 +388,7 @@ std::vector<ir::OpResult> OpTranscriber::GenerateOperationInput(
if (legacy_input_vars.empty()) {
if (info.optional) {
op_inputs.push_back(ir::OpResult(nullptr));
op_inputs.emplace_back(nullptr);
continue;
}
}
......@@ -484,7 +484,7 @@ OpTranscriber::GenerateOperationOutput(ir::IrContext* ctx,
"Op %s arg %s should be optional if it can be empty",
op_desc.Type(),
legacy_output_name);
op_output_types.push_back(ir::Type(nullptr));
op_output_types.emplace_back(nullptr);
continue;
}
......@@ -521,7 +521,7 @@ OpTranscriber::GenerateOperationOutput(ir::IrContext* ctx,
<< info.type_name << " " << legacy_output_name << " "
<< legacy_output_vars.size();
if (legacy_output_vars.empty()) {
op_output_types.push_back(ir::Type(nullptr));
op_output_types.emplace_back(nullptr);
continue;
}
......@@ -548,7 +548,7 @@ OpTranscriber::GenerateOperationOutput(ir::IrContext* ctx,
std::vector<ir::Type> types;
for (const auto& var_name : legacy_output_vars) {
if (var_name == kEmptyVarName) {
types.push_back(ir::Type(nullptr));
types.emplace_back(nullptr);
arg_to_idx[var_name] = cur_output_idx;
continue;
}
......
......@@ -146,7 +146,7 @@ void VirtualMemoryAutoGrowthBestFitAllocator::ExtendAndMerge(size_t size) {
allocations_.push_back(std::move(allocateptr)); // hold allocation
if (all_blocks_.empty()) {
all_blocks_.push_back(Block(ptr, size, true));
all_blocks_.emplace_back(ptr, size, true);
free_blocks_.emplace(std::make_pair(size, ptr), all_blocks_.begin());
return;
}
......@@ -165,7 +165,7 @@ void VirtualMemoryAutoGrowthBestFitAllocator::ExtendAndMerge(size_t size) {
block_it);
} else {
// do not merge
all_blocks_.push_front(Block(ptr, size, true));
all_blocks_.emplace_back(ptr, size, true);
free_blocks_.emplace(std::make_pair(size, ptr), all_blocks_.begin());
}
} else {
......@@ -222,7 +222,7 @@ void VirtualMemoryAutoGrowthBestFitAllocator::ExtendAndMerge(size_t size) {
block_it);
} else {
// do not merge
all_blocks_.push_back(Block(ptr, size, true));
all_blocks_.emplace_back(ptr, size, true);
auto block_it = all_blocks_.end();
block_it--;
free_blocks_.emplace(std::make_pair(size, ptr), block_it);
......
......@@ -193,8 +193,8 @@ TEST(Malloc, GPUContextMultiThreadMultiStream) {
.get());
ctx->PartialInitWithAllocator();
dev_ctx.emplace_back(std::move(ctx));
threads.push_back(std::thread(
MultiStreamCompute, &data[i], &second_data[i], std::cref(*dev_ctx[i])));
threads.emplace_back(
MultiStreamCompute, &data[i], &second_data[i], std::cref(*dev_ctx[i]));
}
for (int i = 0; i < NUM_STREAMS; ++i) {
......
......@@ -289,8 +289,7 @@ class StreamSafeCUDAAllocTest : public ::testing::Test {
void MultiThreadMultiStreamRun() {
std::vector<std::thread> threads;
for (size_t i = 0; i < stream_num_; ++i) {
threads.push_back(
std::thread(&StreamSafeCUDAAllocTest::SingleStreamRun, this, i));
threads.emplace_back(&StreamSafeCUDAAllocTest::SingleStreamRun, this, i);
}
for (size_t i = 0; i < stream_num_; ++i) {
threads[i].join();
......
......@@ -124,7 +124,7 @@ class MultiGRUHandler {
// Create attributes for each oneDNN gru
for (int i = 0; i < 2 * layers_; ++i) {
attrs_.push_back(dnnl::primitive_attr());
attrs_.emplace_back();
}
if (is_int8) {
......
......@@ -26,8 +26,8 @@ std::vector<TreeNode> Tree2ColUtil::construct_patch(
std::unordered_map<int, bool> visited;
std::vector<TreeNode> patch;
stack.push(TreeNode(root, 1, 1, 0));
patch.emplace_back(TreeNode(root, 1, 1, 0));
stack.emplace(root, 1, 1, 0);
patch.emplace_back(root, 1, 1, 0);
visited[root] = true;
while (!stack.empty()) {
......@@ -39,8 +39,8 @@ std::vector<TreeNode> Tree2ColUtil::construct_patch(
size_t v = tr[node][i];
if (!visited[v] && static_cast<int>(u.get_depth()) + 1 < max_depth) {
visited[v] = true;
stack.push(TreeNode(v, i, sz, u.get_depth() + 1));
patch.push_back(TreeNode(v, i + 1, sz, u.get_depth() + 1));
stack.emplace(v, i, sz, u.get_depth() + 1);
patch.emplace_back(v, i + 1, sz, u.get_depth() + 1);
end = false;
}
}
......
......@@ -44,104 +44,104 @@ TEST(SerializationLoggerTest, dump_case0) {
std::list<DeviceTraceEvent> device_events;
std::list<MemTraceEvent> mem_events;
std::list<OperatorSupplementEvent> op_supplement_events;
host_events.push_back(HostTraceEvent(std::string("dataloader#1"),
TracerEventType::Dataloader,
1000,
10000,
10,
10));
host_events.push_back(HostTraceEvent(
std::string("op1"), TracerEventType::Operator, 11000, 20000, 10, 10));
host_events.push_back(HostTraceEvent(
std::string("op2"), TracerEventType::Operator, 21000, 30000, 10, 10));
host_events.push_back(HostTraceEvent(
std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11));
mem_events.push_back(MemTraceEvent(11500,
0x1000,
TracerMemEventType::Allocate,
10,
10,
50,
"GPU:0",
50,
50,
100,
100));
mem_events.push_back(MemTraceEvent(11900,
0x1000,
TracerMemEventType::Free,
10,
10,
-50,
"GPU:0",
0,
50,
100,
100));
host_events.emplace_back(std::string("dataloader#1"),
TracerEventType::Dataloader,
1000,
10000,
10,
10);
host_events.emplace_back(
std::string("op1"), TracerEventType::Operator, 11000, 20000, 10, 10);
host_events.emplace_back(
std::string("op2"), TracerEventType::Operator, 21000, 30000, 10, 10);
host_events.emplace_back(
std::string("op3"), TracerEventType::Operator, 31000, 40000, 10, 11);
mem_events.emplace_back(11500,
0x1000,
TracerMemEventType::Allocate,
10,
10,
50,
"GPU:0",
50,
50,
100,
100);
mem_events.emplace_back(11900,
0x1000,
TracerMemEventType::Free,
10,
10,
-50,
"GPU:0",
0,
50,
100,
100);
std::map<std::string, std::vector<std::vector<int64_t>>> input_shapes;
std::map<std::string, std::vector<std::string>> dtypes;
input_shapes[std::string("X")].push_back(std::vector<int64_t>{1, 2, 3});
input_shapes[std::string("X")].push_back(std::vector<int64_t>{4, 5, 6, 7});
dtypes[std::string("X")].push_back(std::string("int8"));
dtypes[std::string("X")].push_back(std::string("float32"));
dtypes[std::string("X")].emplace_back("int8");
dtypes[std::string("X")].emplace_back("float32");
AttributeMap attrs;
op_supplement_events.push_back(OperatorSupplementEvent(
11600, "op1", input_shapes, dtypes, "op1()", attrs, 0, 10, 10));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudalaunch1"), 15000, 17000, 10, 10, 1, 0));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudalaunch2"), 25000, 35000, 10, 10, 2, 0));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudalaunch3"), 33000, 37000, 10, 11, 3, 0));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudaMemcpy1"), 18000, 19000, 10, 10, 4, 0));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudaMemset1"), 38000, 39000, 10, 11, 5, 0));
device_events.push_back(DeviceTraceEvent(std::string("kernel1"),
TracerEventType::Kernel,
40000,
55000,
0,
10,
10,
1,
KernelEventInfo()));
device_events.push_back(DeviceTraceEvent(std::string("kernel2"),
TracerEventType::Kernel,
70000,
95000,
0,
10,
10,
2,
KernelEventInfo()));
device_events.push_back(DeviceTraceEvent(std::string("kernel3"),
TracerEventType::Kernel,
60000,
65000,
0,
10,
11,
3,
KernelEventInfo()));
device_events.push_back(DeviceTraceEvent(std::string("memcpy1"),
TracerEventType::Memcpy,
56000,
59000,
0,
10,
10,
4,
MemcpyEventInfo()));
device_events.push_back(DeviceTraceEvent(std::string("memset1"),
TracerEventType::Memset,
66000,
69000,
0,
10,
11,
5,
MemsetEventInfo()));
op_supplement_events.emplace_back(
11600, "op1", input_shapes, dtypes, "op1()", attrs, 0, 10, 10);
runtime_events.emplace_back(
std::string("cudalaunch1"), 15000, 17000, 10, 10, 1, 0);
runtime_events.emplace_back(
std::string("cudalaunch2"), 25000, 35000, 10, 10, 2, 0);
runtime_events.emplace_back(
std::string("cudalaunch3"), 33000, 37000, 10, 11, 3, 0);
runtime_events.emplace_back(
std::string("cudaMemcpy1"), 18000, 19000, 10, 10, 4, 0);
runtime_events.emplace_back(
std::string("cudaMemset1"), 38000, 39000, 10, 11, 5, 0);
device_events.emplace_back(std::string("kernel1"),
TracerEventType::Kernel,
40000,
55000,
0,
10,
10,
1,
KernelEventInfo());
device_events.emplace_back(std::string("kernel2"),
TracerEventType::Kernel,
70000,
95000,
0,
10,
10,
2,
KernelEventInfo());
device_events.emplace_back(std::string("kernel3"),
TracerEventType::Kernel,
60000,
65000,
0,
10,
11,
3,
KernelEventInfo());
device_events.emplace_back(std::string("memcpy1"),
TracerEventType::Memcpy,
56000,
59000,
0,
10,
10,
4,
MemcpyEventInfo());
device_events.emplace_back(std::string("memset1"),
TracerEventType::Memset,
66000,
69000,
0,
10,
11,
5,
MemsetEventInfo());
SerializationLogger logger("test_serialization_logger_case0.pb");
logger.LogMetaInfo(std::string("1.0.2"), 0);
NodeTrees tree(host_events,
......@@ -182,61 +182,61 @@ TEST(SerializationLoggerTest, dump_case1) {
std::list<DeviceTraceEvent> device_events;
std::list<MemTraceEvent> mem_events;
std::list<OperatorSupplementEvent> op_supplement_events;
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudalaunch1"), 15000, 17000, 10, 10, 1, 0));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudalaunch2"), 25000, 35000, 10, 10, 2, 0));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudalaunch3"), 33000, 37000, 10, 11, 3, 0));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudaMemcpy1"), 18000, 19000, 10, 10, 4, 0));
runtime_events.push_back(RuntimeTraceEvent(
std::string("cudaMemset1"), 38000, 39000, 10, 11, 5, 0));
device_events.push_back(DeviceTraceEvent(std::string("kernel1"),
TracerEventType::Kernel,
40000,
55000,
0,
10,
10,
1,
KernelEventInfo()));
device_events.push_back(DeviceTraceEvent(std::string("kernel2"),
TracerEventType::Kernel,
70000,
95000,
0,
10,
10,
2,
KernelEventInfo()));
device_events.push_back(DeviceTraceEvent(std::string("kernel3"),
TracerEventType::Kernel,
60000,
65000,
0,
10,
11,
3,
KernelEventInfo()));
device_events.push_back(DeviceTraceEvent(std::string("memcpy1"),
TracerEventType::Memcpy,
56000,
59000,
0,
10,
10,
4,
MemcpyEventInfo()));
device_events.push_back(DeviceTraceEvent(std::string("memset1"),
TracerEventType::Memset,
66000,
69000,
0,
10,
11,
5,
MemsetEventInfo()));
runtime_events.emplace_back(
std::string("cudalaunch1"), 15000, 17000, 10, 10, 1, 0);
runtime_events.emplace_back(
std::string("cudalaunch2"), 25000, 35000, 10, 10, 2, 0);
runtime_events.emplace_back(
std::string("cudalaunch3"), 33000, 37000, 10, 11, 3, 0);
runtime_events.emplace_back(
std::string("cudaMemcpy1"), 18000, 19000, 10, 10, 4, 0);
runtime_events.emplace_back(
std::string("cudaMemset1"), 38000, 39000, 10, 11, 5, 0);
device_events.emplace_back(std::string("kernel1"),
TracerEventType::Kernel,
40000,
55000,
0,
10,
10,
1,
KernelEventInfo());
device_events.emplace_back(std::string("kernel2"),
TracerEventType::Kernel,
70000,
95000,
0,
10,
10,
2,
KernelEventInfo());
device_events.emplace_back(std::string("kernel3"),
TracerEventType::Kernel,
60000,
65000,
0,
10,
11,
3,
KernelEventInfo());
device_events.emplace_back(std::string("memcpy1"),
TracerEventType::Memcpy,
56000,
59000,
0,
10,
10,
4,
MemcpyEventInfo());
device_events.emplace_back(std::string("memset1"),
TracerEventType::Memset,
66000,
69000,
0,
10,
11,
5,
MemsetEventInfo());
SerializationLogger logger("test_serialization_logger_case1.pb");
logger.LogMetaInfo(std::string("1.0.2"), 0);
NodeTrees tree(host_events,
......
......@@ -257,7 +257,7 @@ void IrPrinter::PrintOperandsType(const Operation* op) {
if (op_operand) {
op_operand_types.push_back(op_operand.type());
} else {
op_operand_types.push_back(Type());
op_operand_types.emplace_back();
}
}
os << " (";
......@@ -278,7 +278,7 @@ void IrPrinter::PrintOpReturnType(const Operation* op) {
if (op_result) {
op_result_types.push_back(op_result.type());
} else {
op_result_types.push_back(Type(nullptr));
op_result_types.emplace_back(nullptr);
}
}
PrintInterleave(
......
......@@ -86,13 +86,13 @@ PADDLE_API void AssignTensorImpl(const Tensor& src, Tensor* dst) {
void CustomOpKernelContext::EmplaceBackInput(Tensor&& input) {
size_t index = inputs_.size();
inputs_.emplace_back(input);
input_range_.emplace_back(std::make_pair(index, index + 1));
input_range_.emplace_back(index, index + 1);
}
void CustomOpKernelContext::EmplaceBackInputs(
const std::vector<Tensor>& inputs) {
size_t index = inputs_.size();
input_range_.emplace_back(std::make_pair(index, index + inputs.size()));
input_range_.emplace_back(index, index + inputs.size());
inputs_.insert(inputs_.end(),
std::make_move_iterator(inputs.begin()),
std::make_move_iterator(inputs.end()));
......@@ -101,13 +101,13 @@ void CustomOpKernelContext::EmplaceBackInputs(
void CustomOpKernelContext::EmplaceBackOutput(Tensor&& output) {
size_t index = outputs_.size();
outputs_.emplace_back(output);
output_range_.emplace_back(std::make_pair(index, index + 1));
output_range_.emplace_back(index, index + 1);
}
void CustomOpKernelContext::EmplaceBackOutputs(
const std::vector<Tensor>& outputs) {
size_t index = outputs_.size();
output_range_.emplace_back(std::make_pair(index, index + outputs.size()));
output_range_.emplace_back(index, index + outputs.size());
outputs_.insert(outputs_.end(),
std::make_move_iterator(outputs.begin()),
std::make_move_iterator(outputs.end()));
......
......@@ -41,8 +41,8 @@ TEST(StringHelper, FormatStringAppend) {
TEST(StringHelper, JoinStrings) {
std::vector<std::string> v;
v.push_back("hello");
v.push_back("world");
v.emplace_back("hello");
v.emplace_back("world");
std::string result = paddle::string::join_strings(v, ' ');
EXPECT_EQ(result, "hello world");
......
......@@ -146,18 +146,18 @@ void MultiSenderMultiReceiver(const size_t queue_cap,
size_t sender_num = to_send.size();
std::vector<std::thread> senders;
for (size_t s_idx = 0; s_idx < sender_num; ++s_idx) {
senders.emplace_back(std::thread([&, s_idx] {
senders.emplace_back([&, s_idx] {
for (size_t elem : to_send[s_idx]) {
std::this_thread::sleep_for(std::chrono::milliseconds(send_time_gap));
EXPECT_TRUE(q.Send(elem));
}
}));
});
}
std::vector<std::thread> receivers;
std::mutex mu;
std::vector<std::vector<size_t>> res;
for (size_t r_idx = 0; r_idx < receiver_num; ++r_idx) {
receivers.emplace_back(std::thread([&] {
receivers.emplace_back([&] {
std::vector<size_t> receiver_res;
while (true) {
std::this_thread::sleep_for(
......@@ -170,7 +170,7 @@ void MultiSenderMultiReceiver(const size_t queue_cap,
}
std::lock_guard<std::mutex> lock(mu);
res.push_back(receiver_res);
}));
});
}
for (auto& t : senders) {
t.join();
......
......@@ -89,7 +89,7 @@ static auto ToMutableTensorPtrVector(
static auto ToMetaTensorVector(const std::vector<DenseTensor> &tensors) {
std::vector<MetaTensor> results;
for (auto &t : tensors) {
results.push_back(t);
results.emplace_back(t);
}
return results;
}
......