Unverified commit c2f0e9c4, authored by gouzil, committed by GitHub

[clang-tidy] NO.8 enable `cppcoreguidelines-narrowing-conversions`. step:2 (#56895)

* [clang-tidy] replenish cppcoreguidelines-narrowing-conversions

* fix

* fix
Parent e9e07a19
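
Most of the hunks below apply the same fix pattern: a `size_t` (or `int64_t`) value flows into a narrower integer, which `cppcoreguidelines-narrowing-conversions` flags, and the conversion is made explicit with `static_cast`. A minimal sketch of the pattern, using hypothetical helper functions rather than the Paddle code itself:

#include <cstdint>
#include <vector>

// Hypothetical helpers, not taken from the Paddle sources.
int DimCount(const std::vector<int64_t>& shape) {
  // `int n = shape.size();` is an implicit size_t -> int narrowing that
  // cppcoreguidelines-narrowing-conversions reports; the fix used
  // throughout this diff makes the conversion explicit:
  return static_cast<int>(shape.size());
}

int64_t ProductOfDims(const std::vector<int64_t>& shape) {
  int64_t prod = 1;
  // Loop bounds follow the same pattern: cast the container size once
  // and keep the index type consistent with that bound.
  for (int i = 0, n = static_cast<int>(shape.size()); i < n; ++i) {
    prod *= shape[i];
  }
  return prod;
}
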
......@@ -378,13 +378,13 @@ std::string OperatorDistAttr::to_string() const {
}
void OperatorDistAttr::from_proto(const OperatorDistAttrProto& proto) {
for (int64_t i = 0; i < proto.input_dist_attrs_size(); ++i) {
for (int i = 0; i < proto.input_dist_attrs_size(); ++i) {
TensorDistAttr dist_attr;
std::string name = proto.input_dist_attrs(i).name();
dist_attr.from_proto(proto.input_dist_attrs(i).tensor_dist_attr());
input_dist_attrs_[name] = dist_attr;
}
for (int64_t i = 0; i < proto.output_dist_attrs_size(); ++i) {
for (int i = 0; i < proto.output_dist_attrs_size(); ++i) {
TensorDistAttr dist_attr;
std::string name = proto.output_dist_attrs(i).name();
dist_attr.from_proto(proto.output_dist_attrs(i).tensor_dist_attr());
......
......@@ -223,7 +223,7 @@ std::vector<int64_t> GetDimsMappingForAxes(
const std::unordered_map<std::string, int64_t>& axis_to_dim_map,
const bool unsharded_miss_axis) {
std::vector<int64_t> dims_mapping;
for (int64_t i = 0, n = axes.size(); i < n; i++) {
for (int64_t i = 0, n = static_cast<int64_t>(axes.size()); i < n; i++) {
std::string axis = axes.substr(i, 1);
if (axis == "1") {
dims_mapping.emplace_back(-1);
......
......@@ -34,7 +34,7 @@ CrossEntropyWithSoftmaxSPMDRule::InferForward(
input_specs_size));
auto x_shape = input_specs[0].shape();
int x_ndim = x_shape.size();
int x_ndim = static_cast<int>(x_shape.size());
auto x_dist_attr_src = input_specs[0].dist_attr();
std::vector<int64_t> x_dims_mapping_src = x_dist_attr_src.dims_mapping();
......
......@@ -85,7 +85,7 @@ void Flatten::set_inputs(const std::vector<DimTrans*>& dims) {
std::string Flatten::to_string() {
std::string ret_str("Flatten(");
for (int64_t i = 0, n = input_dims_.size(); i < n; ++i) {
for (int i = 0, n = static_cast<int>(input_dims_.size()); i < n; ++i) {
ret_str += input_dims_[i]->to_string();
if (i < n - 1) {
ret_str += ",";
......@@ -125,7 +125,7 @@ int64_t Split::local_splitted_shape_value() {
std::string Split::to_string() {
std::string ret_str("Split(");
ret_str += input_dim_trans_->to_string() + ", (";
for (int64_t i = 0, n = splitted_shape_.size(); i < n; ++i) {
for (int i = 0, n = static_cast<int>(splitted_shape_.size()); i < n; ++i) {
ret_str += std::to_string(splitted_shape_[i]);
if (i < n - 1) {
ret_str += ",";
......@@ -161,9 +161,9 @@ DimTrans* make_split(DimTrans* dim,
std::vector<int64_t> new_shape;
// map between from idx in shape to new_shape
std::vector<int64_t> idx_map(shape.size(), -1);
for (int64_t i = 0, n = shape.size(); i < n; ++i) {
for (int i = 0, n = static_cast<int>(shape.size()); i < n; ++i) {
if (shape[id] != 1) {
idx_map[i] = new_shape.size();
idx_map[i] = static_cast<int64_t>(new_shape.size());
new_shape.emplace_back(shape[i]);
}
}
......@@ -173,7 +173,8 @@ DimTrans* make_split(DimTrans* dim,
}
void CleanUp() {
for (int64_t i = 0, n = all_dim_trans.size(); i < n; i++) {
int n = static_cast<int>(all_dim_trans.size());
for (int i = 0; i < n; i++) {
if (all_dim_trans[i]) {
delete all_dim_trans[i];
all_dim_trans[i] = nullptr;
......@@ -210,8 +211,8 @@ DimTrans* GetDimTrans(DimTrans* dim_trans,
} else if (type == DimTrans::Type::FLATTEN) {
Flatten* flatten = dynamic_cast<Flatten*>(dim_trans);
const std::vector<DimTrans*>& inputs = flatten->inputs();
int64_t nmesh = (*shardable)[0].size();
for (int64_t i = 1, n = inputs.size(); i < n; i++) {
int64_t nmesh = (*shardable)[0].size(); // NOLINT
for (int i = 1, n = static_cast<int>(inputs.size()); i < n; i++) {
DimTrans* input = inputs[i];
if (input->type() == DimTrans::Type::INPUTDIM) {
InputDim* inputdim = dynamic_cast<InputDim*>(input);
......@@ -252,7 +253,7 @@ DimTrans* GetDimTrans(DimTrans* dim_trans,
phi::errors::InvalidArgument(
"The returned dim_trans must be INPUTDIM."));
InputDim* inputdim = dynamic_cast<InputDim*>(dim);
int64_t nmesh = mesh_shape.size();
int64_t nmesh = static_cast<int64_t>(mesh_shape.size());
int64_t input_axis = inputdim->input_dim();
// Check whether the sharded dim can be sharded on
......@@ -295,13 +296,15 @@ std::vector<std::vector<int64_t>> InferFromDimTrans(
const std::vector<int64_t>& mesh_shape = mesh.shape();
std::set<int64_t> sharded_input_dims;
for (int64_t i = 0, n = input_dims_mapping.size(); i < n; ++i) {
for (int64_t i = 0, n = static_cast<int64_t>(input_dims_mapping.size());
i < n;
++i) {
if (input_dims_mapping[i] > -1) {
sharded_input_dims.insert(i);
}
}
int64_t ndim = input_shape.size();
int64_t nmesh = mesh_shape.size();
int64_t ndim = static_cast<int64_t>(input_shape.size());
int64_t nmesh = static_cast<int64_t>(mesh_shape.size());
std::vector<std::vector<bool>> shardable(ndim,
std::vector<bool>(nmesh, true));
......@@ -319,7 +322,7 @@ std::vector<std::vector<int64_t>> InferFromDimTrans(
// get the map from sharded input dimensions to output dimensions.
std::vector<int64_t> dim_map_src2tgt(ndim, -1);
for (int64_t i = 0, n = dim_trans.size(); i < n; i++) {
for (int64_t i = 0, n = static_cast<int64_t>(dim_trans.size()); i < n; i++) {
DimTrans* dim = GetDimTrans(dim_trans[i],
&shardable,
&seen_input_dims,
......
......@@ -25,7 +25,7 @@ ElementwiseSPMDRule::InferForward(
const std::vector<DistTensorSpec>& input_specs,
const paddle::framework::AttributeMap& attrs) {
// step0: Verify Input Args Based on Elementwise Logic
int64_t ninputs = input_specs.size();
int64_t ninputs = static_cast<int64_t>(input_specs.size());
PADDLE_ENFORCE_GT(
ninputs,
0,
......@@ -39,7 +39,7 @@ ElementwiseSPMDRule::InferForward(
std::vector<std::string> input_axes_vec;
int64_t max_ndim = 0;
for (int64_t i = 0; i < ninputs; ++i) {
int64_t ndim = input_specs[i].shape().size();
int64_t ndim = static_cast<int64_t>(input_specs[i].shape().size());
if (ndim > max_ndim) {
max_ndim = ndim;
}
......@@ -49,7 +49,7 @@ ElementwiseSPMDRule::InferForward(
std::vector<int64_t> broadcast_axis_count(max_ndim, 0);
for (int64_t i = 0; i < ninputs; ++i) {
std::vector<int64_t> shape = input_specs[i].shape();
int64_t ndim = shape.size();
int64_t ndim = static_cast<int64_t>(shape.size());
int64_t start_dim = max_ndim - ndim;
std::string axes_notation = GetBroadcastAxes(ndim, max_ndim, alphabet);
if (ninputs > 1) {
......
......@@ -33,8 +33,8 @@ EmbeddingSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
input_specs_size));
auto x_shape = input_specs[0].shape();
auto weight_shape = input_specs[1].shape();
int x_ndim = x_shape.size();
int weight_ndim = weight_shape.size();
int x_ndim = static_cast<int>(x_shape.size());
int weight_ndim = static_cast<int>(weight_shape.size());
auto x_dist_attr_src = input_specs[0].dist_attr();
auto weight_dist_attr_src = input_specs[1].dist_attr();
std::vector<int64_t> x_dims_mapping = x_dist_attr_src.dims_mapping();
......@@ -170,9 +170,9 @@ EmbeddingSPMDRule::InferBackward(
output_specs_size));
auto x_shape = input_specs[0].shape();
int x_ndim = x_shape.size();
int x_ndim = static_cast<int>(x_shape.size());
auto out_shape = output_specs[0].shape();
int out_ndim = out_shape.size();
int out_ndim = static_cast<int>(out_shape.size());
PADDLE_ENFORCE_EQ(x_ndim,
out_ndim - 1,
......
......@@ -34,9 +34,9 @@ LayerNormSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
auto x_shape = input_specs[0].shape();
auto scale_shape = input_specs[1].shape();
auto bias_shape = input_specs[2].shape();
int x_ndim = x_shape.size();
int scale_ndim = scale_shape.size();
int bias_ndim = bias_shape.size();
int x_ndim = static_cast<int>(x_shape.size());
int scale_ndim = static_cast<int>(scale_shape.size());
int bias_ndim = static_cast<int>(bias_shape.size());
PADDLE_ENFORCE_EQ(
scale_ndim,
......
......@@ -26,7 +26,7 @@ std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
ReductionSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
const paddle::framework::AttributeMap& attrs) {
// step0: Verify Input Args Based on Elementwise Logic
int64_t ninputs = input_specs.size();
int64_t ninputs = static_cast<int64_t>(input_specs.size());
PADDLE_ENFORCE_EQ(
ninputs,
1,
......@@ -42,7 +42,7 @@ ReductionSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
std::string alphabet = "abcdefghijklmnopqrstuvwxyz";
// get einsum notation for input
int64_t ndim = input_specs[0].shape().size();
int64_t ndim = static_cast<int64_t>(input_specs[0].shape().size());
std::vector<std::string> input_axes_vec;
std::string input_axes = alphabet.substr(0, ndim);
input_axes_vec.emplace_back(input_axes);
......
......@@ -28,7 +28,7 @@ using phi::distributed::auto_parallel::str_join;
std::vector<int64_t> InferTargetShape(const std::vector<int64_t>& shape,
int64_t len) {
int64_t infer_idx = -1;
for (int64_t i = 0, n = shape.size(); i < n; i++) {
for (int64_t i = 0, n = static_cast<int64_t>(shape.size()); i < n; i++) {
if (shape[i] == -1) {
PADDLE_ENFORCE_EQ(
infer_idx,
......@@ -74,8 +74,8 @@ std::vector<DimTrans*> MakeReshapeDimTrans(
int64_t src_idx = 0, tgt_idx = 0;
int64_t s, t;
int64_t src_len, tgt_len;
src_len = src_shape.size();
tgt_len = inferred_tgt_shape.size();
src_len = static_cast<int64_t>(src_shape.size());
tgt_len = static_cast<int64_t>(inferred_tgt_shape.size());
while (src_idx < src_len || tgt_idx < tgt_len) {
std::vector<int64_t> src_dims, tgt_splitted_shape;
if (src_idx >= src_len) {
......@@ -125,7 +125,9 @@ std::vector<DimTrans*> MakeReshapeDimTrans(
}
DimTrans* flatten = make_flatten(input_dims);
for (int64_t i = 0, n = tgt_splitted_shape.size(); i < n; i++) {
for (int64_t i = 0, n = static_cast<int64_t>(tgt_splitted_shape.size());
i < n;
i++) {
ret.emplace_back(make_split(flatten, tgt_splitted_shape, i));
}
}
......@@ -155,7 +157,7 @@ paddle::distributed::auto_parallel::ReshapeSPMDRule::InferForward(
// handle the '0' values in target shape, '0' indicates
// that the target shape is equal to the source shape
for (int64_t i = 0, n = tgt_shape.size(); i < n; i++) {
for (int64_t i = 0, n = static_cast<int64_t>(tgt_shape.size()); i < n; i++) {
if (tgt_shape[i] == 0) {
tgt_shape[i] = src_shape[i];
}
......@@ -178,7 +180,7 @@ paddle::distributed::auto_parallel::ReshapeSPMDRule::InferForward(
VLOG(4) << "Reshape: input_shape: [" << str_join(src_shape)
<< "] output_shape: [" << str_join(tgt_shape) << "]";
VLOG(4) << "Transformation from input to output:";
for (int64_t i = 0, n = trans.size(); i < n; i++) {
for (int64_t i = 0, n = static_cast<int64_t>(trans.size()); i < n; i++) {
DimTrans* t = trans[i];
VLOG(4) << "\tOutput axis " << i << ": " << t->to_string();
}
......
......@@ -33,7 +33,7 @@ SoftmaxSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
input_specs_size));
auto x_shape = input_specs[0].shape();
int x_ndim = x_shape.size();
int x_ndim = static_cast<int>(x_shape.size());
auto x_dist_attr_src = input_specs[0].dist_attr();
std::vector<int64_t> x_dims_mapping = x_dist_attr_src.dims_mapping();
......
......@@ -27,7 +27,7 @@ std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
SplitSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
const paddle::framework::AttributeMap& attrs) {
// step0: Verify Input Args Based on Elementwise Logic
int64_t ninputs = input_specs.size();
int64_t ninputs = static_cast<int64_t>(input_specs.size());
PADDLE_ENFORCE_EQ(
ninputs,
1,
......@@ -37,7 +37,7 @@ SplitSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
VerifySpecs(input_specs, "split");
// step1: Build Einsum Notation
int64_t ndim = input_specs[0].shape().size();
int64_t ndim = static_cast<int64_t>(input_specs[0].shape().size());
int64_t noutput = 0;
// split api uses num or sections as attribute
if (attrs.find("num") != attrs.end()) {
......@@ -45,7 +45,7 @@ SplitSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
} else if (attrs.find("sections") != attrs.end()) {
std::vector<int64_t> sections =
ExtractAttr<std::vector<int64_t>>("sections", attrs);
noutput = sections.size();
noutput = static_cast<int64_t>(sections.size());
}
int64_t axis = ExtractAttr<int>("axis", attrs);
if (axis < 0) {
......
......@@ -23,7 +23,7 @@ std::pair<std::vector<TensorDistAttr>, std::vector<TensorDistAttr>>
TransposeSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
const paddle::framework::AttributeMap& attrs) {
// step0: Verify Input Args Based on Transpose Logic
int64_t ninputs = input_specs.size();
int64_t ninputs = static_cast<int64_t>(input_specs.size());
PADDLE_ENFORCE_EQ(
ninputs,
1,
......@@ -38,13 +38,13 @@ TransposeSPMDRule::InferForward(const std::vector<DistTensorSpec>& input_specs,
std::string alphabet = "abcdefghijklmnopqrstuvwxyz";
// get einsum notation for input
int64_t ndim = input_specs[0].shape().size();
int64_t ndim = static_cast<int64_t>(input_specs[0].shape().size());
std::vector<std::string> input_axes_vec;
std::string input_axes = alphabet.substr(0, ndim);
input_axes_vec.emplace_back(input_axes);
// get einsum notation for output
for (int64_t i = 0, n = perm_dims.size(); i < n; ++i) {
for (int64_t i = 0, n = static_cast<int64_t>(perm_dims.size()); i < n; ++i) {
// convert the negative dim value to normal dim value
if (perm_dims[i] < 0) {
perm_dims[i] = ndim + perm_dims[i];
......
......@@ -953,9 +953,9 @@ void EagerReducer::MarkGroupReady(size_t group_index) {
++next_group_) {
UNUSED auto &group = groups_[next_group_];
if (group.is_sparse_) {
AllReduceSparse(&group, next_group_);
AllReduceSparse(&group, static_cast<int>(next_group_));
} else {
FusedAllReduceSchedule(&group, next_group_);
FusedAllReduceSchedule(&group, static_cast<int>(next_group_));
}
}
}
......@@ -1078,7 +1078,7 @@ void EagerReducer::FusedAllReduceSchedule(EagerGroup *group,
// div nranks
paddle::experimental::scale_(
group->dense_contents_, 1.0 / nranks_, 0.0, false);
group->dense_contents_, 1.0 / nranks_, 0.0, false); // NOLINT
// all_reduce
std::vector<Tensor> reduce_tensors = {group->dense_contents_};
......@@ -1104,7 +1104,8 @@ void EagerReducer::AllReduceSparse(EagerGroup *group,
const int curr_group_index) {
// div nranks
Tensor sparse_tensor(group->sparse_contents_);
paddle::experimental::scale_(sparse_tensor, 1.0 / nranks_, 0.0, false);
paddle::experimental::scale_(
sparse_tensor, 1.0 / nranks_, 0.0, false); // NOLINT
VLOG(3) << "sparse_group [" << curr_group_index << "] start allreduce.";
......
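
Where the conversion is intentional rather than accidental, the hunks above (for example the `1.0 / nranks_` scaling in the reducer) suppress the check in place with `// NOLINT` instead of casting. A minimal sketch of that suppression, assuming clang-tidy's standard NOLINT comment and an illustrative function name:

#include <cstdint>

// Illustrative only; mirrors the NOLINT usage in the reducer hunks above.
double InverseScale(int64_t nranks) {
  // int64_t -> double is an integer-to-floating-point narrowing that the
  // check reports, but the conversion is deliberate here, so the warning
  // is silenced on this line only.
  return 1.0 / nranks;  // NOLINT
}
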
......@@ -260,7 +260,8 @@ Interceptor* Carrier::SetInterceptor(int64_t interceptor_id,
interceptor->RegisterCarrier(this);
// TODO(fleet_exe dev): get loop
auto* loop = thread_pool_.GetLoop(interceptor_id % thread_num_);
auto* loop =
thread_pool_.GetLoop(static_cast<int>(interceptor_id % thread_num_));
PADDLE_ENFORCE_NOT_NULL(
loop, platform::errors::Fatal("thread task loop must not null"));
interceptor->RegisterTaskLoop(loop);
......@@ -296,7 +297,7 @@ void Carrier::CreateInterceptors(
auto gc = GetGC(place_);
// create source and sink task node
auto max_run_times = microbatch_scopes_.size();
int64_t max_run_times = static_cast<int64_t>(microbatch_scopes_.size());
TaskNode* source = new TaskNode(
rank_, SOURCE_ID, max_run_times); // rank, task_id, max_run_times
TaskNode* sink = new TaskNode(rank_, SINK_ID, max_run_times);
......
......@@ -242,7 +242,8 @@ bool DistModel::CommInit() {
std::string var_name_base = "comm_init_";
for (int64_t ring_id : ring_ids) {
VLOG(3) << "Init comm for ring id: " << ring_id;
int64_t ranks_in_group = config_.ring_id_to_ranks_[ring_id].size();
int64_t ranks_in_group =
static_cast<int64_t>(config_.ring_id_to_ranks_[ring_id].size());
int64_t rank_in_group = 0;
std::vector<int64_t> &ranks = config_.ring_id_to_ranks_[ring_id];
for (int64_t rank : ranks) {
......@@ -259,11 +260,11 @@ bool DistModel::CommInit() {
peer_endpoints.emplace_back(config_.trainer_endpoints[rank]);
}
InsertCommOp(var_name_base + std::to_string(order),
ranks_in_group,
rank_in_group,
static_cast<int>(ranks_in_group),
static_cast<int>(rank_in_group),
peer_endpoints,
comm_init_block,
ring_id);
static_cast<int>(ring_id));
order += 1;
}
framework::NaiveExecutor e(place_);
......@@ -408,7 +409,7 @@ bool DistModel::LoadProgram() {
fin.seekg(0, std::ios::end);
pb_content.resize(fin.tellg());
fin.seekg(0, std::ios::beg);
fin.read(&(pb_content.at(0)), pb_content.size());
fin.read(&(pb_content.at(0)), pb_content.size()); // NOLINT
fin.close();
program_proto.ParseFromString(pb_content);
VLOG(5) << pb_content;
......@@ -582,7 +583,7 @@ bool DistModel::FeedData(const std::vector<DistModelTensor> &input_data,
<< DistModelDTypeToString(input_data[i].dtype) << ".";
return false;
}
int feed_idx = feed_names_[target_name];
int feed_idx = static_cast<int>(feed_names_[target_name]);
framework::SetFeedVariable(scope, *input_tensor, "feed", feed_idx);
}
return true;
......
......@@ -43,7 +43,7 @@ struct DistModelConfig {
framework::ProgramDesc* program_desc{nullptr};
framework::Scope* scope{nullptr};
std::string place{};
int64_t device_id{0};
int device_id{0};
std::string device_type{};
std::vector<std::string> trainer_endpoints{};
std::string current_endpoint{};
......
......@@ -39,10 +39,11 @@ bool IsTensorVersionSupported(uint32_t version) {
std::string DumpVersion(const int64_t version) {
std::stringstream buffer;
const int major = version / MAJOR_COEFF;
const int minor = (version - major * MAJOR_COEFF) / MINOR_COEFF;
const int patch =
(version - major * MAJOR_COEFF - minor * MINOR_COEFF) / PATCH_COEFF;
const int major = static_cast<int>(version / MAJOR_COEFF);
const int minor =
static_cast<int>((version - major * MAJOR_COEFF) / MINOR_COEFF);
const int patch = static_cast<int>(
(version - major * MAJOR_COEFF - minor * MINOR_COEFF) / PATCH_COEFF);
buffer << major << "." << minor << "." << patch;
return buffer.str();
}
......
......@@ -199,12 +199,15 @@ void AutoGrowthBestFitAllocator::Trace() const {
cur_idle_bytes += it->second->size_;
}
VLOG(1) << "alloc:" << total_alloc_size_ / static_cast<double>(1024 * 1024)
<< "m free:" << total_free_size_ / static_cast<double>(1024 * 1024)
VLOG(1) << "alloc:"
<< total_alloc_size_ / static_cast<double>(1024 * 1024) // NOLINT
<< "m free:"
<< total_free_size_ / static_cast<double>(1024 * 1024) // NOLINT
<< "m busy:"
<< (total_alloc_size_ - total_free_size_) /
<< (total_alloc_size_ - total_free_size_) / // NOLINT
static_cast<double>(1024 * 1024)
<< "m idle:" << cur_idle_bytes / static_cast<double>(1024 * 1024)
<< "m idle:"
<< cur_idle_bytes / static_cast<double>(1024 * 1024) // NOLINT
<< "m alloc_times:" << total_alloc_times_
<< " free_times:" << total_free_times_
<< " free_blocks_num:" << free_blocks_.size()
......
......@@ -345,7 +345,8 @@ int MemoryMapAllocationPool::FindFromCache(const int &flag,
const std::string &file_name,
bool check_refcount) {
std::lock_guard<std::mutex> guard(mtx_);
for (size_t idx = 0; idx < memory_map_allocations_.size(); idx++) {
for (int idx = 0; idx < static_cast<int>(memory_map_allocations_.size());
idx++) {
if (memory_map_allocations_.at(idx).flags_ == flag &&
memory_map_allocations_.at(idx).data_size_ == data_size) {
if (file_name.empty() ||
......
......@@ -92,7 +92,7 @@ bool InitGflags(std::vector<std::string> args) {
args.insert(args.begin(), "dummy");
std::vector<char *> argv;
std::string line;
int argc = args.size();
int argc = static_cast<int>(args.size());
for (auto &arg : args) {
argv.push_back(const_cast<char *>(arg.data()));
line += arg;
......
......@@ -859,7 +859,8 @@ std::string PrintHostEvents() {
oss << thr_evt_sec.thread_id << std::endl;
for (const auto &evt : thr_evt_sec.events) {
oss << "{ " << evt.name << " | " << evt.start_ns << "ns | " << evt.end_ns
<< "ns | " << (evt.end_ns - evt.start_ns) / 1000.000 << "us }"
<< "ns | " << (evt.end_ns - evt.start_ns) / 1000.000
<< "us }" // NOLINT
<< std::endl;
}
}
......
......@@ -450,7 +450,8 @@ void ChromeTracingLogger::HandleTypeMemcpy(
MemcpyEventInfo memcpy_info = device_node.MemcpyInfo();
float memory_bandwidth = 0;
if (device_node.Duration() > 0) {
memory_bandwidth = memcpy_info.num_bytes * 1.0 / device_node.Duration();
memory_bandwidth =
memcpy_info.num_bytes * 1.0 / device_node.Duration(); // NOLINT
}
float dur = nsToMsFloat(device_node.Duration());
std::string dur_display;
......
......@@ -287,7 +287,7 @@ DeserializationReader::RestoreOperatorSupplementEventNode(
auto shape_vector_proto = shape_vectors_proto.shapes(j);
std::vector<int64_t> shape;
for (int k = 0; k < shape_vector_proto.size_size(); k++) {
shape.push_back(shape_vector_proto.size(k));
shape.push_back(shape_vector_proto.size(k)); // NOLINT
}
input_shape_vec.push_back(shape);
}
......
......@@ -49,9 +49,9 @@ int Timer::Count() { return _count; }
double Timer::ElapsedUS() { return static_cast<double>(_elapsed); }
double Timer::ElapsedMS() { return _elapsed / 1000.0; }
double Timer::ElapsedMS() { return _elapsed / 1000.0; } // NOLINT
double Timer::ElapsedSec() { return _elapsed / 1000000.0; }
double Timer::ElapsedSec() { return _elapsed / 1000000.0; } // NOLINT
int64_t Timer::Tickus() {
gettimeofday(&_now, nullptr);
......
......@@ -68,7 +68,7 @@ void RToSReshardFunction::Eval(phi::DeviceContext* dev_ctx,
<< " process participate in.";
std::vector<int64_t> split_num_vec =
BalancedSplit(in.dims()[split_axis], num_of_process);
BalancedSplit(in.dims()[static_cast<int>(split_axis)], num_of_process);
IntArray sections(split_num_vec);
std::vector<DenseTensor> split_out_vec;
......
......@@ -50,8 +50,8 @@ bool SToRReshardFunction::IsSuitable(const DistTensor& in,
GetSplitAxisWithDimsMapping(in_dims_mapping);
int64_t split_axis = split_axis_to_mesh_axis.begin()->first;
int64_t num_of_process = in_process_mesh.size();
flag &=
(in.local_dims()[split_axis] * num_of_process == in.dims()[split_axis]);
flag &= (in.local_dims()[static_cast<int>(split_axis)] * num_of_process ==
in.dims()[static_cast<int>(split_axis)]);
return flag;
}
......@@ -89,10 +89,11 @@ void SToRReshardFunction::Eval(DeviceContext* dev_ctx,
// first we need to split the result on axis 0,
// then we need to concat the split result on input split axis.
int64_t default_split_axis = 0;
int64_t num_of_process = in_process_ids.size();
int64_t num_of_process = static_cast<int64_t>(in_process_ids.size());
IntArray sections(std::vector<int64_t>(
num_of_process, in.value().dims()[default_split_axis]));
num_of_process,
in.value().dims()[static_cast<int>(default_split_axis)]));
std::vector<DenseTensor> split_out_vec;
RESHARD_FUNCTOR(dev_ctx,
Split,
......
......@@ -376,7 +376,7 @@ void FFTC2RGradInferMeta(const MetaTensor& x,
out->set_dtype(ToComplexType(x.dtype()));
phi::DDim out_dim = x.dims();
const int64_t last_fft_axis = axes.back();
const int last_fft_axis = static_cast<int>(axes.back());
if (last_dim_size > 0) {
out_dim.at(last_fft_axis) = last_dim_size / 2 + 1;
} else if (config.is_runtime) {
......@@ -534,7 +534,7 @@ void InstanceNormGradInferMeta(const MetaTensor& x,
phi::errors::InvalidArgument(
"The X@GRAD in InstanceNormGradInferMeta can't be nullptr."));
const auto x_dims = x.dims();
const int C = x_dims[1];
const int C = static_cast<int>(x_dims[1]);
x_grad->set_dims(x_dims);
x_grad->set_dtype(x.dtype());
x_grad->set_layout(x.layout());
......@@ -563,7 +563,7 @@ void InstanceNormDoubleGradInferMeta(const MetaTensor& x,
phi::errors::InvalidArgument(
"The DX in InstanceNormDoubleGradInferMeta can't be nullptr."));
const auto x_dims = x.dims();
const int C = x_dims[1];
const int C = static_cast<int>(x_dims[1]);
dx->set_dims(x_dims);
dx->set_dtype(x.dtype());
dx->set_layout(x.layout());
......@@ -1076,12 +1076,12 @@ void TransposeGradInferMeta(const MetaTensor& x,
std::vector<int> formated_axis = axis;
for (size_t i = 0; i < axis.size(); i++) {
if (axis[i] < 0) {
formated_axis[i] = axis[i] + x_rank;
formated_axis[i] = static_cast<int>(axis[i] + x_rank);
}
}
std::vector<int> reversed_axis(axis);
for (size_t i = 0; i < formated_axis.size(); i++) {
for (int i = 0; i < static_cast<int>(formated_axis.size()); i++) {
reversed_axis[formated_axis[i]] = i;
}
......@@ -1151,7 +1151,7 @@ void UnStackGradInferMeta(const std::vector<const MetaTensor*>& out_grad,
if (axis < 0) axis += (rank + 1);
auto vec = phi::vectorize<int>(input_dims[0]);
vec.insert(vec.begin() + axis, input_dims.size());
vec.insert(vec.begin() + axis, static_cast<int>(input_dims.size()));
x_grad->set_dims(phi::make_ddim(vec));
x_grad->set_dtype(out_grad[0]->dtype());
}
......
......@@ -450,7 +450,7 @@ void ConvInferMeta(const MetaTensor& input,
std::vector<int> dilations = dilations_t;
auto in_dims = input.dims();
auto filter_dims = filter.dims();
int dilation_size = dilations.size();
int dilation_size = static_cast<int>(dilations.size());
for (int i = 0; i < dilation_size; ++i) {
PADDLE_ENFORCE_GT(
dilations[i],
......@@ -492,7 +492,7 @@ void ConvInferMeta(const MetaTensor& input,
filter_dims,
filter_dims.size()));
int stride_size = strides.size();
int stride_size = static_cast<int>(strides.size());
for (int i = 0; i < stride_size; ++i) {
PADDLE_ENFORCE_GT(
strides[i],
......@@ -579,11 +579,12 @@ void ConvInferMeta(const MetaTensor& input,
(in_data_dims[i] <= 0 || filter_dims[i + 2] <= 0)) {
output_shape.push_back(-1);
} else {
const int dkernel = dilations[i] * (filter_data_dims[i] - 1) + 1;
int output_size =
const int dkernel =
static_cast<int>(dilations[i] * (filter_data_dims[i] - 1) + 1);
int output_size = static_cast<int>(
(in_data_dims[i] + paddings[2 * i] + paddings[2 * i + 1] - dkernel) /
strides[i] +
1;
1);
output_shape.push_back(output_size);
}
}
......@@ -660,7 +661,7 @@ void ConvTransposeInferMeta(const MetaTensor& x,
filter_dims,
filter_dims.size()));
int stride_size = strides.size();
int stride_size = static_cast<int>(strides.size());
for (int i = 0; i < stride_size; ++i) {
PADDLE_ENFORCE_GT(
strides[i],
......@@ -734,7 +735,7 @@ void ConvTransposeInferMeta(const MetaTensor& x,
output_shape.push_back(filter_dims[1] * groups);
}
const int offset = (data_layout != DataLayout::kNHWC ? 2 : 1);
for (size_t i = 0; i < strides.size(); ++i) {
for (int i = 0; i < static_cast<int>(strides.size()); ++i) {
auto filter_extent = dilations_[i] * (filter_dims[i + 2] - 1) + 1;
auto infer_shape = (config.is_runtime || x_dims[i + offset] > 0)
? (x_dims[i + offset] - 1) * strides[i] -
......@@ -1115,7 +1116,7 @@ void DropoutNdInferMeta(const MetaTensor& x,
std::for_each(
axis.begin(), axis.end(), [&mask_dims, &x_dims](const int64_t& t) {
mask_dims[t] = x_dims[t];
mask_dims[t] = x_dims[static_cast<int>(t)];
});
mask->set_dims(make_ddim(mask_dims));
......@@ -1125,7 +1126,7 @@ void DropoutNdInferMeta(const MetaTensor& x,
void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
auto x_dims = x.dims();
auto x_rank = static_cast<size_t>(x_dims.size());
int x_rank = static_cast<int>(x_dims.size());
PADDLE_ENFORCE_EQ(true,
1 == x_rank || 2 == x_rank,
phi::errors::PreconditionNotMet(
......@@ -1136,14 +1137,14 @@ void DotInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
auto y_dims = y.dims();
PADDLE_ENFORCE_EQ(
true,
x_rank == static_cast<size_t>(y_dims.size()),
x_rank == static_cast<int>(y_dims.size()),
phi::errors::PreconditionNotMet(
"ShapeError: The shape of input tensor Y: %s should match with "
"input tenosr X: %s",
y_dims.to_str(),
x_dims.to_str()));
bool shape_match = true;
for (size_t i = 0; i < x_rank; ++i) {
for (int i = 0; i < x_rank; ++i) {
if (x_dims[i] != y_dims[i]) {
shape_match = false;
break;
......@@ -1458,10 +1459,10 @@ void FusedMatmulInferMeta(const MetaTensor& x,
}
}
if (!x_broadcasted) {
new_dims.push_back(M);
new_dims.push_back(M); // NOLINT
}
if (!y_broadcasted) {
new_dims.push_back(N);
new_dims.push_back(N); // NOLINT
}
if (x_broadcasted && y_broadcasted) {
new_dims.push_back(1);
......@@ -1558,10 +1559,10 @@ void GatherInferMeta(const MetaTensor& x,
} else {
std::vector<int> out_dim_vec;
for (int i = 0; i < axis_v; i++) {
out_dim_vec.push_back(input_dim[i]);
out_dim_vec.push_back(input_dim[i]); // NOLINT
}
for (int i = axis_v + 1; i < input_dim.size(); i++) {
out_dim_vec.push_back(input_dim[i]);
out_dim_vec.push_back(input_dim[i]); // NOLINT
}
auto output_dims = phi::make_ddim(out_dim_vec);
out->set_dims(output_dims);
......@@ -1572,21 +1573,21 @@ void GatherInferMeta(const MetaTensor& x,
} else {
if (axis.FromTensor() || axis_v == 0) {
// if axis.FromTensor(), we can not obtain correct shape of output
int batch_size = index_dims[0];
int batch_size = static_cast<int>(index_dims[0]);
phi::DDim output_dims(input_dim);
output_dims[0] = batch_size;
out->set_dims(output_dims);
out->set_dtype(x.dtype());
out->share_lod(x);
} else {
int index_size = index_dims[0];
int index_size = static_cast<int>(index_dims[0]);
std::vector<int> out_dim_vec;
for (int i = 0; i < axis_v; i++) {
out_dim_vec.push_back(input_dim[i]);
out_dim_vec.push_back(input_dim[i]); // NOLINT
}
out_dim_vec.push_back(index_size);
for (int i = axis_v + 1; i < input_dim.size(); i++) {
out_dim_vec.push_back(input_dim[i]);
out_dim_vec.push_back(input_dim[i]); // NOLINT
}
auto output_dims = phi::make_ddim(out_dim_vec);
out->set_dims(output_dims);
......@@ -1602,7 +1603,7 @@ void GatherNdInferMeta(const MetaTensor& x,
auto x_dims = x.dims();
auto x_dims_size = x_dims.size();
auto index_dims = index.dims();
auto index_dims_size = index_dims.size();
int index_dims_size = index_dims.size();
PADDLE_ENFORCE_LE(
index_dims[index_dims_size - 1],
......@@ -1620,7 +1621,9 @@ void GatherNdInferMeta(const MetaTensor& x,
for (int i = 0; i < index_dims_size - 1; ++i) {
result_dims.emplace_back(index_dims[i]);
}
for (int i = index_dims[index_dims_size - 1]; i < x_dims_size; ++i) {
for (int i = static_cast<int>(index_dims[index_dims_size - 1]);
i < x_dims_size;
++i) {
result_dims.emplace_back(x_dims[i]);
}
......@@ -2010,8 +2013,8 @@ void LUUnpackInferMeta(const MetaTensor& x,
2,
phi::errors::InvalidArgument("The rank of input must greater than 2."));
int m = x_dims[x_rank - 1];
int n = x_dims[x_rank - 2];
int m = static_cast<int>(x_dims[x_rank - 1]);
int n = static_cast<int>(x_dims[x_rank - 2]);
int min_mn = std::min(m, n);
if (unpack_ludata) {
auto ldims = x_dims;
......@@ -2157,10 +2160,10 @@ void MatmulInferMeta(const MetaTensor& x,
}
}
if (!x_broadcasted) {
new_dims.push_back(M);
new_dims.push_back(M); // NOLINT
}
if (!y_broadcasted) {
new_dims.push_back(N);
new_dims.push_back(N); // NOLINT
}
auto ddim_out = phi::make_ddim(new_dims);
......@@ -2227,10 +2230,10 @@ void MatmulInt8InferMeta(const MetaTensor& x,
}
}
if (!x_broadcasted) {
new_dims.push_back(M);
new_dims.push_back(M); // NOLINT
}
if (!y_broadcasted) {
new_dims.push_back(N);
new_dims.push_back(N); // NOLINT
}
auto ddim_out = phi::make_ddim(new_dims);
......@@ -2397,8 +2400,8 @@ void MatrixRankTolInferMeta(const MetaTensor& x,
phi::errors::InvalidArgument("The dims of input must be greater than 2"));
if (hermitian) {
int rows = dim_x[dim_x.size() - 2];
int cols = dim_x[dim_x.size() - 1];
int rows = static_cast<int>(dim_x[dim_x.size() - 2]);
int cols = static_cast<int>(dim_x[dim_x.size() - 1]);
PADDLE_ENFORCE_EQ(rows,
cols,
phi::errors::InvalidArgument(
......@@ -2508,7 +2511,7 @@ void PReluInferMeta(const MetaTensor& x,
} else if (mode == "element") {
auto alpha_dim = alpha.dims();
auto alpha_rank = alpha_dim.size();
auto x_rank = x_dim.size();
int x_rank = x_dim.size();
PADDLE_ENFORCE_GE(x_rank,
1,
phi::errors::InvalidArgument(
......@@ -2527,7 +2530,7 @@ void PReluInferMeta(const MetaTensor& x,
x_rank));
size_t x_product = 1;
size_t alpha_product = 1;
for (int64_t i = x_rank - 1; i > 0; i--) {
for (int i = x_rank - 1; i > 0; i--) {
x_product *= x_dim[i];
alpha_product *= alpha_dim[i];
}
......@@ -2647,7 +2650,7 @@ void PriorBoxInferMeta(const MetaTensor& input,
std::vector<int64_t> dim_vec(4);
dim_vec[0] = input_dims[2];
dim_vec[1] = input_dims[3];
dim_vec[2] = num_priors;
dim_vec[2] = static_cast<int64_t>(num_priors);
dim_vec[3] = 4;
out->set_dtype(input.dtype());
......@@ -2720,8 +2723,8 @@ void SearchsortedInferMeta(const MetaTensor& sorted_sequence,
if (sequences_dims.size() != values_dims.size()) {
flag = false;
}
const auto& sequences_dims_size = sequences_dims.size();
for (int64_t dim = 0; dim < sequences_dims_size - 1; ++dim) {
const int& sequences_dims_size = sequences_dims.size();
for (int dim = 0; dim < sequences_dims_size - 1; ++dim) {
if (sequences_dims[dim] != values_dims[dim]) {
flag = false;
break;
......@@ -2918,9 +2921,9 @@ void LstsqInferMeta(const MetaTensor& x,
int x_rank = x_dims.size();
int y_rank = y_dims.size();
int m = x_dims[x_rank - 2];
int n = x_dims[x_rank - 1];
int nrhs = y_dims[x_rank - 1];
int m = static_cast<int>(x_dims[x_rank - 2]);
int n = static_cast<int>(x_dims[x_rank - 1]);
int nrhs = static_cast<int>(y_dims[x_rank - 1]);
PADDLE_ENFORCE_GE(
x_rank,
......@@ -3004,7 +3007,7 @@ void YoloBoxInferMeta(const MetaTensor& x,
MetaConfig config) {
auto dim_x = x.dims();
auto dim_imgsize = img_size.dims();
int anchor_num = anchors.size() / 2;
int anchor_num = static_cast<int>(anchors.size() / 2);
PADDLE_ENFORCE_EQ(
dim_x.size(),
......@@ -3089,7 +3092,7 @@ void YoloBoxInferMeta(const MetaTensor& x,
int box_num;
if ((dim_x[2] > 0 && dim_x[3] > 0) || config.is_runtime) {
box_num = dim_x[2] * dim_x[3] * anchor_num;
box_num = static_cast<int>(dim_x[2] * dim_x[3] * anchor_num);
} else {
box_num = -1;
}
......@@ -3181,10 +3184,10 @@ void SolveInferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) {
new_dims.assign(y_dims_vec.begin(), y_dims_vec.end() - 2);
}
if (!x_broadcasted) {
new_dims.push_back(M);
new_dims.push_back(M); // NOLINT
}
if (!y_broadcasted) {
new_dims.push_back(N);
new_dims.push_back(N); // NOLINT
}
if (x_broadcasted && y_broadcasted) {
new_dims.push_back(1);
......@@ -3232,7 +3235,7 @@ void UnpoolInferMeta(const MetaTensor& x,
if (config.is_runtime || !output_size.FromTensor()) {
output_size_val = output_size.GetData();
}
for (size_t i = 0; i < ksize.size(); ++i) {
for (int i = 0; i < static_cast<int>(ksize.size()); ++i) {
if (!config.is_runtime && in_x_dims[i + 2] <= 0) {
output_shape.push_back(-1);
} else {
......@@ -3273,7 +3276,7 @@ void Unpool3dInferMeta(const MetaTensor& x,
in_y_dims));
std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
for (size_t i = 0; i < ksize.size(); ++i) {
for (int i = 0; i < static_cast<int>(ksize.size()); ++i) {
if (!config.is_runtime && in_x_dims[i + 2] <= 0) {
output_shape.push_back(-1);
} else {
......
......@@ -207,14 +207,14 @@ void Conv1dXPUInferMeta(const MetaTensor& x,
groups));
std::vector<int64_t> out_shape({in_dims[0], filter_dims[0]});
out_shape.push_back(ConvOutSize(in_dims[2],
filter_dims[2],
out_shape.push_back(ConvOutSize(static_cast<int>(in_dims[2]),
static_cast<int>(filter_dims[2]),
dilations,
paddings[0],
paddings[1],
strides));
// set output and output max dims
out->set_dims(DDim(out_shape.data(), out_shape.size()));
out->set_dims(DDim(out_shape.data(), static_cast<int>(out_shape.size())));
out->set_dtype(x.dtype());
out->set_layout(x.layout());
out_max->set_dims(phi::make_ddim({6}));
......@@ -264,9 +264,9 @@ void Conv2dXPUInferMeta(const MetaTensor& x,
filter_dims.size()));
const auto input_channels = in_dims[1];
int stride_size = strides.size();
int stride_size = static_cast<int>(strides.size());
int in_sub_stride_size = in_dims.size() - stride_size;
int dilation_size = dilations.size();
int dilation_size = static_cast<int>(dilations.size());
PADDLE_ENFORCE_EQ(
in_dims.size(),
strides.size() + 2U,
......@@ -333,16 +333,16 @@ void Conv2dXPUInferMeta(const MetaTensor& x,
ksize);
std::vector<int64_t> out_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
out_shape.push_back(ConvOutSize(in_dims[i + 2],
filter_dims[i + 2],
for (int i = 0; i < static_cast<int>(strides.size()); ++i) {
out_shape.push_back(ConvOutSize(static_cast<int>(in_dims[i + 2]),
static_cast<int>(filter_dims[i + 2]),
dilations[i],
paddings_vec[i * 2],
paddings_vec[i * 2 + 1],
strides[i]));
}
// set output and output max dims
out->set_dims(DDim(out_shape.data(), out_shape.size()));
out->set_dims(DDim(out_shape.data(), static_cast<int>(out_shape.size())));
out_max->set_dims(phi::make_ddim({6}));
out->set_dtype(out_dtype);
}
......@@ -388,10 +388,10 @@ void FcXPUInferMeta(const MetaTensor& x,
MetaTensor* out_max) {
std::vector<int> out_shape(in_num_col_dims + 1);
for (int i = 0; i < in_num_col_dims; i++) {
out_shape[i] = x.dims()[i];
out_shape[i] = static_cast<int>(x.dims()[i]);
}
out_shape[in_num_col_dims] = w.dims()[0];
out->set_dims(DDim(out_shape.data(), out_shape.size()));
out_shape[in_num_col_dims] = static_cast<int>(w.dims()[0]);
out->set_dims(DDim(out_shape.data(), static_cast<int>(out_shape.size())));
out->set_dtype(out_dtype);
out->set_layout(x.layout());
out_max->set_dims(phi::make_ddim({6}));
......@@ -671,7 +671,7 @@ void ConvTransposeXPUInferMeta(const MetaTensor& x,
filter_dims,
filter_dims.size()));
int stride_size = strides.size();
int stride_size = static_cast<int>(strides.size());
for (int i = 0; i < stride_size; ++i) {
PADDLE_ENFORCE_GT(
strides[i],
......@@ -744,7 +744,7 @@ void ConvTransposeXPUInferMeta(const MetaTensor& x,
output_shape.push_back(filter_dims[1] * groups);
}
const int offset = (data_format != "NHWC" ? 2 : 1);
for (size_t i = 0; i < strides.size(); ++i) {
for (int i = 0; i < static_cast<int>(strides.size()); ++i) {
auto filter_extent = dilations_[i] * (filter_dims[i + 2] - 1) + 1;
auto infer_shape = (x_dims[i + offset] > 0)
? (x_dims[i + offset] - 1) * strides[i] -
......@@ -901,7 +901,7 @@ void FusedScaleBiasReluConvBnstatsInferMeta(
phi::errors::InvalidArgument("Expect group to be 1, got %d.", groups));
const auto input_channels = in_dims[in_dims.size() - 1];
int dilation_size = dilations.size();
int dilation_size = static_cast<int>(dilations.size());
for (int i = 0; i < dilation_size; ++i) {
PADDLE_ENFORCE_GT(
dilations[i],
......@@ -943,9 +943,9 @@ void FusedScaleBiasReluConvBnstatsInferMeta(
ksize);
std::vector<int64_t> out_shape({in_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
out_shape.push_back(ConvOutSize(in_dims[i + 1],
filter_dims[i + 2],
for (int i = 0; i < static_cast<int>(strides.size()); ++i) {
out_shape.push_back(ConvOutSize(static_cast<int>(in_dims[i + 1]),
static_cast<int>(filter_dims[i + 2]),
dilations[i],
paddings_vec[i * 2],
paddings_vec[i * 2 + 1],
......@@ -955,7 +955,7 @@ void FusedScaleBiasReluConvBnstatsInferMeta(
// make shape for other outputs
auto c_dims = phi::make_ddim({filter_dims[0]});
// set output and output max dims
out->set_dims(DDim(out_shape.data(), out_shape.size()));
out->set_dims(DDim(out_shape.data(), static_cast<int>(out_shape.size())));
out_running_mean->set_dims(c_dims);
out_running_var->set_dims(c_dims);
saved_mean->set_dims(c_dims);
......
......@@ -795,7 +795,7 @@ void BroadcastTensorsInferMeta(const std::vector<const MetaTensor*>& x,
int axis = static_cast<int>(input_ddim.size()) - index - 1;
int dim_size = 1;
if (axis >= 0) {
dim_size = input_ddim[axis];
dim_size = static_cast<int>(input_ddim[axis]);
}
if (target_dim_size != 1 && dim_size != 1 &&
......@@ -857,7 +857,7 @@ void CoalesceTensorInferMeta(const std::vector<const MetaTensor*>& input,
MetaTensor* fused_output,
MetaConfig config) {
if (size_of_dtype == -1) {
size_of_dtype = phi::SizeOf(dtype);
size_of_dtype = static_cast<int>(phi::SizeOf(dtype));
}
if (config.is_runtime) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
......@@ -902,7 +902,7 @@ void CoalesceTensorInferMeta(const std::vector<const MetaTensor*>& input,
align_size) /
size_of_dtype
: static_cast<size_t>(size);
numel += len;
numel += static_cast<int64_t>(len);
}
if (fused_output) {
fused_output->set_dims(phi::make_ddim({numel}));
......@@ -925,7 +925,7 @@ void CheckMemoryContinueInferMeta(const std::vector<const MetaTensor*>& input,
const auto& dim = item->dims();
auto size = phi::product(dim);
auto len = size * phi::SizeOf(item->dtype());
numel += len;
numel += static_cast<int64_t>(len);
}
output->set_dims(phi::make_ddim({numel}));
output->set_dtype(phi::DataType::INT8);
......@@ -1215,16 +1215,17 @@ void DeformableConvInferMeta(const MetaTensor& x,
}
std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
for (size_t i = 0; i < strides.size(); ++i) {
for (int i = 0; i < static_cast<int>(strides.size()); ++i) {
if (!config.is_runtime &&
(in_dims[i + 2] <= 0 || filter_dims[i + 2] <= 0)) {
output_shape.push_back(-1);
} else {
output_shape.push_back(ConvOutputSize(in_dims[i + 2],
filter_dims[i + 2],
dilations[i],
paddings[i],
strides[i]));
output_shape.push_back(
ConvOutputSize(static_cast<int>(in_dims[i + 2]),
static_cast<int>(filter_dims[i + 2]),
dilations[i],
paddings[i],
strides[i]));
}
}
......@@ -1628,7 +1629,7 @@ void FusedLayerNormInferMeta(const MetaTensor& x,
int32_t rows = 1;
for (int i = 0; i < begin_norm_axis; i++) {
rows *= x.dims()[i];
rows *= static_cast<int32_t>(x.dims()[i]);
}
if (norm_weight) {
......@@ -2025,9 +2026,10 @@ static void Interpolate1DInferShapeCheck(
scale_w));
if (scale_w > 0.) {
// round down
out_w_tmp = (data_layout == DataLayout::kNCHW
? static_cast<int>(dim_x[2] * scale_w)
: static_cast<int>(dim_x[1] * scale_w));
out_w_tmp =
static_cast<int>(data_layout == DataLayout::kNCHW
? static_cast<float>(dim_x[2]) * scale_w
: static_cast<float>(dim_x[1]) * scale_w);
// protect when input shape is -1
out_w_tmp = out_w_tmp > 0 ? out_w_tmp : -1;
}
......@@ -2171,12 +2173,14 @@ static void Interpolate2DInferShapeCheck(
scale_h));
if (scale_h > 0. && scale_w > 0.) {
// round down
out_h_tmp = (data_layout == DataLayout::kNCHW
? static_cast<int>(dim_x[2] * scale_h)
: static_cast<int>(dim_x[1] * scale_h));
out_w_tmp = (data_layout == DataLayout::kNCHW
? static_cast<int>(dim_x[3] * scale_w)
: static_cast<int>(dim_x[2] * scale_w));
out_h_tmp =
static_cast<int>(data_layout == DataLayout::kNCHW
? static_cast<float>(dim_x[2]) * scale_h
: static_cast<float>(dim_x[1]) * scale_h);
out_w_tmp =
static_cast<int>(data_layout == DataLayout::kNCHW
? static_cast<float>(dim_x[3]) * scale_w
: static_cast<float>(dim_x[2]) * scale_w);
// protect when input shape is -1
out_h_tmp = out_h_tmp > 0 ? out_h_tmp : -1;
out_w_tmp = out_w_tmp > 0 ? out_w_tmp : -1;
......@@ -2325,15 +2329,18 @@ static void Interpolate3DInferShapeCheck(
scale_d));
if (scale_d > 0. && scale_h > 0. && scale_w > 0.) {
// round down
out_d_tmp = (data_layout == DataLayout::kNCHW
? static_cast<int>(dim_x[2] * scale_d)
: static_cast<int>(dim_x[1] * scale_d));
out_h_tmp = (data_layout == DataLayout::kNCHW
? static_cast<int>(dim_x[3] * scale_h)
: static_cast<int>(dim_x[2] * scale_h));
out_w_tmp = (data_layout == DataLayout::kNCHW
? static_cast<int>(dim_x[4] * scale_w)
: static_cast<int>(dim_x[3] * scale_w));
out_d_tmp =
static_cast<int>(data_layout == DataLayout::kNCHW
? static_cast<float>(dim_x[2]) * scale_d
: static_cast<float>(dim_x[1]) * scale_d);
out_h_tmp =
static_cast<int>(data_layout == DataLayout::kNCHW
? static_cast<float>(dim_x[3]) * scale_h
: static_cast<float>(dim_x[2]) * scale_h);
out_w_tmp =
static_cast<int>(data_layout == DataLayout::kNCHW
? static_cast<float>(dim_x[4]) * scale_w
: static_cast<float>(dim_x[3]) * scale_w);
// protect when input shape is -1
out_d_tmp = out_d_tmp > 0 ? out_d_tmp : -1;
out_h_tmp = out_h_tmp > 0 ? out_h_tmp : -1;
......@@ -2860,13 +2867,13 @@ void MeshgridInferMeta(const std::vector<const MetaTensor*>& inputs,
std::vector<MetaTensor*> outputs) {
const size_t inputs_num = inputs.size();
auto out_shape = std::vector<int>(inputs_num);
std::vector<int> out_shape = std::vector<int>(inputs_num);
for (size_t i = 0; i < inputs.size(); i++) {
if (inputs[i]->dims().size() == 0) {
out_shape[i] = 1;
} else {
out_shape[i] = inputs[i]->dims()[0];
out_shape[i] = static_cast<int>(inputs[i]->dims()[0]);
}
}
auto out_dims = phi::make_ddim(std::vector<int>(out_shape));
......@@ -3317,7 +3324,7 @@ void RnnInferMeta(const MetaTensor& x,
out->set_dims(out_dims);
out->set_dtype(x.dtype());
int state_num = pre_state.size();
int state_num = static_cast<int>(pre_state.size());
for (int i = 0; i < state_num; ++i) {
state[i]->set_dims(pre_state[i]->dims());
state[i]->set_dtype(x.dtype());
......@@ -3482,7 +3489,7 @@ void SendUERecvInferMeta(const MetaTensor& x,
std::vector<int> x_dims2(x_dims1.begin() + 1, x_dims1.end());
std::vector<int> y_dims2(y_dims1.begin() + 1, y_dims1.end());
int max_dim = std::max(x_dims2.size(), y_dims2.size());
int max_dim = static_cast<int>(std::max(x_dims2.size(), y_dims2.size()));
int axis = std::abs(static_cast<int>(x_dims2.size() - y_dims2.size()));
std::vector<int> x_dims_array(max_dim);
std::vector<int> y_dims_array(max_dim);
......@@ -3552,7 +3559,7 @@ void SendUVInferMeta(const MetaTensor& x,
auto y_dims1 = phi::vectorize<int>(y_dims);
std::vector<int> x_dims2(x_dims1.begin() + 1, x_dims1.end());
std::vector<int> y_dims2(y_dims1.begin() + 1, y_dims1.end());
int max_dim = std::max(x_dims2.size(), y_dims2.size());
int max_dim = static_cast<int>(std::max(x_dims2.size(), y_dims2.size()));
int axis = std::abs(static_cast<int>(x_dims2.size() - y_dims2.size()));
std::vector<int> x_dims_array(max_dim);
std::vector<int> y_dims_array(max_dim);
......@@ -3565,7 +3572,7 @@ void SendUVInferMeta(const MetaTensor& x,
out_dims_array.data(),
max_dim,
axis);
out_dims_array.insert(out_dims_array.begin(), src_index_dims[0]);
out_dims_array.insert(out_dims_array.begin(), src_index_dims[0]); // NOLINT
out->set_dims(phi::make_ddim(out_dims_array));
}
......@@ -3603,7 +3610,7 @@ void StackInferMeta(const std::vector<const MetaTensor*>& x,
axis));
if (axis < 0) axis += (rank + 1);
auto vec = phi::vectorize<int64_t>(out_dim);
vec.insert(vec.begin() + axis, input_dims.size());
vec.insert(vec.begin() + axis, input_dims.size()); // NOLINT
out->set_dims(phi::make_ddim(vec));
out->set_dtype(x.at(0)->dtype());
out->share_lod(*x.at(0));
......@@ -3692,7 +3699,7 @@ void WarpctcInferMeta(const MetaTensor& logits,
int sequence_width = 0;
if (logits_length) {
sequence_width = logits_dims[2];
sequence_width = static_cast<int>(logits_dims[2]);
} else {
sequence_width =
static_cast<int>(phi::product(logits_dims) / logits_dims[0]);
......@@ -3726,7 +3733,7 @@ void WarprnntInferMeta(const MetaTensor& input,
MetaTensor* loss,
MetaTensor* warpctcgrad) {
auto acts_dims = input.dims();
int D = acts_dims[3];
int D = static_cast<int>(acts_dims[3]);
PADDLE_ENFORCE_GE(
blank,
......@@ -3835,8 +3842,8 @@ void YoloLossInferMeta(const MetaTensor& x,
auto dim_x = x.dims();
auto dim_gtbox = gt_box.dims();
auto dim_gtlabel = gt_label.dims();
int anchor_num = anchors.size() / 2;
int mask_num = anchor_mask.size();
int anchor_num = static_cast<int>(anchors.size() / 2);
int mask_num = static_cast<int>(anchor_mask.size());
PADDLE_ENFORCE_EQ(dim_x.size(),
4,
......@@ -4183,10 +4190,10 @@ void MaskedMultiheadAttentionInferMeta(const MetaTensor& x,
MetaTensor* out,
MetaTensor* cache_kv_out,
MetaTensor* beam_cache_offset_out) {
int bsz = x.dims()[0];
int bsz = static_cast<int>(x.dims()[0]);
auto cache_kv_dims = cache_kv.dims();
int num_head = cache_kv.dims()[2];
int dim_head = cache_kv.dims()[4];
int num_head = static_cast<int>(cache_kv.dims()[2]);
int dim_head = static_cast<int>(cache_kv.dims()[4]);
PADDLE_ENFORCE_EQ(
cache_kv_dims.size(),
......
......@@ -69,7 +69,7 @@ inline void ResetSubmKernelSizeAndStrides(const DDim& kernel_dims,
std::vector<int>* paddings,
std::vector<int>* strides) {
for (uint64_t i = 0; i < paddings->size(); i++) {
(*paddings)[i] = kernel_dims[i] / 2;
(*paddings)[i] = kernel_dims[i] / 2; // NOLINT
(*strides)[i] = 1;
}
}
......@@ -95,7 +95,7 @@ void Conv3dInferMeta(const MetaTensor& x,
std::vector<int> kernel_sizes(kernel_dims.size());
for (int i = 0; i < kernel_dims.size(); i++) {
kernel_sizes[i] = kernel_dims[i];
kernel_sizes[i] = static_cast<int>(kernel_dims[i]);
}
std::vector<int> subm_paddings(paddings), subm_strides(strides);
......@@ -143,8 +143,8 @@ void Pool3dInferMeta(const MetaTensor& x,
const auto& x_dims = x.dims();
DDim out_dims = {1, 1, 1, 1, 1};
const std::vector<int>& real_kernel_sizes =
PoolResetKernel(kernel_sizes, x_dims[4], x_dims[4]);
const std::vector<int>& real_kernel_sizes = PoolResetKernel(
kernel_sizes, static_cast<int>(x_dims[4]), static_cast<int>(x_dims[4]));
GetOutShape(
x_dims, real_kernel_sizes, paddings, dilations, strides, &out_dims);
out->set_dtype(x.dtype());
......
......@@ -1099,7 +1099,7 @@ void ScatterNdAddInferMeta(const MetaTensor& x,
const auto& ref_dims = x.dims();
auto ref_dims_size = ref_dims.size();
const auto& index_dims = index.dims();
auto index_dims_size = index_dims.size();
int index_dims_size = static_cast<int>(index_dims.size());
const auto& updates_dims = updates.dims();
auto updates_dims_size = updates_dims.size();
......@@ -1135,10 +1135,12 @@ void ScatterNdAddInferMeta(const MetaTensor& x,
// update.shape = index.shape[:-1] + output.shape[index.shape[-1]:]
std::vector<int64_t> r_updates_dims;
for (int64_t i = 0; i < index_dims_size - 1; ++i) {
for (int i = 0; i < index_dims_size - 1; ++i) {
r_updates_dims.emplace_back(index_dims[i]);
}
for (int64_t i = index_dims[index_dims_size - 1]; i < ref_dims_size; ++i) {
for (int i = static_cast<int>(index_dims[index_dims_size - 1]);
i < ref_dims_size;
++i) {
r_updates_dims.emplace_back(ref_dims[i]);
}
// check for non-0d updates
......@@ -1265,11 +1267,11 @@ void SpectralNormInferMeta(const MetaTensor& weight,
"Attr(power_iters) should be greater equal then 0, but received %d",
power_iters));
int h = dim_weight[dim];
int h = static_cast<int>(dim_weight[dim]);
int w = 1;
for (int i = 0; i < rank_weight; i++) {
if (i != dim) {
w *= dim_weight[i];
w *= static_cast<int>(dim_weight[i]);
}
}
auto dim_u = u.dims();
......
......@@ -221,7 +221,7 @@ void ArgMinMaxInferMeta(const MetaTensor& x,
if (flatten) {
all_element_num = phi::product(x_dims);
} else {
all_element_num = x_dims[int_axis];
all_element_num = x_dims[static_cast<int>(int_axis)];
}
PADDLE_ENFORCE_LE(
all_element_num,
......@@ -243,11 +243,13 @@ void ArgMinMaxInferMeta(const MetaTensor& x,
vec = {};
}
} else {
for (int64_t i = 0; i < int_axis; i++) vec.emplace_back(x_dims[i]);
for (int64_t i = 0; i < int_axis; i++)
vec.emplace_back(x_dims[static_cast<int>(i)]);
if (keepdims) {
vec.emplace_back(static_cast<int64_t>(1));
}
for (int64_t i = int_axis + 1; i < x_rank; i++) vec.emplace_back(x_dims[i]);
for (int64_t i = int_axis + 1; i < x_rank; i++)
vec.emplace_back(x_dims[static_cast<int>(i)]);
}
out->set_dims(phi::make_ddim(vec));
......@@ -314,7 +316,7 @@ void AsComplexInferMeta(const MetaTensor& input, MetaTensor* output) {
"Expected the rank of input(X) to be equal to or greater than 1."
"But received rank of input(X) = %d",
input_rank));
const int last_dim_size = in_dims[input_rank - 1];
const int last_dim_size = static_cast<int>(in_dims[input_rank - 1]);
PADDLE_ENFORCE_EQ(
last_dim_size,
2,
......@@ -675,7 +677,7 @@ void CropInferMeta(const MetaTensor& x,
}
auto out_dims = std::vector<int64_t>(shape.size(), -1);
for (size_t i = 0; i < shape_dims.size(); ++i) {
for (int i = 0; i < static_cast<int>(shape_dims.size()); ++i) {
if (shape_dims[i] > 0) {
out_dims[i] = static_cast<int64_t>(shape_dims[i]);
} else {
......@@ -763,7 +765,7 @@ void DiagEmbedInferMeta(
dim1,
dim2));
int new_dim_len = offset_ + x_dims[x_dims.size() - 1];
int new_dim_len = static_cast<int>(offset_ + x_dims[x_dims.size() - 1]);
auto sizes = vectorize(x_dims);
sizes.pop_back();
sizes.insert(sizes.begin() + std::min(dim1_, dim2_), new_dim_len);
......@@ -1171,11 +1173,11 @@ void ExpandInferMeta(const MetaTensor& x,
auto out_rank =
std::max(static_cast<size_t>(x_dims.size()), expand_shape.size());
std::vector<int64_t> out_shape(out_rank);
for (size_t i = 0; i < expand_shape.size(); ++i) {
for (int i = 0; i < static_cast<int>(expand_shape.size()); ++i) {
if (x_dims[i] == -1) {
out_shape[i] = -1;
} else if (expand_shape[i] == -1) {
if (static_cast<size_t>(x_dims.size()) > i) {
if (static_cast<int>(x_dims.size()) > i) {
out_shape[i] = x_dims[i];
} else {
out_shape[i] = -1;
......@@ -1257,7 +1259,7 @@ void FFTC2RInferMeta(const MetaTensor& x,
out,
phi::errors::InvalidArgument("Output of fft_c2r should not be null."));
const phi::DDim x_dim = x.dims();
const int64_t last_fft_axis = axes.back();
const int last_fft_axis = static_cast<int>(axes.back());
// only ensure that fft axes' size greater than zero at runtime
// they might be -1 to indicate unknown size ar compile time
......@@ -1322,7 +1324,7 @@ void FFTR2CInferMeta(const MetaTensor& x,
out->share_dims(x);
} else {
phi::DDim out_dim = x.dims();
const int64_t last_fft_axis = axes.back();
const int last_fft_axis = static_cast<int>(axes.back());
const int64_t last_fft_dim_size = x_dim[last_fft_axis];
out_dim.at(last_fft_axis) = last_fft_dim_size / 2 + 1;
out->set_dims(out_dim);
......@@ -1379,7 +1381,7 @@ void FlattenWithXShapeInferMeta(const MetaTensor& x,
out_shape.reserve(in_dims_size - stop_axis + start_axis + 1);
for (int i = 0; i < start_axis; ++i) {
out_shape.push_back(x_dims[i]);
out_shape.push_back(x_dims[i]); // NOLINT
}
for (int i = start_axis; i <= stop_axis; i++) {
if (x_dims[i] == -1 || outer == -1) {
......@@ -1388,9 +1390,9 @@ void FlattenWithXShapeInferMeta(const MetaTensor& x,
outer *= x_dims[i];
}
}
out_shape.push_back(outer);
out_shape.push_back(outer); // NOLINT
for (int i = stop_axis + 1; i < in_dims_size; i++) {
out_shape.push_back(x_dims[i]);
out_shape.push_back(x_dims[i]); // NOLINT
}
const auto& out_dims = phi::make_ddim(out_shape);
out->set_dims(out_dims);
......@@ -1600,9 +1602,10 @@ void FoldInferMeta(const MetaTensor& x,
std::vector<int> out_dims;
// batch_size
out_dims.push_back(in_dims[0]);
out_dims.push_back(in_dims[0]); // NOLINT
// output_plane
int output_channels = in_dims[1] / (kernel_width * kernel_height);
int output_channels =
static_cast<int>(in_dims[1] / (kernel_width * kernel_height));
out_dims.push_back(output_channels);
int blocks_height = (output_sizes[0] + 2 * paddings[0] -
......@@ -1730,11 +1733,11 @@ void FrameInferMeta(const MetaTensor& x,
int end_axis;
if (axis == 0) {
seq_length = x_dims[0];
seq_length = static_cast<int>(x_dims[0]);
start_axis = 1;
end_axis = x_rank - 1;
} else {
seq_length = x_dims[x_rank - 1];
seq_length = static_cast<int>(x_dims[x_rank - 1]);
start_axis = 0;
end_axis = x_rank - 2;
}
......@@ -1859,11 +1862,11 @@ static phi::DDim ValidateShape(const std::vector<int64_t> shape,
"be -1. But received shape = [%s], shape[%d] is also -1.",
phi::make_ddim(shape),
i));
unk_dim_idx = i;
unk_dim_idx = static_cast<int>(i);
output_shape[i] = shape[i];
} else if (shape[i] == 0) {
if (static_cast<int>(i) < in_dims.size()) {
output_shape[i] = in_dims[i];
output_shape[i] = in_dims[static_cast<int>(i)];
} else {
PADDLE_ENFORCE_EQ(
in_size,
......@@ -1966,7 +1969,7 @@ void InferMetaFromVecValue(const MetaTensor& x,
void InverseInferMeta(const MetaTensor& x, MetaTensor* out) {
auto input_dims = x.dims();
int64_t input_rank = input_dims.size();
int input_rank = static_cast<int>(input_dims.size());
PADDLE_ENFORCE_GE(
input_rank,
2,
......@@ -1975,7 +1978,7 @@ void InverseInferMeta(const MetaTensor& x, MetaTensor* out) {
"But received: Input(Input)'s dimension = %d, shape = [%s].",
input_rank,
input_dims));
for (int64_t i = 0; i < input_rank; ++i) {
for (int i = 0; i < input_rank; ++i) {
PADDLE_ENFORCE_EQ(
(input_dims[i] == -1) || (input_dims[i] > 0),
true,
......@@ -2065,13 +2068,13 @@ void KthvalueInferMeta(const MetaTensor& x,
axis));
}
std::vector<int64_t> dimvec;
for (int64_t i = 0; i < axis; i++) {
for (int i = 0; i < axis; i++) {
dimvec.emplace_back(input_dims[i]);
}
if (keepdim && dim_size > 0) {
dimvec.emplace_back(static_cast<int64_t>(1));
}
for (int64_t i = axis + 1; i < dim_size; i++) {
for (int i = axis + 1; i < dim_size; i++) {
dimvec.emplace_back(input_dims[i]);
}
DDim dims = phi::make_ddim(dimvec);
......@@ -2148,8 +2151,8 @@ void LUInferMeta(const MetaTensor& x,
phi::errors::InvalidArgument("The rank of input must greater than 2."));
out->set_dims(x_dims);
out->set_dtype(x.dtype());
int m = x_dims[x_rank - 1];
int n = x_dims[x_rank - 2];
int m = static_cast<int>(x_dims[x_rank - 1]);
int n = static_cast<int>(x_dims[x_rank - 2]);
int min_mn = std::min(m, n);
auto dims_vec = phi::vectorize(x_dims);
PADDLE_ENFORCE_NOT_NULL(
......@@ -2187,8 +2190,8 @@ void MatrixRankInferMeta(const MetaTensor& x,
"The dims of input must be greater than 2."));
if (hermitian) {
int rows = dim_x[dim_x.size() - 2];
int cols = dim_x[dim_x.size() - 1];
int rows = static_cast<int>(dim_x[dim_x.size() - 2]);
int cols = static_cast<int>(dim_x[dim_x.size() - 1]);
PADDLE_ENFORCE_EQ(rows,
cols,
phi::errors::InvalidArgument(
......@@ -2268,7 +2271,7 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
if (global_pooling) {
kernel_size_.resize(static_cast<size_t>(x_dims.size()) - 2);
for (size_t i = 0; i < kernel_size_.size(); ++i) {
for (int i = 0; i < static_cast<int>(kernel_size_.size()); ++i) {
paddings_[i] = 0;
kernel_size_[i] = static_cast<int>(x_dims[i + 2]);
}
......@@ -2301,12 +2304,15 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
output_shape.insert(
output_shape.end(), kernel_size_.begin(), kernel_size_.end());
} else {
for (size_t i = 0; i < kernel_size_.size(); ++i) {
for (int i = 0; i < static_cast<int>(kernel_size_.size()); ++i) {
if ((!config.is_runtime) && (x_dims[i + 2] < 0)) {
output_shape.push_back(x_dims[i + 2]);
} else {
output_shape.push_back(funcs::MaxPoolOutputSize(
x_dims[i + 2], kernel_size_[i], paddings_[i], strides[i]));
output_shape.push_back(
funcs::MaxPoolOutputSize(static_cast<int>(x_dims[i + 2]),
kernel_size_[i],
paddings_[i],
strides[i]));
}
}
}
......@@ -2365,13 +2371,13 @@ void ModeInferMeta(const MetaTensor& x,
}
std::vector<int64_t> dimvec;
for (int64_t i = 0; i < axis; i++) {
for (int i = 0; i < axis; i++) {
dimvec.emplace_back(input_dims[i]);
}
if (keepdim && dim_size > 0) {
dimvec.emplace_back(static_cast<int64_t>(1));
}
for (int64_t i = axis + 1; i < dim_size; i++) {
for (int i = axis + 1; i < dim_size; i++) {
dimvec.emplace_back(input_dims[i]);
}
DDim dims = phi::make_ddim(dimvec);
......@@ -2391,7 +2397,7 @@ void MultinomialInferMeta(const MetaTensor& x,
MetaConfig config) {
auto int_num_samples = num_samples.to<int>();
auto x_dim = x.dims();
int64_t x_rank = x_dim.size();
int x_rank = static_cast<int>(x_dim.size());
PADDLE_ENFORCE_GT(x_rank,
0,
errors::InvalidArgument(
......@@ -2406,7 +2412,7 @@ void MultinomialInferMeta(const MetaTensor& x,
x_rank));
std::vector<int64_t> out_dims(x_rank);
for (int64_t i = 0; i < x_rank - 1; i++) {
for (int i = 0; i < x_rank - 1; i++) {
out_dims[i] = x_dim[i];
}
......@@ -2484,7 +2490,7 @@ void NanmedianInferMeta(const MetaTensor& x,
for (int64_t i = 0; i < x_rank; i++) {
if (std::find(formated_axis.begin(), formated_axis.end(), i) ==
formated_axis.end()) {
out_dim.push_back(x_dim[i]);
out_dim.push_back(x_dim[i]); // NOLINT
} else if (keep_dim) {
out_dim.push_back(1);
}
......@@ -2611,13 +2617,13 @@ void OverlapAddInferMeta(const MetaTensor& x,
int start_axis;
int end_axis;
if (axis == 0) {
n_frames = x_dims[0];
frame_length = x_dims[1];
n_frames = static_cast<int>(x_dims[0]);
frame_length = static_cast<int>(x_dims[1]);
start_axis = 2;
end_axis = x_rank - 1;
} else {
n_frames = x_dims[x_rank - 1];
frame_length = x_dims[x_rank - 2];
n_frames = static_cast<int>(x_dims[x_rank - 1]);
frame_length = static_cast<int>(x_dims[x_rank - 2]);
start_axis = 0;
end_axis = x_rank - 3;
}
......@@ -3127,12 +3133,13 @@ void PoolInferMeta(const MetaTensor& x,
if ((!config.is_runtime) && (data_dims[i] < 0)) {
output_shape.push_back(data_dims[i]);
} else {
output_shape.push_back(funcs::PoolOutputSize(data_dims[i],
kernel_size_[i],
paddings_[2 * i],
paddings_[2 * i + 1],
strides[i],
ceil_mode));
output_shape.push_back(
funcs::PoolOutputSize(static_cast<int>(data_dims[i]),
kernel_size_[i],
paddings_[2 * i],
paddings_[2 * i + 1],
strides[i],
ceil_mode));
}
}
}
......@@ -3169,8 +3176,8 @@ void QrInferMeta(const MetaTensor& x,
phi::errors::InvalidArgument("the rank of input must greater than 2"));
bool compute_q;
bool reduced_mode;
int m = x_dims[x_rank - 2];
int n = x_dims[x_rank - 1];
int m = static_cast<int>(x_dims[x_rank - 2]);
int n = static_cast<int>(x_dims[x_rank - 1]);
int min_mn = std::min(m, n);
std::tie(compute_q, reduced_mode) = phi::funcs::ParseQrMode(mode);
......@@ -3199,7 +3206,7 @@ DDim ReduceInferDim(const MetaTensor& x,
const std::vector<int64_t>& axis,
bool keep_dim,
bool reduce_all) {
auto x_rank = x.dims().size();
int x_rank = x.dims().size();
std::vector<int64_t> formated_axis = axis;
for (size_t i = 0; i < axis.size(); ++i) {
......@@ -3246,7 +3253,7 @@ DDim ReduceInferDim(const MetaTensor& x,
reduce_all = reduce_all || full_dim;
std::vector<int64_t> out_dim_vector;
for (int64_t i = 0; i < x_rank; ++i) {
for (int i = 0; i < x_rank; ++i) {
if (reduce_all || dims_set.find(i) != dims_set.end()) {
if (keep_dim) {
out_dim_vector.push_back(1);
......@@ -3452,7 +3459,7 @@ void ReverseInferMeta(const MetaTensor& x,
true,
phi::errors::InvalidArgument("'axis' can not be empty."));
const auto& x_dims = x.dims();
for (int a : axis_data) {
for (int64_t a : axis_data) {
PADDLE_ENFORCE_LT(a,
x_dims.size(),
phi::errors::OutOfRange(
......@@ -3827,14 +3834,14 @@ void SplitInferMeta(const MetaTensor& x,
int num_of_unknow = 0;
int sum_of_section = 0;
for (size_t i = 0; i < sections_data.size(); ++i) {
for (int i = 0; i < static_cast<int>(sections_data.size()); ++i) {
sections_vec.push_back(sections_data[i]);
if (sections_data[i] == unknow_dim_val) {
num_of_unknow++;
unknow_dim_idx = i;
} else {
sum_of_section += sections_data[i];
sum_of_section += static_cast<int>(sections_data[i]);
}
}
......@@ -3958,7 +3965,7 @@ void SqueezeInferMeta(const MetaTensor& x,
if (!config.is_runtime && axes.FromTensor()) {
// compile time infershape, set all elements to -1.
int output_size = x.dims().size() - axes.GetData().size();
int output_size = static_cast<int>(x.dims().size() - axes.GetData().size());
if (x.dims().size() == 0 && output_size == -1) {
output_size = 0;
}
......@@ -3970,7 +3977,7 @@ void SqueezeInferMeta(const MetaTensor& x,
tmp.reserve(axes.GetData().size());
std::for_each(axes.GetData().begin(),
axes.GetData().end(),
[&tmp](const int64_t& t) { tmp.push_back(t); });
[&tmp](const int64_t& t) { tmp.push_back(t); }); // NOLINT
auto out_dims = funcs::GetOutputSqueezeShape(tmp, x_dims, false);
out->set_dims(out_dims);
if (x_dims[0] == out_dims[0]) {
......@@ -4221,8 +4228,8 @@ void SvdInferMeta(const MetaTensor& x,
in_dims.size(),
2,
phi::errors::InvalidArgument("the rank of input must greater than 2"));
int m = in_dims[x_rank - 2];
int n = in_dims[x_rank - 1];
int m = static_cast<int>(in_dims[x_rank - 2]);
int n = static_cast<int>(in_dims[x_rank - 1]);
int k = std::min(m, n);
u->set_dims(!full_matrices ? UDDim(in_dims, k) : UDDim(in_dims, m));
vh->set_dims(!full_matrices ? VHDDim(in_dims, k) : VHDDim(in_dims, n));
......@@ -4495,7 +4502,7 @@ void TransposeInferMeta(const MetaTensor& x,
MetaTensor* out) {
auto x_dims = x.dims();
int x_rank = x_dims.size();
int axis_size = axis.size();
int axis_size = static_cast<int>(axis.size());
// Note: x_rank > axis_size when fuse squeeze2 + transpose2, else x_rank ==
// axis_size
......@@ -4572,7 +4579,7 @@ void UnbindInferMeta(const MetaTensor& x,
axis = axis < 0 ? in_dims.size() + axis : axis;
for (int i = 0; i < in_dims.size(); ++i) {
if (i != axis) out_dim.push_back(in_dims[i]);
if (i != axis) out_dim.push_back(in_dims[i]); // NOLINT
}
auto out_dims = phi::make_ddim(out_dim);
......@@ -4746,17 +4753,18 @@ void UnfoldInferMeta(const MetaTensor& x,
dilations[1]));
std::vector<int> out_dims;
out_dims.push_back(in_dims[0]);
int output_channels = in_dims[1] * kernel_sizes[0] * kernel_sizes[1];
out_dims.push_back(in_dims[0]); // NOLINT
int output_channels =
static_cast<int>(in_dims[1] * kernel_sizes[0] * kernel_sizes[1]);
out_dims.push_back(output_channels);
int output_height = phi::funcs::CalcOutputSize(in_dims[2],
int output_height = phi::funcs::CalcOutputSize(static_cast<int>(in_dims[2]),
kernel_sizes[0],
dilations[0],
paddings[0],
paddings[2],
strides[0]);
int output_width = phi::funcs::CalcOutputSize(in_dims[3],
int output_width = phi::funcs::CalcOutputSize(static_cast<int>(in_dims[3]),
kernel_sizes[1],
dilations[1],
paddings[1],
......@@ -5028,7 +5036,7 @@ void UnsqueezeInferMeta(const MetaTensor& x,
"should be in the range of [1, 6] (Eigen limit)"));
if (!config.is_runtime && axes.FromTensor()) {
// compile time infershape. set all elements to -1.
int output_size = x.dims().size() + axes.GetData().size();
int output_size = static_cast<int>(x.dims().size() + axes.GetData().size());
std::vector<int64_t> vec_out_dims(output_size, -1);
out->set_dtype(x.dtype());
out->set_dims(phi::make_ddim(vec_out_dims));
......
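The infer-meta hunks above all apply the same two narrowing-conversion fixes: phi::DDim extents are int64_t and container size() results are size_t, so storing either in an int now goes through an explicit static_cast<int>, while lines whose implicit narrowing is intentional are kept as-is and suppressed with // NOLINT. A minimal standalone sketch of both patterns (not Paddle code; dim() below is a hypothetical stand-in for x.dims()[i]):

// Minimal sketch of the two fix patterns above; not Paddle code.
#include <cstdint>
#include <vector>

int64_t dim(int i) { return 128 + i; }  // stand-in for x.dims()[i]

int Example() {
  // Pattern 1: make the 64-bit -> int narrowing explicit so the check passes.
  int rows = static_cast<int>(dim(0));
  int cols = static_cast<int>(dim(1));

  // Pattern 2: keep the implicit conversion and silence the check on one line
  // when the value is known to fit and a cast would only add noise.
  std::vector<int> shape;
  shape.push_back(dim(0));  // NOLINT
  return rows * cols + static_cast<int>(shape.size());
}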
......@@ -36,7 +36,7 @@ void LarsMomentumKernel(
std::vector<DenseTensor*> param_out,
std::vector<DenseTensor*> velocity_out,
std::vector<DenseTensor*> master_param_out) {
int op_num = param.size();
int op_num = static_cast<int>(param.size());
T mu_ = static_cast<T>(mu);
for (int i = 0; i < op_num; ++i) {
auto* lr = learning_rate[i]->data<T>();
......
......@@ -30,7 +30,7 @@ void PReluKernel(const Context& dev_ctx,
const T* alpha_ptr = alpha.data<T>();
T* o_ptr = dev_ctx.template Alloc<T>(out);
int numel = x.numel();
int numel = static_cast<int>(x.numel());
auto dim = x.dims();
int index = 0;
int i = 0;
......@@ -38,22 +38,22 @@ void PReluKernel(const Context& dev_ctx,
if (data_format == "NCHW") {
int temp = 1;
for (int j = 2; j < dim.size(); j++) {
temp *= dim[j];
temp *= static_cast<int>(dim[j]);
}
for (i = 0; i < numel; i++) {
index = (i / temp) % dim[1];
index = static_cast<int>((i / temp) % dim[1]);
o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
}
} else {
for (i = 0; i < numel; i++) {
index = i % dim[dim.size() - 1];
index = static_cast<int>(i % dim[dim.size() - 1]);
o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
}
}
} else if (mode == "element") {
int temp = 1;
for (int j = 1; j < dim.size(); j++) {
temp *= dim[j];
temp *= static_cast<int>(dim[j]);
}
for (i = 0; i < numel; i++) {
index = i % temp;
......
......@@ -57,9 +57,10 @@ void PriorBoxKernel(const Context& ctx,
step_height = new_step_h;
}
int num_priors = new_aspect_ratios.size() * min_sizes.size();
int num_priors =
static_cast<int>(new_aspect_ratios.size() * min_sizes.size());
if (!max_sizes.empty()) {
num_priors += max_sizes.size();
num_priors += static_cast<int>(max_sizes.size());
}
ctx.template Alloc<T>(out);
......@@ -148,7 +149,7 @@ void PriorBoxKernel(const Context& ctx,
var_et(0, i) = variances[i];
}
int box_num = feature_height * feature_width * num_priors;
int box_num = static_cast<int>(feature_height * feature_width * num_priors);
auto var_dim = var->dims();
var->Resize({box_num, static_cast<int>(variances.size())});
......
......@@ -34,10 +34,10 @@ void PsroiPoolGradKernel(const Context& ctx,
DenseTensor* dx) {
if (dx) {
const auto& in_dims = x.dims();
int input_channels = in_dims[1];
int height = in_dims[2];
int width = in_dims[3];
int rois_num_t = rois.dims()[0];
int input_channels = static_cast<int>(in_dims[1]);
int height = static_cast<int>(in_dims[2]);
int width = static_cast<int>(in_dims[3]);
int rois_num_t = static_cast<int>(rois.dims()[0]);
// set roi batch id
DenseTensor rois_batch_id_list;
......@@ -45,7 +45,7 @@ void PsroiPoolGradKernel(const Context& ctx,
int* rois_batch_id_data = ctx.template Alloc<int>(&rois_batch_id_list);
int rois_batch_size;
if (rois_num.get_ptr()) {
rois_batch_size = rois_num->numel();
rois_batch_size = static_cast<int>(rois_num->numel());
auto* rois_num_t_data = rois_num->data<int>();
int start = 0;
for (int n = 0; n < rois_batch_size; ++n) {
......@@ -56,7 +56,7 @@ void PsroiPoolGradKernel(const Context& ctx,
}
} else {
auto rois_lod = rois.lod().back();
rois_batch_size = rois_lod.size() - 1;
rois_batch_size = static_cast<int>(rois_lod.size()) - 1;
// calculate batch id index for each roi according to LoD
for (int n = 0; n < rois_batch_size; ++n) {
for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
......@@ -73,7 +73,7 @@ void PsroiPoolGradKernel(const Context& ctx,
set_zero(ctx, dx, static_cast<T>(0));
// backpropagate gradient per output pixel
int dout_size = dout.numel();
int dout_size = static_cast<int>(dout.numel());
for (int i = 0; i < dout_size; ++i) {
// The output is in order (n, c, ph, pw)
int pw = i % pooled_width;
......
......@@ -31,11 +31,11 @@ void PsroiPoolKernel(const Context& ctx,
float spatial_scale,
DenseTensor* out) {
auto in_dims = x.dims();
int batch_size = in_dims[0];
int input_channels = in_dims[1];
int height = in_dims[2];
int width = in_dims[3];
int rois_num_t = rois.dims()[0];
int batch_size = static_cast<int>(in_dims[0]);
int input_channels = static_cast<int>(in_dims[1]);
int height = static_cast<int>(in_dims[2]);
int width = static_cast<int>(in_dims[3]);
int rois_num_t = static_cast<int>(rois.dims()[0]);
PADDLE_ENFORCE_EQ(input_channels,
output_channels * pooled_height * pooled_width,
......@@ -55,7 +55,7 @@ void PsroiPoolKernel(const Context& ctx,
int rois_batch_size;
if (rois_num.get_ptr()) {
rois_batch_size = rois_num->numel();
rois_batch_size = static_cast<int>(rois_num->numel());
auto* rois_num_data = rois_num->data<int>();
PADDLE_ENFORCE_EQ(
rois_batch_size,
......@@ -84,13 +84,13 @@ void PsroiPoolKernel(const Context& ctx,
}
} else {
auto rois_lod = rois.lod().back();
rois_batch_size = rois_lod.size() - 1;
rois_batch_size = static_cast<int>(rois_lod.size()) - 1;
PADDLE_ENFORCE_EQ(
rois_batch_size,
batch_size,
errors::InvalidArgument("the rois_batch_size and input(X) "
"batch_size should be the same."));
int rois_num_with_lod = rois_lod[rois_batch_size];
int rois_num_with_lod = static_cast<int>(rois_lod[rois_batch_size]);
PADDLE_ENFORCE_EQ(rois_num_with_lod,
rois_num_t,
errors::InvalidArgument(
......@@ -127,12 +127,14 @@ void PsroiPoolKernel(const Context& ctx,
T bin_size_w = roi_width / static_cast<T>(pooled_width);
// calculate each pixel of the output feature map.
int out_roi_offset = n * out_stride[0];
int out_roi_offset = static_cast<int>(n * out_stride[0]);
for (int c = 0; c < output_channels; ++c) {
// per category
int out_plane_offset = out_roi_offset + c * out_stride[1];
int out_plane_offset =
static_cast<int>(out_roi_offset + c * out_stride[1]);
for (int ph = 0; ph < pooled_height; ++ph) {
int out_row_offset = out_plane_offset + ph * out_stride[2];
int out_row_offset =
static_cast<int>(out_plane_offset + ph * out_stride[2]);
for (int pw = 0; pw < pooled_width; ++pw) {
// calculate w and h at input feature map
int hstart = floor(static_cast<T>(ph) * bin_size_h + roi_start_h);
......@@ -147,14 +149,14 @@ void PsroiPoolKernel(const Context& ctx,
int output_index = out_row_offset + pw;
int input_channel = (c * pooled_height + ph) * pooled_width + pw;
int input_plane_offset =
roi_batch_id * in_stride[0] + input_channel * in_stride[1];
int input_plane_offset = static_cast<int>(
roi_batch_id * in_stride[0] + input_channel * in_stride[1]);
const T* offset_input_data = input_data + input_plane_offset;
T out_sum = 0.;
bool is_empty = (hend <= hstart) || (wend <= wstart);
for (int ih = hstart; ih < hend; ++ih) {
for (int iw = wstart; iw < wend; ++iw) {
int input_index = ih * in_stride[2] + iw;
int input_index = static_cast<int>(ih * in_stride[2] + iw);
out_sum += offset_input_data[input_index];
}
}
......
......@@ -37,11 +37,11 @@ void QrKernel(const Context& ctx,
numel, 0, errors::PreconditionNotMet("The input of QR is empty."));
auto x_dims = x.dims();
int x_rank = x_dims.size();
int m = x_dims[x_rank - 2];
int n = x_dims[x_rank - 1];
int m = static_cast<int>(x_dims[x_rank - 2]);
int n = static_cast<int>(x_dims[x_rank - 1]);
int min_mn = std::min(m, n);
int k = reduced_mode ? min_mn : m;
int batch_size = numel / (m * n);
int batch_size = static_cast<int>(numel / (m * n));
int x_stride = m * n;
int q_stride = m * k;
int r_stride = k * n;
......
......@@ -83,8 +83,8 @@ struct GradCell {
auto& place = *dev_ctx.eigen_device();
auto mask = EigenMatrix<T>::From(
mask_tensor, phi::make_ddim({mask_tensor.dims()[1], 1}));
auto mask_broadcast =
mask.broadcast(Eigen::DSizes<int, 2>(1, grad_pre_hidden->dims()[2]));
auto mask_broadcast = mask.broadcast(Eigen::DSizes<int, 2>(
1, static_cast<int>(grad_pre_hidden->dims()[2])));
auto pre_hidden_grad = EigenMatrix<T>::Reshape(
*grad_pre_hidden, grad_pre_hidden->dims().size() - 1);
auto pre_hidden_bak_grad = EigenMatrix<T>::Reshape(
......@@ -599,8 +599,8 @@ struct GradLayer {
auto& place = *dev_ctx.eigen_device();
auto mask = EigenMatrix<T>::From(
mask_tensor, phi::make_ddim({mask_tensor.dims()[1], 1}));
auto mask_broadcast =
mask.broadcast(Eigen::DSizes<int, 2>(1, grad_output->dims()[2]));
auto mask_broadcast = mask.broadcast(
Eigen::DSizes<int, 2>(1, static_cast<int>(grad_output->dims()[2])));
auto last_h_grad =
EigenMatrix<T>::Reshape(*grad_last_h, grad_last_h->dims().size() - 1);
......@@ -716,8 +716,8 @@ struct SingleGradLayer : GradLayer<T, GradCellType> {
phi::funcs::SetConstant<CPUContext, T> zero;
zero(dev_ctx, input_grad, static_cast<T>(0.0));
int time_step = input->dims()[0];
int batch_size = input->dims()[1];
int time_step = static_cast<int>(input->dims()[0]);
int batch_size = static_cast<int>(input->dims()[1]);
int direction_num = is_bidirec ? 2 : 1;
  // in this section, create the gate_state_grad for the postprocess calculation
......@@ -825,8 +825,8 @@ struct BidirGradLayer : GradLayer<T, GradCellType> {
int hidden_size,
const std::string& mode,
int gate_num) {
int time_step = input->dims()[0];
int batch_size = input->dims()[1];
int time_step = static_cast<int>(input->dims()[0]);
int batch_size = static_cast<int>(input->dims()[1]);
int direction_num = is_bidirec ? 2 : 1;
    // split the output into two tensors: output_forward, output_backward
phi::funcs::SetConstant<CPUContext, T> zero;
......@@ -1009,8 +1009,8 @@ void RnnGradFunc(const CPUContext& dev_ctx,
}
// get the input_size, batch_size, time_step
const int time_step = x.dims()[0];
const int batch_size = x.dims()[1];
const int time_step = static_cast<int>(x.dims()[0]);
const int batch_size = static_cast<int>(x.dims()[1]);
const int direction_num = is_bidirec ? 2 : 1;
  // allocate the memory and initialize the x_grad
......
......@@ -218,7 +218,7 @@ struct Layer {
bool is_test,
DenseTensor* cache_input) {
    // create the temp input for the X * W_ih^T + Bias_ih
const int& hidden_size = weight.dims()[0];
const int& hidden_size = weight.dims()[0]; // NOLINT
cache_input->Resize(
phi::make_ddim({input.dims()[0], input.dims()[1], hidden_size}));
if (is_test) {
......@@ -242,8 +242,8 @@ struct Layer {
EigenMatrix<T>::Reshape(*cache_input, cache_input->dims().size() - 1);
auto bias_ih_tmp =
EigenMatrix<T>::From(bias_ih, phi::make_ddim({1, bias_ih.dims()[0]}));
const int row_num =
phi::product(cache_input->dims()) / cache_input->dims()[2];
const int row_num = static_cast<int>(phi::product(cache_input->dims()) /
cache_input->dims()[2]);
in = in + bias_ih_tmp.broadcast(Eigen::DSizes<int, 2>(row_num, 1));
if (is_gru(mode)) {
// reset_gate update_gate cell_gate = [1, 1, 0]
......@@ -279,8 +279,8 @@ struct Layer {
mask_tensor, phi::make_ddim({mask_tensor.dims()[1], 1}));
auto pre_h = EigenMatrix<T>::Reshape(*init_h, init_h->dims().size() - 1);
auto curr_h = EigenMatrix<T>::Reshape(*last_h, last_h->dims().size() - 1);
auto mask_broadcast =
mask.broadcast(Eigen::DSizes<int, 2>(1, output->dims()[2]));
auto mask_broadcast = mask.broadcast(
Eigen::DSizes<int, 2>(1, static_cast<int>(output->dims()[2])));
curr_h.device(place) = out * mask_broadcast + pre_h * (1 - mask_broadcast);
out.device(place) = out * mask_broadcast;
......@@ -332,7 +332,7 @@ struct Layer {
is_reverse = true;
}
}
const int time_step = input->dims()[0];
const int time_step = static_cast<int>(input->dims()[0]);
this->preprocess(dev_ctx,
*input,
vec[0 + offset * 4],
......@@ -532,7 +532,7 @@ struct Layer {
is_reverse = true;
}
}
const int time_step = input->dims()[0];
const int time_step = static_cast<int>(input->dims()[0]);
this->preprocess(dev_ctx,
*input,
vec[0 + offset * 4],
......@@ -749,9 +749,9 @@ struct BidirLayer : public Layer<T, CellType> {
std::vector<DenseTensor> output_vec(2);
DenseTensor forward_input_w, forward_cell_value, forward_cell_act_value;
DenseTensor backward_input_w, backward_cell_value, backward_cell_act_value;
int time_step = input->dims()[0];
int batch_size = input->dims()[1];
int hidden_size = output->dims()[2];
int time_step = static_cast<int>(input->dims()[0]);
int batch_size = static_cast<int>(input->dims()[1]);
int hidden_size = static_cast<int>(output->dims()[2]);
for (int i = 0; i < 2; ++i) {
output_vec[i].Resize({time_step, batch_size, hidden_size / 2});
dev_ctx.Alloc<T>(&output_vec[i]);
......
......@@ -85,7 +85,7 @@ void RoiAlignGradKernel(const Context& dev_ctx,
int channels = in_dims[1];
int height = in_dims[2];
int width = in_dims[3];
int rois_num = boxes.dims()[0];
int rois_num = static_cast<int>(boxes.dims()[0]);
if (!dx) {
return;
......@@ -96,7 +96,7 @@ void RoiAlignGradKernel(const Context& dev_ctx,
int boxes_batch_size;
if (boxes_num) {
boxes_batch_size = boxes_num->numel();
boxes_batch_size = static_cast<int>(boxes_num->numel());
auto* boxes_num_data = boxes_num->data<int>();
int start = 0;
for (int n = 0; n < boxes_batch_size; ++n) {
......@@ -107,7 +107,7 @@ void RoiAlignGradKernel(const Context& dev_ctx,
}
} else {
auto boxes_lod = boxes.lod().back();
boxes_batch_size = boxes_lod.size() - 1;
boxes_batch_size = static_cast<int>(boxes_lod.size() - 1);
for (int n = 0; n < boxes_batch_size; ++n) {
for (std::size_t i = boxes_lod[n]; i < boxes_lod[n + 1]; ++i) {
box_batch_id_data[i] = n;
......@@ -119,7 +119,7 @@ void RoiAlignGradKernel(const Context& dev_ctx,
phi::funcs::SetConstant<Context, T> set_zero;
set_zero(dev_ctx, dx, static_cast<T>(0));
int output_grad_size = out_grad.numel();
int output_grad_size = static_cast<int>(out_grad.numel());
if ((!out_grad.IsInitialized()) || (output_grad_size <= 0)) {
return;
......
......@@ -187,11 +187,11 @@ void RoiAlignKernel(const Context& dev_ctx,
bool aligned,
DenseTensor* out) {
auto in_dims = x.dims();
int batch_size = in_dims[0];
int channels = in_dims[1];
int height = in_dims[2];
int width = in_dims[3];
int rois_num = boxes.dims()[0];
int batch_size = static_cast<int>(in_dims[0]);
int channels = static_cast<int>(in_dims[1]);
int height = static_cast<int>(in_dims[2]);
int width = static_cast<int>(in_dims[3]);
int rois_num = static_cast<int>(boxes.dims()[0]);
if (rois_num == 0) {
dev_ctx.template Alloc<T>(out);
......@@ -207,7 +207,7 @@ void RoiAlignKernel(const Context& dev_ctx,
int* roi_batch_id_data = roi_batch_id_list.data<int>();
int boxes_batch_size;
if (boxes_num) {
boxes_batch_size = boxes_num->numel();
boxes_batch_size = static_cast<int>(boxes_num->numel());
PADDLE_ENFORCE_EQ(
boxes_batch_size,
batch_size,
......@@ -233,7 +233,7 @@ void RoiAlignKernel(const Context& dev_ctx,
errors::InvalidArgument("Input(ROIs) Tensor of ROIAlignOp "
"does not contain LoD information."));
auto boxes_lod = lod.back();
int boxes_batch_size = boxes_lod.size() - 1;
int boxes_batch_size = static_cast<int>(boxes_lod.size() - 1);
PADDLE_ENFORCE_EQ(
boxes_batch_size,
batch_size,
......@@ -243,7 +243,7 @@ void RoiAlignKernel(const Context& dev_ctx,
"batch_size = %d",
boxes_batch_size,
batch_size));
int boxes_num_with_lod = boxes_lod[boxes_batch_size];
int boxes_num_with_lod = static_cast<int>(boxes_lod[boxes_batch_size]);
PADDLE_ENFORCE_EQ(
rois_num,
boxes_num_with_lod,
......
......@@ -33,13 +33,13 @@ void RoiPoolGradKernel(const Context& dev_ctx,
float spatial_scale,
DenseTensor* dx) {
if (dx) {
int rois_num = boxes.dims()[0];
int rois_num = static_cast<int>(boxes.dims()[0]);
DenseTensor box_batch_id_list = Empty<int>(dev_ctx, {rois_num});
int* box_batch_id_data = box_batch_id_list.data<int>();
int boxes_batch_size;
if (boxes_num) {
boxes_batch_size = boxes_num->numel();
boxes_batch_size = static_cast<int>(boxes_num->numel());
auto* boxes_num_data = boxes_num->data<int>();
int start = 0;
for (int n = 0; n < boxes_batch_size; ++n) {
......@@ -50,7 +50,7 @@ void RoiPoolGradKernel(const Context& dev_ctx,
}
} else {
auto boxes_lod = boxes.lod().back();
boxes_batch_size = boxes_lod.size() - 1;
boxes_batch_size = static_cast<int>(boxes_lod.size() - 1);
for (int n = 0; n < boxes_batch_size; ++n) {
for (size_t i = boxes_lod[n]; i < boxes_lod[n + 1]; ++i) {
box_batch_id_data[i] = n;
......@@ -71,7 +71,7 @@ void RoiPoolGradKernel(const Context& dev_ctx,
auto roi_stride = phi::stride(boxes.dims());
auto out_stride = phi::stride(out_grad.dims());
int channels = x.dims()[1];
int channels = static_cast<int>(x.dims()[1]);
for (int n = 0; n < rois_num; ++n) {
int roi_batch_idx = box_batch_id_data[n];
......
......@@ -31,11 +31,11 @@ void RoiPoolKernel(const Context& dev_ctx,
DenseTensor* out,
DenseTensor* arg_max) {
auto x_dims = x.dims();
int batch_size = x_dims[0];
int channels = x_dims[1];
int height = x_dims[2];
int width = x_dims[3];
int rois_num = boxes.dims()[0];
int batch_size = static_cast<int>(x_dims[0]);
int channels = static_cast<int>(x_dims[1]);
int height = static_cast<int>(x_dims[2]);
int width = static_cast<int>(x_dims[3]);
int rois_num = static_cast<int>(boxes.dims()[0]);
if (rois_num == 0) {
dev_ctx.template Alloc<T>(out);
......@@ -54,7 +54,7 @@ void RoiPoolKernel(const Context& dev_ctx,
int boxes_batch_size;
if (boxes_num) {
boxes_batch_size = boxes_num->numel();
boxes_batch_size = static_cast<int>(boxes_num->numel());
PADDLE_ENFORCE_EQ(
boxes_batch_size,
batch_size,
......@@ -70,13 +70,13 @@ void RoiPoolKernel(const Context& dev_ctx,
}
} else {
auto boxes_lod = boxes.lod().back();
boxes_batch_size = boxes_lod.size() - 1;
boxes_batch_size = static_cast<int>(boxes_lod.size() - 1);
PADDLE_ENFORCE_EQ(
boxes_batch_size,
batch_size,
phi::errors::InvalidArgument("The boxes_batch_size and imgs "
"batch_size must be the same."));
int rois_num_with_lod = boxes_lod[boxes_batch_size];
int rois_num_with_lod = static_cast<int>(boxes_lod[boxes_batch_size]);
PADDLE_ENFORCE_EQ(
rois_num,
rois_num_with_lod,
......
......@@ -28,7 +28,7 @@ void RReluGradKernel(const Context& dev_ctx,
const T* n_ptr = noise.data<T>();
const T* x_ptr = x.data<T>();
const T* out_grad_ptr = out_grad.data<T>();
int numel = x.numel();
int numel = static_cast<int>(x.numel());
if (!x_grad) return;
int i = 0;
......
......@@ -32,7 +32,7 @@ void RReluKernel(const Context& dev_ctx,
T* o_ptr = dev_ctx.template Alloc<T>(out);
T* n_ptr = dev_ctx.template Alloc<T>(noise);
T zero = static_cast<T>(0);
int numel = x.numel();
int numel = static_cast<int>(x.numel());
int i = 0;
if (is_test) {
......
......@@ -79,7 +79,7 @@ void GraphSendRecvGradOpKernelLaunchHelper(
DenseTensor* x_grad,
const DenseTensor* dst_count = nullptr,
const DenseTensor* out = nullptr) {
const int& index_size = dst_index.dims()[0];
const int& index_size = dst_index.dims()[0]; // NOLINT
ctx.template Alloc<T>(x_grad);
T* p_output = x_grad->data<T>();
......
......@@ -86,7 +86,7 @@ void GraphSendRecvOpKernelLaunchHelper(const Context& ctx,
int64_t out_size,
DenseTensor* out,
DenseTensor* dst_count = nullptr) {
const int& index_size = src_index.dims()[0];
const int& index_size = src_index.dims()[0]; // NOLINT
const auto& src_dims = x.dims();
int64_t memset_size = 1;
......
......@@ -365,7 +365,7 @@ void GraphSendUERecvGradOpKernelLaunchHelper(
DenseTensor* y_grad,
const DenseTensor* dst_count = nullptr,
const DenseTensor* out = nullptr) {
const int& index_size = dst_index.dims()[0];
const int& index_size = dst_index.dims()[0]; // NOLINT
ctx.template Alloc<T>(x_grad);
T* x_grad_data = x_grad->data<T>();
......
......@@ -115,7 +115,7 @@ void GraphSendUERecvOpKernelLaunchHelper(const Context& ctx,
int64_t out_size,
DenseTensor* out,
DenseTensor* dst_count = nullptr) {
const int& index_size = src_index.dims()[0];
const int& index_size = src_index.dims()[0]; // NOLINT
auto out_dims = out->dims();
int64_t memset_size = 1;
std::vector<int64_t> dims_ = phi::vectorize(out_dims);
......
......@@ -57,7 +57,7 @@ void GraphSendUVOpKernelLaunchHelper(const Context& ctx,
const DenseTensor& dst_index,
const std::string& message_op,
DenseTensor* out) {
const int& index_size = src_index.dims()[0];
const int& index_size = src_index.dims()[0]; // NOLINT
PADDLE_ENFORCE_GT(
index_size,
0,
......
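The graph_send_recv/graph_send_uv helpers above keep const int& index_size = src_index.dims()[0]; and only add // NOLINT: dims()[0] is int64_t, so the const int& binds to a temporary int produced by a narrowing conversion, which the check still flags even though no named int is assigned directly. A small sketch of that case (first_dim() is a hypothetical stand-in):

// Why a const int& bound to an int64_t element still narrows: the reference
// binds to a lifetime-extended temporary int created by the conversion.
#include <cstdint>

int64_t first_dim() { return 42; }  // stand-in for src_index.dims()[0]

int Example() {
  const int& index_size = first_dim();  // NOLINT  (narrowed temporary)
  return index_size;
}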
......@@ -70,9 +70,10 @@ void sgd_dense_param_sparse_grad_impl(const DenseTensor& param,
phi::jit::sgd_attr_t attr;
attr.param_height = param_out->dims()[0];
attr.param_width = param_out->numel() / attr.param_height;
attr.grad_height = grad_rows.size(); // note: it is not grad->height()
attr.grad_height =
static_cast<int>(grad_rows.size()); // note: it is not grad->height()
attr.grad_width = grad_value.numel() / attr.grad_height;
attr.selected_rows_size = grad_rows.size();
attr.selected_rows_size = static_cast<int>(grad_rows.size());
auto sgd =
phi::jit::KernelFuncs<phi::jit::SgdTuple<T>, phi::CPUPlace>::Cache().At(
......
......@@ -29,11 +29,11 @@ void ShuffleBatchGradKernel(const Context& dev_ctx,
auto embed_size = out_grad.dims()[out_grad.dims().size() - 1];
auto elem_size = 1;
for (auto i = 0; i < out_grad.dims().size() - 1; i++)
elem_size *= out_grad.dims()[i];
elem_size *= static_cast<int>(out_grad.dims()[i]);
std::vector<int> idx_vec_grad(elem_size);
auto* shuffleidx_data = shuffleidx.data<int64_t>();
for (size_t i = 0; i < idx_vec_grad.size(); i++) {
for (int i = 0; i < static_cast<int>(idx_vec_grad.size()); i++) {
idx_vec_grad[shuffleidx_data[i]] = i;
}
......
......@@ -27,12 +27,13 @@ void ShuffleBatchKernel(const Context& dev_ctx,
DenseTensor* shuffleidx,
DenseTensor* seed_out) {
auto x_embed_size = x.dims()[x.dims().size() - 1];
auto elem_size = 1;
for (auto i = 0; i < x.dims().size() - 1; i++) elem_size *= x.dims()[i];
int elem_size = 1;
for (auto i = 0; i < x.dims().size() - 1; i++)
elem_size *= static_cast<int>(x.dims()[i]);
std::vector<int64_t> idx_vec; // record shuffled order
idx_vec.reserve(elem_size);
for (auto i = 0; i < elem_size; i++) {
for (int i = 0; i < elem_size; i++) {
idx_vec.push_back(i);
}
int64_t seed_int = 0;
......@@ -48,7 +49,7 @@ void ShuffleBatchKernel(const Context& dev_ctx,
std::random_device rnd;
int64_t seed_tmp = rnd();
std::default_random_engine rng(seed_tmp);
const int n = idx_vec.size();
const int n = static_cast<int>(idx_vec.size());
std::vector<int> v(n);
std::iota(v.begin(), v.end(), 0);
std::vector<bool> visit(n, false);
......@@ -73,7 +74,7 @@ void ShuffleBatchKernel(const Context& dev_ctx,
std::shuffle(v.begin(), v.end(), rng);
idx_vec[curr] = v.back();
v.pop_back();
curr = idx_vec[curr];
curr = static_cast<int>(idx_vec[curr]);
}
}
};
......
......@@ -31,7 +31,7 @@ void SigmoidCrossEntropyWithLogitsGradKernel(
DenseTensor* in_grad) {
auto dx_data = dev_ctx.template Alloc<T>(in_grad);
int limit = in_grad->numel();
int limit = static_cast<int>(in_grad->numel());
auto x_data = x.data<T>();
auto label_data = label.data<T>();
auto dout_data = out_grad.data<T>();
......
......@@ -32,7 +32,7 @@ void SigmoidCrossEntropyWithLogitsKernel(
int ignore_index,
DenseTensor* out) {
auto out_data = dev_ctx.template Alloc<T>(out);
int limit = out->numel();
int limit = static_cast<int>(out->numel());
auto x_data = x.data<T>();
auto label_data = label.data<T>();
auto pos_weight_data =
......
......@@ -26,7 +26,7 @@ void StackGradKernel(const Context& dev_ctx,
int axis,
std::vector<DenseTensor*> x_grad) {
if (axis < 0) axis += out.dims().size();
int n = out.dims()[axis];
int n = static_cast<int>(out.dims()[axis]);
std::vector<T*> dx_datas(n); // NOLINT
for (int i = 0; i < n; i++) {
......@@ -38,8 +38,8 @@ void StackGradKernel(const Context& dev_ctx,
}
auto dy_data = out.data<T>();
int pre = 1;
for (int i = 0; i < axis; ++i) pre *= out.dims()[i];
int total_num = out.numel();
for (int i = 0; i < axis; ++i) pre *= static_cast<int>(out.dims()[i]);
int total_num = static_cast<int>(out.numel());
int post = total_num / (n * pre);
auto dx_data_arr = dx_datas.data();
phi::funcs::StackGradFunctorForRange(
......
......@@ -41,8 +41,8 @@ void StackKernel(const Context& dev_ctx,
int pre = 1, post = 1;
auto& dim = x[0]->dims();
for (auto i = 0; i < axis; ++i) pre *= dim[i];
for (auto i = axis; i < dim.size(); ++i) post *= dim[i];
for (auto i = 0; i < axis; ++i) pre *= static_cast<int>(dim[i]);
for (auto i = axis; i < dim.size(); ++i) post *= static_cast<int>(dim[i]);
auto x_data_arr = x_datas.data();
......
......@@ -100,8 +100,8 @@ void SvdKernel(const Context& dev_ctx,
DenseTensor trans_x = ::phi::TransposeLast2Dim<T>(dev_ctx, X);
auto* x_data = trans_x.data<T>();
auto x_dims = X.dims();
int rows = x_dims[x_dims.size() - 2];
int cols = x_dims[x_dims.size() - 1];
int rows = static_cast<int>(x_dims[x_dims.size() - 2]);
int cols = static_cast<int>(x_dims[x_dims.size() - 1]);
// int k = std::min(rows, cols);
// int col_u = full ? rows : k;
// int col_v = full ? cols : k;
......@@ -113,7 +113,7 @@ void SvdKernel(const Context& dev_ctx,
0,
cols,
errors::InvalidArgument("The col of Input(X) should be greater than 0."));
int batches = numel / (rows * cols);
int batches = static_cast<int>(numel / (rows * cols));
auto* U_out = dev_ctx.template Alloc<phi::dtype::Real<T>>(U);
auto* VH_out = dev_ctx.template Alloc<phi::dtype::Real<T>>(VH);
auto* S_out = dev_ctx.template Alloc<phi::dtype::Real<T>>(S);
......
......@@ -94,21 +94,24 @@ void TemporalShiftGradKernel(const Context& dev_ctx,
int t = seg_num;
const DataLayout data_layout = phi::StringToDataLayout(data_format_str);
const int nt = output_grad->dims()[0];
const int c = (data_layout == DataLayout::kNCHW ? output_grad->dims()[1]
: output_grad->dims()[3]);
const int h = (data_layout == DataLayout::kNCHW ? output_grad->dims()[2]
: output_grad->dims()[1]);
const int w = (data_layout == DataLayout::kNCHW ? output_grad->dims()[3]
: output_grad->dims()[2]);
const int nt = static_cast<int>(output_grad->dims()[0]);
const int c = static_cast<int>(data_layout == DataLayout::kNCHW
? output_grad->dims()[1]
: output_grad->dims()[3]);
const int h = static_cast<int>(data_layout == DataLayout::kNCHW
? output_grad->dims()[2]
: output_grad->dims()[1]);
const int w = static_cast<int>(data_layout == DataLayout::kNCHW
? output_grad->dims()[3]
: output_grad->dims()[2]);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
const int c1 = static_cast<int>(static_cast<float>(c) * shift_ratio);
const int c2 = static_cast<int>(static_cast<float>(c) * 2.f * shift_ratio);
DDim in_grad_dims =
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
......
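The temporal_shift hunks above handle the floating-point side of the check: 0.5 and 1. are double literals, so -0.5 * (scale - 1.) is evaluated in double and then narrowed back to float, and c * shift_ratio feeds an int into float arithmetic; both get rewritten here. A small sketch of the corrected form, assuming scale and shift_ratio are plain floats:

// Float-narrowing fixes: float-suffixed literals keep the expression in
// float, and the int channel count is cast before entering float arithmetic.
int ShiftSplit(int channels, float shift_ratio, float scale, float* bias_out) {
  *bias_out = -0.5f * (scale - 1.f);  // was -0.5 * (scale - 1.)
  return static_cast<int>(static_cast<float>(channels) * shift_ratio);
}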
......@@ -94,21 +94,21 @@ void TemporalShiftKernel(const Context& dev_ctx,
int t = seg_num;
const DataLayout data_layout = phi::StringToDataLayout(data_format_str);
const int nt = input->dims()[0];
const int c =
(data_layout == DataLayout::kNCHW ? input->dims()[1] : input->dims()[3]);
const int h =
(data_layout == DataLayout::kNCHW ? input->dims()[2] : input->dims()[1]);
const int w =
(data_layout == DataLayout::kNCHW ? input->dims()[3] : input->dims()[2]);
const int nt = static_cast<int>(input->dims()[0]);
const int c = static_cast<int>(
data_layout == DataLayout::kNCHW ? input->dims()[1] : input->dims()[3]);
const int h = static_cast<int>(
data_layout == DataLayout::kNCHW ? input->dims()[2] : input->dims()[1]);
const int w = static_cast<int>(
data_layout == DataLayout::kNCHW ? input->dims()[3] : input->dims()[2]);
const int hw = h * w;
const int chw = c * hw;
const int tchw = t * chw;
const int ntchw = nt * chw;
const int c1 = static_cast<int>(c * shift_ratio);
const int c2 = static_cast<int>(c * 2 * shift_ratio);
const int c1 = static_cast<int>(static_cast<float>(c) * shift_ratio);
const int c2 = static_cast<int>(static_cast<float>(c) * 2.f * shift_ratio);
DDim out_dims =
(data_layout == DataLayout::kNCHW ? phi::make_ddim({nt, c, h, w})
......
......@@ -102,7 +102,7 @@ void TopkGradKernel(const Context& dev_ctx,
trans.emplace_back(axis);
phi::DDim trans_dims(out_dims);
phi::DDim trans_in_dims(in_dims);
for (size_t i = 0; i < trans.size(); i++) {
for (int i = 0; i < static_cast<int>(trans.size()); i++) {
trans_dims[i] = out_dims[trans[i]];
trans_in_dims[i] = in_dims[trans[i]];
}
......@@ -113,7 +113,7 @@ void TopkGradKernel(const Context& dev_ctx,
trans_ind.Resize(trans_dims);
dev_ctx.template Alloc<T>(&trans_dO);
dev_ctx.template Alloc<int64_t>(&trans_ind);
int ndims = trans.size();
int ndims = static_cast<int>(trans.size());
// Do transpose
funcs::TransCompute<phi::CPUContext, T>(
......
......@@ -198,17 +198,17 @@ void TopkKernel(const Context& dev_ctx,
// get the trans input_dims, out_dims
phi::DDim trans_dims(in_dims);
phi::DDim trans_out_dims(out->dims());
for (size_t i = 0; i < trans.size(); i++) {
for (int i = 0; i < static_cast<int>(trans.size()); i++) {
trans_dims[i] = in_dims[trans[i]];
}
for (size_t i = 0; i < trans.size(); i++) {
for (int i = 0; i < static_cast<int>(trans.size()); i++) {
trans_out_dims[i] = out_dims[trans[i]];
}
DenseTensor trans_inp;
trans_inp.Resize(trans_dims);
dev_ctx.template Alloc<T>(&trans_inp);
int ndims = trans.size();
int ndims = static_cast<int>(trans.size());
// transpose the input value
funcs::TransCompute<phi::CPUContext, T>(
......
......@@ -32,7 +32,7 @@ void TransposeKernel(const Context& ctx,
std::vector<int> formated_axis = axis;
for (size_t i = 0; i < axis.size(); i++) {
if (axis[i] < 0) {
formated_axis[i] = axis[i] + x_rank;
formated_axis[i] = static_cast<int>(axis[i] + x_rank);
}
}
......@@ -40,7 +40,7 @@ void TransposeKernel(const Context& ctx,
if (out->numel() == 0) {
return;
}
int rank = formated_axis.size();
int rank = static_cast<int>(formated_axis.size());
switch (rank) {
case 0:
phi::Copy<Context>(ctx, x, ctx.GetPlace(), false, out);
......
......@@ -37,8 +37,8 @@ void TriangularSolveKernel(const Context& dev_ctx,
std::vector<int64_t> y_bst_dims_vec;
std::tie(x_bst_dims_vec, y_bst_dims_vec) =
funcs::MatrixGetBroadcastDims(x, y);
int x_bst_ndim = x_bst_dims_vec.size();
int y_bst_ndim = y_bst_dims_vec.size();
int x_bst_ndim = static_cast<int>(x_bst_dims_vec.size());
int y_bst_ndim = static_cast<int>(y_bst_dims_vec.size());
// Tensor broadcast to 'out' and temp 'x_bst'
IntArray x_bst_dims(x_bst_dims_vec);
......@@ -56,7 +56,7 @@ void TriangularSolveKernel(const Context& dev_ctx,
int N = static_cast<int>(y_bst_dims_vec[y_bst_ndim - 1]);
int batch_size = 1;
for (int i = 0; i < x_bst_ndim - 2; i++) {
batch_size *= x_bst_dims_vec[i];
batch_size *= static_cast<int>(x_bst_dims_vec[i]);
}
auto blas = phi::funcs::GetBlas<CPUContext, T>(dev_ctx);
......
......@@ -25,7 +25,7 @@ void TruncGradKernel(const Context& dev_ctx,
DenseTensor* in_grad) {
T* dx_data = dev_ctx.template Alloc<T>(in_grad);
int numel = in_grad->numel();
int numel = static_cast<int>(in_grad->numel());
memset(dx_data, 0.0, numel * sizeof(T));
}
......
......@@ -40,12 +40,12 @@ void UnpoolGradKernel(const Context& dev_ctx,
const T* output_grad_data = out_grad.data<T>();
phi::funcs::SetConstant<Context, T> zero;
zero(dev_ctx, x_grad, static_cast<T>(0));
const int batch_size = x.dims()[0];
const int input_height = x.dims()[2];
const int input_width = x.dims()[3];
const int output_channels = out.dims()[1];
const int output_height = out.dims()[2];
const int output_width = out.dims()[3];
const int batch_size = static_cast<int>(x.dims()[0]);
const int input_height = static_cast<int>(x.dims()[2]);
const int input_width = static_cast<int>(x.dims()[3]);
const int output_channels = static_cast<int>(out.dims()[1]);
const int output_height = static_cast<int>(out.dims()[2]);
const int output_width = static_cast<int>(out.dims()[3]);
int input_feasize = input_height * input_width;
int output_feasize = output_height * output_width;
const int* indices_data = indices.data<int>();
......@@ -91,14 +91,14 @@ void Unpool3dGradKernel(const Context& dev_ctx,
phi::funcs::SetConstant<Context, T> zero;
zero(dev_ctx, x_grad, static_cast<T>(0));
const int batch_size = x.dims()[0];
const int input_depth = x.dims()[2];
const int input_height = x.dims()[3];
const int input_width = x.dims()[4];
const int output_channels = out.dims()[1];
const int output_depth = out.dims()[2];
const int output_height = out.dims()[3];
const int output_width = out.dims()[4];
const int batch_size = static_cast<int>(x.dims()[0]);
const int input_depth = static_cast<int>(x.dims()[2]);
const int input_height = static_cast<int>(x.dims()[3]);
const int input_width = static_cast<int>(x.dims()[4]);
const int output_channels = static_cast<int>(out.dims()[1]);
const int output_depth = static_cast<int>(out.dims()[2]);
const int output_height = static_cast<int>(out.dims()[3]);
const int output_width = static_cast<int>(out.dims()[4]);
int input_feasize = input_depth * input_height * input_width;
int output_feasize = output_depth * output_height * output_width;
const int* indices_data = indices.data<int>();
......
......@@ -38,12 +38,12 @@ void UnpoolKernel(const Context& dev_ctx,
phi::funcs::SetConstant<Context, T> set_zero;
set_zero(dev_ctx, out, static_cast<T>(0));
}
const int batch_size = x.dims()[0];
const int input_height = x.dims()[2];
const int input_width = x.dims()[3];
const int output_channels = out->dims()[1];
const int output_height = out->dims()[2];
const int output_width = out->dims()[3];
const int batch_size = static_cast<int>(x.dims()[0]);
const int input_height = static_cast<int>(x.dims()[2]);
const int input_width = static_cast<int>(x.dims()[3]);
const int output_channels = static_cast<int>(out->dims()[1]);
const int output_height = static_cast<int>(out->dims()[2]);
const int output_width = static_cast<int>(out->dims()[3]);
int input_feasize = input_height * input_width;
int output_feasize = output_height * output_width;
const T* input_data = x.data<T>();
......@@ -87,14 +87,14 @@ void Unpool3dKernel(const Context& dev_ctx,
phi::funcs::SetConstant<Context, T> set_zero;
set_zero(dev_ctx, out, static_cast<T>(0));
}
const int batch_size = x.dims()[0];
const int input_depth = x.dims()[2];
const int input_height = x.dims()[3];
const int input_width = x.dims()[4];
const int output_channels = out->dims()[1];
const int output_depth = out->dims()[2];
const int output_height = out->dims()[3];
const int output_width = out->dims()[4];
const int batch_size = static_cast<int>(x.dims()[0]);
const int input_depth = static_cast<int>(x.dims()[2]);
const int input_height = static_cast<int>(x.dims()[3]);
const int input_width = static_cast<int>(x.dims()[4]);
const int output_channels = static_cast<int>(out->dims()[1]);
const int output_depth = static_cast<int>(out->dims()[2]);
const int output_height = static_cast<int>(out->dims()[3]);
const int output_width = static_cast<int>(out->dims()[4]);
int input_feasize = input_depth * input_height * input_width;
int output_feasize = output_depth * output_height * output_width;
const T* input_data = x.data<T>();
......
......@@ -219,7 +219,7 @@ void WeightedSampleNeighborsKernel(const Context& dev_ctx,
const T* x_data = x.data<T>();
const T* eids_data =
(eids.get_ptr() == nullptr ? nullptr : eids.get_ptr()->data<T>());
int bs = x.dims()[0];
int bs = static_cast<int>(x.dims()[0]);
std::vector<T> output;
std::vector<int> output_count;
......
......@@ -38,13 +38,13 @@ void YoloBoxKernel(const Context& dev_ctx,
auto* input = &x;
auto* imgsize = &img_size;
float scale = scale_x_y;
float bias = -0.5 * (scale - 1.);
float bias = -0.5f * (scale - 1.f);
const int n = input->dims()[0];
const int h = input->dims()[2];
const int w = input->dims()[3];
const int box_num = boxes->dims()[1];
const int an_num = anchors.size() / 2;
const int n = static_cast<int>(input->dims()[0]);
const int h = static_cast<int>(input->dims()[2]);
const int w = static_cast<int>(input->dims()[3]);
const int box_num = static_cast<int>(boxes->dims()[1]);
const int an_num = static_cast<int>(anchors.size() / 2);
int input_size_h = downsample_ratio * h;
int input_size_w = downsample_ratio * w;
......
......@@ -140,12 +140,12 @@ void YoloLossGradKernel(const Context& dev_ctx,
auto input_grad = x_grad;
auto* objness_mask = &objectness_mask;
const int n = input_grad->dims()[0];
const int c = input_grad->dims()[1];
const int h = input_grad->dims()[2];
const int w = input_grad->dims()[3];
const int mask_num = anchor_mask.size();
const int b = gt_match_mask.dims()[1];
const int n = static_cast<int>(input_grad->dims()[0]);
const int c = static_cast<int>(input_grad->dims()[1]);
const int h = static_cast<int>(input_grad->dims()[2]);
const int w = static_cast<int>(input_grad->dims()[3]);
const int mask_num = static_cast<int>(anchor_mask.size());
const int b = static_cast<int>(gt_match_mask.dims()[1]);
int input_size = downsample_ratio * h;
const int stride = h * w;
......
......@@ -40,7 +40,7 @@ static T L1Loss(T x, T y) {
}
static int GetMaskIndex(std::vector<int> mask, int val) {
for (size_t i = 0; i < mask.size(); i++) {
for (int i = 0; i < static_cast<int>(mask.size()); i++) {
if (mask[i] == val) {
return i;
}
......@@ -196,14 +196,14 @@ void YoloLossKernel(const Context& dev_ctx,
auto* input = &x;
auto objness_mask = objectness_mask;
float scale = scale_x_y;
float bias = -0.5 * (scale - 1.);
float bias = -0.5f * (scale - 1.f);
const int n = input->dims()[0];
const int h = input->dims()[2];
const int w = input->dims()[3];
const int an_num = anchors.size() / 2;
const int mask_num = anchor_mask.size();
const int b = gt_box.dims()[1];
const int n = static_cast<int>(input->dims()[0]);
const int h = static_cast<int>(input->dims()[2]);
const int w = static_cast<int>(input->dims()[3]);
const int an_num = static_cast<int>(anchors.size() / 2);
const int mask_num = static_cast<int>(anchor_mask.size());
const int b = static_cast<int>(gt_box.dims()[1]);
int input_size = downsample_ratio * h;
const int stride = h * w;
......
......@@ -97,11 +97,11 @@ struct BenchFunc {
for (int i = 0; i < FLAGS_burning; ++i) {
tgt(args...);
}
auto start = phi::PosixInNsec() * 1e-3;
double start = static_cast<double>(phi::PosixInNsec()) * 1e-3;
for (int i = 0; i < FLAGS_repeat; ++i) {
tgt(args...);
}
auto end = phi::PosixInNsec() * 1e-3;
double end = static_cast<double>(phi::PosixInNsec()) * 1e-3;
return static_cast<double>(end - start) / FLAGS_repeat;
}
};
......
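The BenchFunc change above is the integer-to-floating-point case: a 64-bit nanosecond count is not always exactly representable in a double, so the implicit conversion in PosixInNsec() * 1e-3 is made explicit. A sketch under the assumption that the clock returns int64_t nanoseconds (NowInNsec() is a hypothetical stand-in for phi::PosixInNsec()):

// Hypothetical stand-in clock; returns nanoseconds as int64_t.
#include <chrono>
#include <cstdint>

int64_t NowInNsec() {
  return std::chrono::duration_cast<std::chrono::nanoseconds>(
             std::chrono::steady_clock::now().time_since_epoch())
      .count();
}

double ElapsedUsPerRepeat(int repeat) {
  // Make the int64_t -> double conversion explicit before scaling to us.
  double start = static_cast<double>(NowInNsec()) * 1e-3;
  // ... run the benchmarked target `repeat` times here ...
  double end = static_cast<double>(NowInNsec()) * 1e-3;
  return (end - start) / repeat;
}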
......@@ -192,12 +192,12 @@ void AdamDenseParamSparseGradKernel(
"multi thread, currently "
<< param_row_count;
}
for (size_t i = 0; i < grad_rows.size(); ++i) {
for (int i = 0; i < static_cast<int>(grad_rows.size()); ++i) {
row_id_to_grad_row_offset[grad_rows[i]] = i;
}
std::vector<std::future<void>> fs;
int64_t line_in_each_thread =
param_row_count / FLAGS_inner_op_parallelism + 1;
int64_t line_in_each_thread = static_cast<int64_t>(
param_row_count / FLAGS_inner_op_parallelism + static_cast<int64_t>(1));
for (int i = 0; i < FLAGS_inner_op_parallelism; ++i) {
int64_t start = i * line_in_each_thread;
int64_t end = (i + 1) * line_in_each_thread;
......
......@@ -61,7 +61,7 @@ void HSigmoidLossGradKernel(const Context& ctx,
w_grad->set_height(w.dims()[0]);
auto* w_grad_value = w_grad->mutable_value();
phi::DDim temp_dim(w.dims());
temp_dim[0] = real_rows.size();
temp_dim[0] = static_cast<int>(real_rows.size());
w_grad_value->Resize(temp_dim);
phi::HSigmoidLossGradKernelImpl<T>(ctx,
x,
......
......@@ -52,7 +52,7 @@ void CoalesceCooCPUKernel(const CPUContext& dev_ctx,
IntT index = x_indexs[i];
if (indices_to_index.find(index) == indices_to_index.end()) {
std::vector<int64_t> indexs;
indexs.push_back(i);
indexs.push_back(static_cast<int>(i));
indices_to_index[index] = indexs;
} else {
indices_to_index[index].push_back(i);
......
......@@ -49,10 +49,12 @@ void Conv3dCooGradCPUKernel(const CPUContext& dev_ctx,
const auto& kernel_dims = kernel.dims();
const bool is2D = kernel_dims.size() == 4 ? true : false;
const int kernel_size =
is2D ? kernel_dims[0] * kernel_dims[1]
: kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
const int in_channels = is2D ? kernel_dims[2] : kernel_dims[3];
const int out_channels = is2D ? kernel_dims[3] : kernel_dims[4];
static_cast<int>(is2D ? kernel_dims[0] * kernel_dims[1]
: kernel_dims[0] * kernel_dims[1] * kernel_dims[2]);
const int in_channels =
static_cast<int>(is2D ? kernel_dims[2] : kernel_dims[3]);
const int out_channels =
static_cast<int>(is2D ? kernel_dims[3] : kernel_dims[4]);
int rulebook_len = 0;
const IntT* rulebook_ptr = phi::funcs::sparse::GetRulebookPtr<IntT>(
......
......@@ -47,8 +47,9 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
const auto& x_dims = x.dims();
const bool is2D = x_dims.size() == 4 ? true : false;
const auto& kernel_dims = kernel.dims();
int kernel_size = is2D ? kernel_dims[0] * kernel_dims[1]
: kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
int kernel_size =
static_cast<int>(is2D ? kernel_dims[0] * kernel_dims[1]
: kernel_dims[0] * kernel_dims[1] * kernel_dims[2]);
int count_tmp = is2D ? 4 : 5;
std::vector<int> out_dims_vec(count_tmp, 1);
......@@ -56,7 +57,7 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
std::vector<int> kernel_sizes(kernel_dims.size());
for (int i = 0; i < kernel_dims.size(); i++) {
kernel_sizes[i] = kernel_dims[i];
kernel_sizes[i] = static_cast<int>(kernel_dims[i]);
}
std::vector<int> subm_paddings(paddings), subm_strides(strides);
......@@ -69,8 +70,10 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
phi::funcs::sparse::GetOutShape(
x_dims, kernel_sizes, subm_paddings, dilations, subm_strides, &out_dims);
const int in_channels = is2D ? kernel_dims[2] : kernel_dims[3];
const int out_channels = is2D ? kernel_dims[3] : kernel_dims[4];
const int in_channels =
static_cast<int>(is2D ? kernel_dims[2] : kernel_dims[3]);
const int out_channels =
static_cast<int>(is2D ? kernel_dims[3] : kernel_dims[4]);
// Second algorithm:
// https://pdfs.semanticscholar.org/5125/a16039cabc6320c908a4764f32596e018ad3.pdf
......@@ -112,7 +115,7 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
UpdateRulebookAndOutIndex<T, CPUContext, IntT>(
dev_ctx, x, kernel_size, out_channels, out_dims, &tmp_rulebook, out);
n = tmp_rulebook.dims()[1];
n = static_cast<int>(tmp_rulebook.dims()[1]);
rulebook_ptr = tmp_rulebook.data<IntT>();
phi::funcs::sparse::SaveToTable(
......
......@@ -122,8 +122,8 @@ void CopyCsrValues(const Context& dev_ctx,
Copy(dev_ctx, x.cols(), dev_ctx.GetPlace(), false, dx->mutable_cols());
const auto& x_dims = x.dims();
int batch = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
int batch = static_cast<int>(x_dims.size() == 2 ? 1 : x_dims[0]);
int rows = static_cast<int>(x_dims.size() == 2 ? x_dims[0] : x_dims[1]);
const IntT* x_crows_ptr = x.crows().data<IntT>();
const IntT* x_cols_ptr = x.cols().data<IntT>();
......
......@@ -52,7 +52,7 @@ void MaskCooCPUKernel(const CPUContext& dev_ctx,
const int64_t non_zero_num = mask.nnz();
auto dims_2d = flatten_to_2d(dims, sparse_dim);
const int cols = dims_2d[1];
const int cols = static_cast<int>(dims_2d[1]);
const IntT* indices_ptr = indices.data<IntT>();
std::vector<IntT> out_indexs(non_zero_num), sparse_offsets(sparse_dim);
......
......@@ -34,8 +34,8 @@ void MaxPoolCooGradCPUKernel(const CPUContext& dev_ctx,
const std::vector<int>& kernel_sizes,
SparseCooTensor* x_grad) {
int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
const int channels = x.dims()[4];
int rulebook_len = rulebook.dims()[1];
const int channels = static_cast<int>(x.dims()[4]);
int rulebook_len = static_cast<int>(rulebook.dims()[1]);
const IntT* rulebook_ptr = rulebook.data<IntT>();
std::vector<int> offsets(kernel_size + 1);
const int* counter_ptr = counter.data<int>();
......
......@@ -42,7 +42,9 @@ void MaxPoolCooCPUKernel(const CPUContext& dev_ctx,
const auto& x_dims = x.dims();
int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
const std::vector<int>& real_kernel_sizes =
phi::funcs::sparse::PoolResetKernel(kernel_sizes, x_dims[4], x_dims[4]);
phi::funcs::sparse::PoolResetKernel(kernel_sizes,
static_cast<int>(x_dims[4]),
static_cast<int>(x_dims[4]));
DDim out_dims = {1, 1, 1, 1, 1};
phi::funcs::sparse::GetOutShape(
x_dims, real_kernel_sizes, paddings, dilations, strides, &out_dims);
......@@ -66,7 +68,7 @@ void MaxPoolCooCPUKernel(const CPUContext& dev_ctx,
UpdateRulebookAndOutIndex<T, CPUContext, IntT>(
dev_ctx, x, kernel_size, in_channels, out_dims, rulebook, out);
int rulebook_len = rulebook->dims()[1];
int rulebook_len = static_cast<int>(rulebook->dims()[1]);
const IntT* rulebook_ptr = rulebook->data<IntT>();
counter->Resize({kernel_size});
......
......@@ -68,7 +68,7 @@ void ReshapeCooCPUKernel(const Context& dev_ctx,
for (int i = 0; i < x.sparse_dim(); ++i) {
location += x_indices_data[i * x_nnz + j] * x_sparse_part_strides[i];
}
for (size_t i = 0; i < out_sparse_part_dims.size(); ++i) {
for (int i = 0; i < static_cast<int>(out_sparse_part_dims.size()); ++i) {
out_indices_data[i * x_nnz + j] = location / out_sparse_part_strides[i];
location %= out_sparse_part_strides[i];
}
......
......@@ -48,9 +48,9 @@ void SoftmaxCsrGradKernel(const Context& dev_ctx,
int row_number = 1;
for (int i = 0; i < out_rank - 1; ++i) {
if (i < out_rank - 2) {
batch_size *= out_dim[i];
batch_size *= static_cast<int>(out_dim[i]);
} else if (i == out_rank - 2) {
row_number = out_dim[i];
row_number = static_cast<int>(out_dim[i]);
}
}
......
......@@ -45,9 +45,9 @@ void SoftmaxCsrKernel(const Context& dev_ctx,
int row_number = 1;
for (int i = 0; i < x_rank - 1; ++i) {
if (i < x_rank - 2) {
batch_size *= x_dim[i];
batch_size *= static_cast<int>(x_dim[i]);
} else if (i == x_rank - 2) {
row_number = x_dim[i];
row_number = static_cast<int>(x_dim[i]);
}
}
......
......@@ -48,9 +48,9 @@ inline int64_t GetNonZeroNum(const DenseTensor& dense,
sparse_dim,
dims.size()));
auto dims_2d = flatten_to_2d(dims, sparse_dim);
const int rows = dims_2d[0];
const int cols = dims_2d[1];
auto dims_2d = flatten_to_2d(dims, static_cast<int>(sparse_dim));
const int rows = static_cast<int>(dims_2d[0]);
const int cols = static_cast<int>(dims_2d[1]);
const T* data = dense.data<T>();
int64_t non_zero_num = 0;
......@@ -87,15 +87,15 @@ void DenseToCooKernel(const Context& dev_ctx,
int64_t* indices_data = indices.data<int64_t>();
T* values_data = values.data<T>();
auto dims_2d = flatten_to_2d(x_dims, sparse_dim);
const int rows = dims_2d[0];
const int cols = dims_2d[1];
auto dims_2d = flatten_to_2d(x_dims, static_cast<int>(sparse_dim));
const int rows = static_cast<int>(dims_2d[0]);
const int cols = static_cast<int>(dims_2d[1]);
int index = 0;
for (int i = 0; i < rows; i++) {
if (!IsZero(x_data + i * cols, cols)) {
int64_t sparse_index = i;
for (int64_t j = sparse_dim - 1; j >= 0; j--) {
for (int j = static_cast<int>(sparse_dim - 1); j >= 0; j--) {
indices_data[j * non_zero_num + index] = sparse_index % x_dims[j];
sparse_index /= x_dims[j];
}
......@@ -138,8 +138,8 @@ void CsrToCooCPUKernel(const CPUContext& dev_ctx,
IntT* coo_cols_data = coo_rows_data + non_zero_num;
T* coo_values_data = values.data<T>();
int batch = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
int batch = static_cast<int>(x_dims.size() == 2 ? 1 : x_dims[0]);
int rows = static_cast<int>(x_dims.size() == 2 ? x_dims[0] : x_dims[1]);
int index = 0;
for (int b = 0; b < batch; b++) {
......@@ -182,8 +182,8 @@ void CooToCsrCPUKernel(const CPUContext& dev_ctx,
"SparseCsrTensor only support 2-D or 3-D matrix"));
const int64_t non_zero_num = x.nnz();
int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
int batchs = static_cast<int>(x_dims.size() == 2 ? 1 : x_dims[0]);
int rows = static_cast<int>(x_dims.size() == 2 ? x_dims[0] : x_dims[1]);
phi::DenseTensor crows = phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
phi::DenseTensor cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
......@@ -221,9 +221,9 @@ void CooToCsrCPUKernel(const CPUContext& dev_ctx,
for (int b = 0; b < batchs; b++) {
int batch_start = 0;
int batch_non_zero_num = offsets[b];
int batch_non_zero_num = static_cast<int>(offsets[b]);
if (b > 0) {
batch_start = offsets[b - 1];
batch_start = static_cast<int>(offsets[b - 1]);
batch_non_zero_num -= batch_start;
}
auto* coo_rows_ptr = coo_rows_data + batch_start;
......@@ -283,11 +283,11 @@ void CooToDenseCPUKernel(const CPUContext& dev_ctx,
int64_t base_offset = 1;
for (int64_t i = 0; i < dense_dim; i++) {
base_offset *= dense_dims[sparse_dim + i];
base_offset *= dense_dims[static_cast<int>(sparse_dim + i)];
}
std::vector<int64_t> sparse_offsets(sparse_dim);
int64_t offset = 1;
for (int i = sparse_dim - 1; i >= 0; i--) {
for (int i = static_cast<int>(sparse_dim - 1); i >= 0; i--) {
sparse_offsets[i] = offset;
offset *= dense_dims[i];
}
......
......@@ -26,7 +26,7 @@ namespace sparse {
std::vector<int> get_cpu_grad_perm(std::vector<int> perm) {
std::vector<int> grad_perm(perm.size());
for (unsigned int i = 0; i < perm.size(); ++i) {
grad_perm[perm[i]] = i;
grad_perm[perm[i]] = static_cast<int>(i);
}
return grad_perm;
}
......
......@@ -179,7 +179,7 @@ void TransposeCsrKernel(const Context& dev_ctx,
int64_t x_cols_offset = 0;
int out_cols_index = 0;
for (int i = 0; i < x.dims()[0]; ++i) {
int x_crows_index = i * (x_n_rows + 1);
int x_crows_index = static_cast<int>(i * (x_n_rows + 1));
int64_t start = x_crows_data[x_crows_index + k];
int64_t end = x_crows_data[x_crows_index + 1 + k];
out_crows_data[i + 1] = end - start;
......
......@@ -24,8 +24,8 @@ void AsStridedKernel(const Context& dev_ctx,
const std::vector<int64_t>& stride,
int64_t offset,
DenseTensor* out) {
out->Resize(DDim(dims.data(), dims.size()));
out->set_strides(DDim(stride.data(), stride.size()));
out->Resize(DDim(dims.data(), static_cast<int>(dims.size())));
out->set_strides(DDim(stride.data(), static_cast<int>(stride.size())));
out->set_offset(offset);
out->ResetHolder(input.Holder());
}
......
......@@ -30,25 +30,27 @@ void DiagonalStridedKernel(const Context& dev_ctx,
DenseTensor* out) {
size_t x_rank = x.dims().size();
if (axis1 < 0) {
axis1 += x_rank;
axis1 += static_cast<int>(x_rank);
}
if (axis2 < 0) {
axis2 += x_rank;
axis2 += static_cast<int>(x_rank);
}
int64_t diag_size;
int64_t x_offset = x.offset();
int64_t x_offset = static_cast<int64_t>(x.offset());
if (offset >= 0) {
diag_size = std::max<int64_t>(
std::min(x.dims()[axis1], x.dims()[axis2] - offset), 0);
if (diag_size != 0) {
x_offset += offset * x.strides()[axis2] * SizeOf(x.dtype());
x_offset +=
static_cast<int64_t>(offset * x.strides()[axis2] * SizeOf(x.dtype()));
}
} else {
diag_size = std::max<int64_t>(
std::min(x.dims()[axis1] + offset, x.dims()[axis2]), 0);
if (diag_size != 0) {
x_offset -= offset * x.strides()[axis1] * SizeOf(x.dtype());
x_offset -=
static_cast<int64_t>(offset * x.strides()[axis1] * SizeOf(x.dtype()));
}
}
......@@ -62,7 +64,7 @@ void DiagonalStridedKernel(const Context& dev_ctx,
stride.push_back(x.strides()[axis1] + x.strides()[axis2]);
auto meta = out->meta();
auto tmp_dim = DDim(shape.data(), shape.size());
auto tmp_dim = DDim(shape.data(), static_cast<int>(shape.size()));
// if (product(meta.dims) > 0 && meta.dims != tmp_dim) {
// PADDLE_THROW(
// phi::errors::Fatal("Diagonal kernel stride compute diff, infer shape
......@@ -72,7 +74,7 @@ void DiagonalStridedKernel(const Context& dev_ctx,
// tmp_dim));
// }
meta.dims = tmp_dim;
meta.strides = DDim(stride.data(), stride.size());
meta.strides = DDim(stride.data(), static_cast<int>(stride.size()));
meta.offset = x_offset;
out->set_meta(meta);
out->ResetHolder(x.Holder());
......
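DiagonalStridedKernel mixes a size_t byte offset (x.offset()), int64_t strides, and a size_t element size (SizeOf), so the products are computed in the unsigned type; the diff casts the results back to int64_t explicitly. A reduced sketch with SizeOf replaced by a plain parameter (names are illustrative):

#include <cstddef>
#include <cstdint>

// Offset arithmetic as in the diagonal kernel: a signed stride times an
// unsigned element size is unsigned, so converting the product back into
// the signed byte offset is a narrowing that clang-tidy wants spelled out.
int64_t AdvanceOffset(size_t base_offset, int64_t diag_offset,
                      int64_t stride, size_t elem_size) {
  int64_t offset = static_cast<int64_t>(base_offset);
  offset += static_cast<int64_t>(diag_offset * stride * elem_size);
  return offset;
}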
......@@ -32,15 +32,16 @@ void IndexSelectStridedKernel(const Context& ctx,
std::vector<int64_t> shape = phi::vectorize<int64_t>(x.dims());
std::vector<int64_t> stride = phi::vectorize<int64_t>(x.strides());
int64_t offset = x.offset();
int64_t offset = static_cast<int64_t>(x.offset());
offset = offset + index * stride[dim] * SizeOf(output->dtype());
offset = static_cast<int64_t>(offset +
index * stride[dim] * SizeOf(output->dtype()));
shape.erase(shape.begin() + dim);
stride.erase(stride.begin() + dim);
auto meta = output->meta();
meta.offset = offset;
auto tmp_dim = DDim(shape.data(), shape.size());
auto tmp_dim = DDim(shape.data(), static_cast<int>(shape.size()));
// if (product(meta.dims) > 0 && meta.dims != tmp_dim) {
// PADDLE_THROW(
// phi::errors::Fatal("Index_select kernel stride compute diff, infer "
......@@ -49,7 +50,7 @@ void IndexSelectStridedKernel(const Context& ctx,
// tmp_dim));
// }
meta.dims = tmp_dim;
meta.strides = DDim(stride.data(), stride.size());
meta.strides = DDim(stride.data(), static_cast<int>(stride.size()));
output->set_meta(meta);
output->ResetHolder(x.Holder());
}
......
......@@ -49,17 +49,18 @@ void SliceStridedKernel(const Context& ctx,
std::vector<int64_t> output_dims = phi::vectorize<int64_t>(input.dims());
std::vector<int64_t> output_stride = phi::vectorize<int64_t>(input.strides());
int64_t output_offset = input.offset();
int64_t output_offset = static_cast<int64_t>(input.offset());
for (size_t i = 0; i < new_axes.size(); ++i) {
output_offset = output_offset + starts[i] * output_stride[new_axes[i]] *
SizeOf(out->dtype());
output_offset = static_cast<int64_t>(
output_offset +
starts[i] * output_stride[new_axes[i]] * SizeOf(out->dtype()));
output_dims[new_axes[i]] = ends[i] - starts[i];
}
std::vector<uint8_t> decrease_flag(output_dims.size(), 0);
if (decrease_axis.size() > 0) {
for (size_t i = 0; i < decrease_axis.size(); ++i) {
for (int i = 0; i < static_cast<int>(decrease_axis.size()); ++i) {
int64_t axis = decrease_axis[i];
decrease_flag[axis] = 1;
}
......@@ -84,7 +85,7 @@ void SliceStridedKernel(const Context& ctx,
auto meta = out->meta();
meta.offset = output_offset;
auto tmp_dim = DDim(output_dims.data(), output_dims.size());
auto tmp_dim = DDim(output_dims.data(), static_cast<int>(output_dims.size()));
// if (product(meta.dims) > 0 && meta.dims != tmp_dim) {
// PADDLE_THROW(
// phi::errors::Fatal("Slice kernel stride compute diff, infer shape is
......@@ -94,7 +95,8 @@ void SliceStridedKernel(const Context& ctx,
// tmp_dim));
// }
meta.dims = tmp_dim;
meta.strides = DDim(output_stride.data(), output_stride.size());
meta.strides =
DDim(output_stride.data(), static_cast<int>(output_stride.size()));
out->set_meta(meta);
out->ResetHolder(input.Holder());
}
......
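SliceStridedKernel shows the same two patterns side by side: accumulating a byte offset through an unsigned multiplication, and iterating over a container with an int index bounded by an explicitly narrowed size. A condensed sketch with plain vectors (function and parameter names are hypothetical):

#include <cstddef>
#include <cstdint>
#include <vector>

// Accumulate the byte offset of a slice and mark decreased axes.
int64_t SliceOffset(size_t base_offset,
                    const std::vector<int64_t>& starts,
                    const std::vector<int64_t>& strides,
                    size_t elem_size,
                    const std::vector<int64_t>& decrease_axis,
                    std::vector<uint8_t>* decrease_flag) {
  int64_t offset = static_cast<int64_t>(base_offset);
  for (size_t i = 0; i < starts.size(); ++i) {
    // int64_t * size_t promotes to unsigned, hence the cast back.
    offset = static_cast<int64_t>(offset + starts[i] * strides[i] * elem_size);
  }
  for (int i = 0; i < static_cast<int>(decrease_axis.size()); ++i) {
    (*decrease_flag)[decrease_axis[i]] = 1;
  }
  return offset;
}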
......@@ -28,7 +28,7 @@ void SplitStridedKernel(const Context& dev_ctx,
const IntArray& sections UNUSED,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
int64_t num = outs.size();
int64_t num = static_cast<int64_t>(outs.size());
int64_t start = 0;
int axis = axis_scalar.to<int>();
......
......@@ -49,7 +49,8 @@ void SqueezeInferStridedKernel(const Context& dev_ctx,
} else {
for (auto& item : axes) {
item = item < 0 ? item + input_stride.size() : item;
if (item != 0 && input_stride[item] == input_stride[item - 1]) {
if (item != 0 && input_stride[static_cast<int>(item)] ==
input_stride[static_cast<int>(item) - 1]) {
axes_set.insert(item);
}
}
......@@ -65,7 +66,8 @@ void SqueezeInferStridedKernel(const Context& dev_ctx,
auto meta = out->meta();
meta.offset = input.offset();
meta.strides = DDim(output_stride.data(), output_stride.size());
meta.strides =
DDim(output_stride.data(), static_cast<int>(output_stride.size()));
out->set_meta(meta);
return;
}
......@@ -80,7 +82,7 @@ void SqueezeInferStridedKernel(const Context& dev_ctx,
} else {
for (auto item : axes) {
auto axis = item < 0 ? item + input_dims.size() : item;
if (input_dims[axis] == 1) {
if (input_dims[static_cast<int>(axis)] == 1) {
axes_set.insert(axis);
}
}
......@@ -94,7 +96,7 @@ void SqueezeInferStridedKernel(const Context& dev_ctx,
}
auto meta = out->meta();
auto tmp_dim = DDim(output_dims.data(), output_dims.size());
auto tmp_dim = DDim(output_dims.data(), static_cast<int>(output_dims.size()));
// if (product(meta.dims) > 0 && meta.dims != tmp_dim) {
// PADDLE_THROW(
// phi::errors::Fatal("Unsqueeze kernel stride compute diff, infer
......@@ -104,7 +106,8 @@ void SqueezeInferStridedKernel(const Context& dev_ctx,
// tmp_dim));
// }
meta.dims = tmp_dim;
meta.strides = DDim(output_stride.data(), output_stride.size());
meta.strides =
DDim(output_stride.data(), static_cast<int>(output_stride.size()));
meta.offset = input.offset();
out->set_meta(meta);
out->ResetHolder(input.Holder());
......
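In SqueezeInferStridedKernel the negative-axis normalization adds the container size (a size_t) to a signed axis and then uses the axis as an index; both steps now carry explicit casts. A minimal sketch of the normalization, assuming axes lie in [-rank, rank) and dims is a plain int64_t vector:

#include <cstdint>
#include <unordered_set>
#include <vector>

// Normalize possibly-negative axes and collect those pointing at size-1
// dimensions, as the squeeze kernel does.
std::unordered_set<int64_t> SqueezableAxes(const std::vector<int64_t>& dims,
                                           const std::vector<int64_t>& axes) {
  std::unordered_set<int64_t> axes_set;
  for (auto item : axes) {
    // dims.size() is size_t; adding it to a negative axis and indexing
    // with the result are both narrowing steps, made explicit here.
    int64_t axis = item < 0 ? item + static_cast<int64_t>(dims.size()) : item;
    if (dims[static_cast<size_t>(axis)] == 1) {
      axes_set.insert(axis);
    }
  }
  return axes_set;
}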
......@@ -37,7 +37,7 @@ void StridedSliceRawStridedKernel(const Context& dev_ctx,
std::vector<int64_t> output_dims = phi::vectorize<int64_t>(input.dims());
std::vector<int64_t> output_stride = phi::vectorize<int64_t>(input.strides());
int64_t output_offset = input.offset();
int64_t output_offset = static_cast<int64_t>(input.offset());
for (size_t i = 0; i < axes.size(); ++i) {
int64_t axis_size = input.dims()[axes[i]];
......@@ -82,7 +82,8 @@ void StridedSliceRawStridedKernel(const Context& dev_ctx,
starts[i] = (strides[i] < 0) ? axis_size - 1 : axis_size;
}
output_offset += starts[i] * output_stride[axes[i]] * SizeOf(out->dtype());
output_offset += static_cast<int>(starts[i] * output_stride[axes[i]] *
SizeOf(out->dtype()));
output_dims[axes[i]] = dim;
output_stride[axes[i]] *= strides[i];
}
......@@ -107,7 +108,7 @@ void StridedSliceRawStridedKernel(const Context& dev_ctx,
auto meta = out->meta();
meta.offset = output_offset;
auto tmp_dim = DDim(output_dims.data(), output_dims.size());
auto tmp_dim = DDim(output_dims.data(), static_cast<int>(output_dims.size()));
// if (product(meta.dims) > 0 && meta.dims != tmp_dim) {
// PADDLE_THROW(
// phi::errors::Fatal("Striede_slice kernel stride compute diff, infer "
......@@ -116,7 +117,8 @@ void StridedSliceRawStridedKernel(const Context& dev_ctx,
// tmp_dim));
// }
meta.dims = tmp_dim;
meta.strides = DDim(output_stride.data(), output_stride.size());
meta.strides =
DDim(output_stride.data(), static_cast<int>(output_stride.size()));
out->set_meta(meta);
out->ResetHolder(input.Holder());
}
......
......@@ -30,7 +30,8 @@ void TensorUnfoldKernel(const Context& dev_ctx,
const DDim& input_dims = input.dims();
const DDim& input_stride = input.strides();
int64_t max_size = input_dims.size() == 0 ? 1 : input_dims[axis];
int64_t max_size =
input_dims.size() == 0 ? 1 : input_dims[static_cast<int>(axis)];
PADDLE_ENFORCE_LE(
size,
......@@ -48,7 +49,8 @@ void TensorUnfoldKernel(const Context& dev_ctx,
std::vector<int64_t> stride(input_dims.size() + 1);
shape[input_dims.size()] = size;
stride[input_dims.size()] = input_dims.size() == 0 ? 1 : input_stride[axis];
stride[input_dims.size()] =
input_dims.size() == 0 ? 1 : input_stride[static_cast<int>(axis)];
for (int i = 0; i < input_dims.size(); ++i) {
if (i == axis) {
shape[i] = (input_dims[i] - size) / step + 1;
......@@ -59,8 +61,8 @@ void TensorUnfoldKernel(const Context& dev_ctx,
}
}
out->Resize(DDim(shape.data(), shape.size()));
out->set_strides(DDim(stride.data(), stride.size()));
out->Resize(DDim(shape.data(), static_cast<int>(shape.size())));
out->set_strides(DDim(stride.data(), static_cast<int>(stride.size())));
out->set_offset(input.offset());
out->ResetHolder(input.Holder());
}
......
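TensorUnfoldKernel indexes the dims and strides holders with an int64_t axis, which the diff now narrows explicitly. A reduced sketch of the shape computation with plain vectors, assuming step > 0 and a valid axis (names are illustrative):

#include <cstdint>
#include <vector>

// Build the unfolded shape: windows of `size` elements taken every `step`
// along `axis`; the window length becomes a trailing dimension.
std::vector<int64_t> UnfoldShape(const std::vector<int64_t>& dims,
                                 int64_t axis, int64_t size, int64_t step) {
  std::vector<int64_t> shape(dims.size() + 1);
  shape[dims.size()] = size;
  for (size_t i = 0; i < dims.size(); ++i) {
    if (static_cast<int64_t>(i) == axis) {
      shape[i] = (dims[static_cast<size_t>(axis)] - size) / step + 1;
    } else {
      shape[i] = dims[i];
    }
  }
  return shape;
}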
......@@ -28,12 +28,12 @@ void TransposeGradStridedKernel(const Context& dev_ctx,
std::vector<int> formated_axis = axis;
for (size_t i = 0; i < axis_size; i++) {
if (axis[i] < 0) {
formated_axis[i] = axis[i] + axis_size;
formated_axis[i] = static_cast<int>(axis[i] + axis_size);
}
}
std::vector<int> reversed_axis(axis);
for (size_t i = 0; i < axis_size; i++) {
for (int i = 0; i < static_cast<int>(axis_size); i++) {
reversed_axis[formated_axis[i]] = i;
}
......
......@@ -27,7 +27,7 @@ void TransposeStridedKernel(const Context& ctx,
std::vector<int> formated_axis = axis;
for (size_t i = 0; i < axis.size(); i++) {
if (axis[i] < 0) {
formated_axis[i] = axis[i] + x_rank;
formated_axis[i] = static_cast<int>(axis[i] + x_rank);
}
}
......@@ -35,7 +35,7 @@ void TransposeStridedKernel(const Context& ctx,
auto in_stride = x.strides();
auto in_dims = x.dims();
meta.strides = in_stride;
for (size_t i = 0; i < formated_axis.size(); i++) {
for (int i = 0; i < static_cast<int>(formated_axis.size()); i++) {
meta.strides[i] = in_stride[formated_axis[i]];
meta.dims[i] = in_dims[formated_axis[i]];
}
......
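TransposeStridedKernel and its grad normalize negative axes against the rank and then permute dims and strides with an int loop index bounded by a cast size. A condensed sketch using plain vectors, assuming dims, strides, and axis all have the same length:

#include <cstdint>
#include <vector>

// Permute dims and strides according to `axis`, normalizing negative
// entries against the rank first.
void PermuteDimsAndStrides(const std::vector<int>& axis,
                           std::vector<int64_t>* dims,
                           std::vector<int64_t>* strides) {
  std::vector<int> formatted_axis = axis;
  int rank = static_cast<int>(axis.size());
  for (size_t i = 0; i < axis.size(); ++i) {
    if (axis[i] < 0) {
      formatted_axis[i] = axis[i] + rank;
    }
  }
  std::vector<int64_t> in_dims = *dims;
  std::vector<int64_t> in_strides = *strides;
  for (int i = 0; i < static_cast<int>(formatted_axis.size()); ++i) {
    (*strides)[i] = in_strides[formatted_axis[i]];
    (*dims)[i] = in_dims[formatted_axis[i]];
  }
}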
......@@ -24,7 +24,7 @@ void UnbindStridedKernel(const Context& dev_ctx,
const DenseTensor& x,
int axis,
std::vector<DenseTensor*> outs) {
int64_t num = outs.size();
int64_t num = static_cast<int64_t>(outs.size());
int64_t start = 0;
axis = axis < 0 ? axis + x.dims().size() : axis;
......
......@@ -34,7 +34,8 @@ void UnsqueezeInferStridedKernel(const Context& dev_ctx,
if (input.Holder() == out->Holder() && input.meta() == out->meta()) {
input_dims = phi::vectorize<int64_t>(out->dims());
for (int64_t i = static_cast<int64_t>(axes.size() - 1); i >= 0; --i) {
axes[i] = axes[i] < 0 ? axes[i] + input_dims.size() : axes[i];
axes[i] = static_cast<int64_t>(axes[i] < 0 ? axes[i] + input_dims.size()
: axes[i]);
axes[i] = axes[i] < 0 ? 0 : axes[i];
input_dims.erase(input_dims.begin() + axes[i]);
}
......@@ -43,8 +44,9 @@ void UnsqueezeInferStridedKernel(const Context& dev_ctx,
std::vector<int64_t> output_dims = input_dims;
std::vector<int64_t> output_stride = input_stride;
for (auto item : axes) {
item = item < 0 ? item + output_dims.size() + 1 : item;
for (int64_t item : axes) {
item =
static_cast<int64_t>(item < 0 ? item + output_dims.size() + 1 : item);
item = item < 0 ? 0 : item;
int64_t stride = static_cast<size_t>(item) >= output_dims.size()
? 1
......@@ -54,7 +56,7 @@ void UnsqueezeInferStridedKernel(const Context& dev_ctx,
}
auto meta = out->meta();
auto tmp_dim = DDim(output_dims.data(), output_dims.size());
auto tmp_dim = DDim(output_dims.data(), static_cast<int>(output_dims.size()));
// if (product(meta.dims) > 0 && meta.dims != tmp_dim) {
// PADDLE_THROW(
// phi::errors::Fatal("Unsqueeze kernel stride compute diff, infer
......@@ -64,7 +66,8 @@ void UnsqueezeInferStridedKernel(const Context& dev_ctx,
// tmp_dim));
// }
meta.dims = tmp_dim;
meta.strides = DDim(output_stride.data(), output_stride.size());
meta.strides =
DDim(output_stride.data(), static_cast<int>(output_stride.size()));
meta.offset = input.offset();
out->set_meta(meta);
out->ResetHolder(input.Holder());
......
......@@ -23,7 +23,7 @@ void ViewShapeKernel(const Context& dev_ctx,
const DenseTensor& input,
const std::vector<int64_t>& dims,
DenseTensor* out) {
DDim new_dims = DDim(dims.data(), dims.size());
DDim new_dims = DDim(dims.data(), static_cast<int>(dims.size()));
DDim stride;
if (ReshapeStride(input.dims(), input.strides(), new_dims, stride)) {
auto meta = input.meta();
......@@ -67,11 +67,11 @@ void ViewDtypeKernel(const Context& dev_ctx,
DDim output_dims = input.dims();
output_dims[output_dims.size() - 1] =
output_dims[output_dims.size() - 1] * times;
output_dims[output_dims.size() - 1] * times; // NOLINT
DDim output_stride = input.strides();
for (int i = 0; i < output_stride.size(); i++) {
output_stride[i] = output_stride[i] * times;
output_stride[i] = output_stride[i] * times; // NOLINT
}
output_stride[output_stride.size() - 1] = 1;
......@@ -115,7 +115,7 @@ void ViewDtypeKernel(const Context& dev_ctx,
DDim output_dims = input.dims();
output_dims[output_dims.size() - 1] =
output_dims[output_dims.size() - 1] / times;
output_dims[output_dims.size() - 1] / times; // NOLINT
DDim output_stride = input.strides();
for (int i = 0; i < output_stride.size(); i++) {
......@@ -129,7 +129,7 @@ void ViewDtypeKernel(const Context& dev_ctx,
times,
input.dtype(),
dtype));
output_stride[i] = output_stride[i] / times;
output_stride[i] = output_stride[i] / times; // NOLINT
}
output_stride[output_stride.size() - 1] = 1;
......
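ViewDtypeKernel scales the last dimension and the strides by `times`; there the narrowing is intentional and the values are known to fit, so the diff suppresses the check with // NOLINT instead of casting. Both ways of satisfying cppcoreguidelines-narrowing-conversions, sketched on a plain int64_t vector assumed to be non-empty:

#include <cstddef>
#include <cstdint>
#include <vector>

// Reinterpreting the element type scales how many elements the last
// dimension holds. int64_t * size_t is computed in the unsigned type, so
// assigning it back to int64_t narrows. One option is an explicit cast:
void ScaleLastDim(std::vector<int64_t>* dims, size_t times) {
  dims->back() = static_cast<int64_t>(dims->back() * times);
}
// The diff instead keeps the original expression and silences the check on
// that line, which is the usual choice when the conversion is deliberate:
//   output_dims[output_dims.size() - 1] =
//       output_dims[output_dims.size() - 1] * times;  // NOLINT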
......@@ -35,10 +35,10 @@ const uint8_t* GetUniFlagMap() {
const uint16_t* GetCharcasesMap() {
if (utils_map[0] == nullptr) {
for (uint32_t i = 0; i < 65536; ++i) {
if (utf8proc_islower(i)) {
CHARCASES_MAP[i] = utf8proc_toupper(i);
} else if (utf8proc_isupper(i)) {
CHARCASES_MAP[i] = utf8proc_tolower(i);
if (utf8proc_islower(static_cast<int32_t>(i))) {
CHARCASES_MAP[i] = utf8proc_toupper(static_cast<int32_t>(i));
} else if (utf8proc_isupper(static_cast<int32_t>(i))) {
CHARCASES_MAP[i] = utf8proc_tolower(static_cast<int32_t>(i));
}
}
utils_map[0] = CHARCASES_MAP;
......
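The final hunk feeds an unsigned code-point counter to utf8proc functions that take a signed 32-bit code point, so each call now casts the counter. A self-contained sketch with a stand-in predicate rather than the real utf8proc API:

#include <cstdint>

// Stand-in for a utf8proc-style predicate taking a signed code point.
bool IsLowerCodePoint(int32_t cp) { return cp >= 'a' && cp <= 'z'; }

int CountLowerAscii() {
  int count = 0;
  // The counter is unsigned (the diff walks the whole 16-bit range); the
  // callee wants int32_t, so the conversion is written out at the call.
  for (uint32_t i = 0; i < 65536; ++i) {
    if (IsLowerCodePoint(static_cast<int32_t>(i))) {
      ++count;
    }
  }
  return count;
}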