PaddlePaddle / Paddle, commit 3f5c2b5f (unverified)
Authored Jul 03, 2023 by Wang Xin; committed via GitHub on Jul 03, 2023
[CodeStyle][CINN] fix cpplint codestyle for [readability/casting] (#55069)
Parent: 0fd50551
Showing 24 changed files with 111 additions and 106 deletions (+111, -106)
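The changes are mechanical: every C-style cast flagged by cpplint's readability/casting check is rewritten as the equivalent named C++ cast, with static_cast used for value conversions and reinterpret_cast for reinterpreting raw memory as another pointer type. As a rough illustration of the recurring pattern, here is a minimal sketch with made-up names (not code from the diff):

#include <cmath>

// Hypothetical helper, for illustration only: 'total' work items are split
// into groups of 'per_block', and a raw byte buffer is viewed as int words.
int demo(const unsigned char* raw, int total, int per_block) {
  // Before: int blocks = (int)ceil((double)total / per_block);
  int blocks =
      static_cast<int>(std::ceil(static_cast<double>(total) / per_block));
  // Before: const int* words = (const int*)raw;
  // (assumes 'raw' is suitably aligned and at least sizeof(int) bytes long)
  const int* words = reinterpret_cast<const int*>(raw);
  return blocks + words[0];
}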
paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc  +6 -5
paddle/cinn/backends/codegen_c.cc  +1 -1
paddle/cinn/backends/ir_schedule_test.cc  +6 -6
paddle/cinn/common/float16.h  +2 -2
paddle/cinn/common/float16_bfloat16_cuda_test.cu  +2 -2
paddle/cinn/frontend/net_builder_test.cc  +5 -3
paddle/cinn/hlir/framework/graph_compiler.cc  +2 -2
paddle/cinn/hlir/framework/op_lowering_util.cc  +5 -3
paddle/cinn/hlir/pass/alterlayout_test.cc  +3 -3
paddle/cinn/hlir/pe/nn.cc  +7 -7
paddle/cinn/hlir/pe/reduction.cc  +8 -6
paddle/cinn/hlir/pe/schedule.cc  +1 -1
paddle/cinn/ir/ir_schedule.cc  +1 -1
paddle/cinn/ir/ir_schedule_util.cc  +9 -7
paddle/cinn/ir/schedule_desc_test.cc  +25 -30
paddle/cinn/lang/lower_impl.cc  +3 -2
paddle/cinn/optim/compute_inline_expand.cc  +1 -1
paddle/cinn/optim/unroll_loops_test.cc  +1 -1
paddle/cinn/optim/vectorize_loops_test.cc  +1 -1
paddle/cinn/poly/stage.cc  +6 -6
paddle/cinn/runtime/cinn_runtime.cc  +1 -1
paddle/cinn/runtime/cpu/host_intrinsics.cc  +2 -2
paddle/cinn/runtime/cuda/float16.h  +1 -1
paddle/cinn/runtime/tiny_runtime.cc  +12 -12
paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc
@@ -68,8 +68,8 @@ class TestAutoBind : public TestAutoGenRuleBase {
   } else if (total_num <= kMaxBlocks * kMaxThreadsPerBlock) {
     ASSERT_EQ(all_loops.size(), 2);
     EXPECT_EQ(all_loops[0].As<ir::For>()->extent.as_int32(),
-              static_cast<int32_t>(std::ceil(double(total_num) / kMaxThreadsPerBlock)));
+              static_cast<int32_t>(
+                  std::ceil(static_cast<double>(total_num) / kMaxThreadsPerBlock)));
     EXPECT_TRUE(all_loops[0].As<ir::For>()->is_gpu_block_binded());
     EXPECT_EQ(all_loops[1].As<ir::For>()->extent.as_int32(), kMaxThreadsPerBlock);
...
@@ -81,9 +81,10 @@ class TestAutoBind : public TestAutoGenRuleBase {
     EXPECT_EQ(all_loops[1].As<ir::For>()->extent.as_int32(), kMaxThreadsPerBlock);
     EXPECT_TRUE(all_loops[1].As<ir::For>()->is_gpu_thread_binded());
     EXPECT_EQ(all_loops[2].As<ir::For>()->extent.as_int32(),
-              static_cast<int32_t>(
-                  std::ceil(double(total_num) / (kMaxBlocks * kMaxThreadsPerBlock))));
+              static_cast<int32_t>(std::ceil(static_cast<double>(total_num) /
+                                             (kMaxBlocks * kMaxThreadsPerBlock))));
     EXPECT_FALSE(all_loops[2].As<ir::For>()->is_binded());
   }
paddle/cinn/backends/codegen_c.cc
@@ -162,7 +162,7 @@ void CodeGenC::Visit(const ir::Mod *op) {
   auto copied = op->b();
   optim::Simplify(&copied);
   if (copied.is_constant()) {
-    int temp = (int)(copied.get_constant());
+    int temp = static_cast<int>(copied.get_constant());
     if ((temp & (temp - 1)) == 0) {
       os() << "(";
       Print(op->a());
paddle/cinn/backends/ir_schedule_test.cc
@@ -2871,11 +2871,11 @@ TEST(IrSchedule, Annotate) {
   ir::IRSchedule ir_sch(ir::ModuleExpr({funcs[0]->body}));
   auto fused = ir_sch.Fuse("B", {0, 1});
   auto block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k1", int(64));
+  ir_sch.Annotate(block_b, "k1", 64);
   block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k2", bool(true));
+  ir_sch.Annotate(block_b, "k2", true);
   block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k3", float(2.0));
+  ir_sch.Annotate(block_b, "k3", 2.0f);
   block_b = ir_sch.GetBlock("B");
   ir_sch.Annotate(block_b, "k4", std::string("v4"));
   std::string expected_expr = R"ROC({
...
@@ -2915,11 +2915,11 @@ TEST(IrSchedule, Unannotate) {
   ir::IRSchedule ir_sch(ir::ModuleExpr({funcs[0]->body}));
   auto fused = ir_sch.Fuse("B", {0, 1});
   auto block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k1", int(64));
+  ir_sch.Annotate(block_b, "k1", 64);
   block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k2", bool(true));
+  ir_sch.Annotate(block_b, "k2", true);
   block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k3", float(2.0));
+  ir_sch.Annotate(block_b, "k3", 2.0f);
   block_b = ir_sch.GetBlock("B");
   ir_sch.Annotate(block_b, "k4", std::string("v4"));
   block_b = ir_sch.GetBlock("B");
paddle/cinn/common/float16.h
@@ -597,9 +597,9 @@ __host__ __device__ inline bool(isfinite)(const float16& a) {
 __host__ __device__ inline float16(abs)(const float16& a) {
 #if defined(CINN_CUDA_FP16) && (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530)
-  return float16(__habs(a.to_half()));
+  return static_cast<float16>(__habs(a.to_half()));
 #else
-  return float16(fabsf(float(a)));
+  return static_cast<float16>(fabsf(static_cast<float>(a)));
 #endif
 }
paddle/cinn/common/float16_bfloat16_cuda_test.cu
@@ -100,7 +100,7 @@ __global__ void cast_fp16_to_fp32_cuda_kernel(const float16* input,
                                               float* out) {
   int idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (idx < num) {
-    out[idx] = float(input[idx]);
+    out[idx] = static_cast<float>(input[idx]);
   }
 }
...
@@ -131,7 +131,7 @@ __global__ void cast_bf16_to_fp32_cuda_kernel(const bfloat16* input,
                                               float* out) {
   int idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (idx < num) {
-    out[idx] = float(input[idx]);
+    out[idx] = static_cast<float>(input[idx]);
   }
 }
paddle/cinn/frontend/net_builder_test.cc
@@ -484,7 +484,7 @@ TEST(net_build, program_execute_cast) {
     for (int h = 0; h < H; ++h) {
       std::string line;
       int index = h + H * b;
-      float in_data = (float)input_data[index];
+      float in_data = static_cast<float>(input_data[index]);
      float out_data = output_data[index];
       line += (std::to_string(out_data) + ", ");
       EXPECT_EQ(in_data, out_data);
...
@@ -1339,7 +1339,8 @@ TEST(net_build, program_execute_repeat_axis_0) {
   std::vector<float> output_data = GetTensorData<float>(output_tensor, target);
   for (int m = 0; m < new_M; ++m) {
     for (int n = 0; n < new_N; ++n) {
-      int in_index = n + N * static_cast<int>(std::floor((float)m / repeats));
+      int in_index =
+          n + N * static_cast<int>(std::floor(static_cast<float>(m) / repeats));
       int out_index = n + new_N * m;
       float in_data = input_data[in_index];
       float out_data = output_data[out_index];
...
@@ -1393,7 +1394,8 @@ TEST(net_build, program_execute_repeat_axis_1) {
   std::vector<float> output_data = GetTensorData<float>(output_tensor, target);
   for (int m = 0; m < new_M; ++m) {
     for (int n = 0; n < new_N; ++n) {
-      int in_index = N * m + static_cast<int>(std::floor((float)n / repeats));
+      int in_index =
+          N * m + static_cast<int>(std::floor(static_cast<float>(n) / repeats));
       int out_index = n + new_N * m;
       float in_data = input_data[in_index];
       float out_data = output_data[out_index];
paddle/cinn/hlir/framework/graph_compiler.cc
@@ -155,7 +155,7 @@ void Program::Export(const std::vector<std::string>& persistent_vars,
     std::string name = (std::string)varname;
     auto t = scope_->GetTensor(name);
     cinn_buffer_t buffer = *t->buffer();
-    buffer.memory = (uint8_t*)0;
+    buffer.memory = reinterpret_cast<uint8_t*>(0);
     if (std::find(persistent_vars.begin(), persistent_vars.end(), name) !=
         persistent_vars.end()) {
       pvars.emplace_back(t->buffer(),
...
@@ -206,7 +206,7 @@ void Program::Export(const std::vector<std::string>& persistent_vars,
     tellplaceholder(instplaceholder + findex * 12 + 8, f);
     for (auto& arg : all_args) {
       uintptr_t bufindex = varindex[arg];
-      cinn_pod_value_t v((cinn_buffer_t*)bufindex);
+      cinn_pod_value_t v(reinterpret_cast<cinn_buffer_t*>(bufindex));
       fwrite(&v, sizeof(cinn_pod_value_t), 1, f);
     }
   }
paddle/cinn/hlir/framework/op_lowering_util.cc
@@ -722,8 +722,9 @@ void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,
       need_reduce_last_count *= inshape[i];
     }
   }
-  int warp_reduce_need_sm_count = ceil((need_reduce_last_count * 32) /
-                                       float(target.get_max_threads_per_sm()));
+  int warp_reduce_need_sm_count =
+      ceil((need_reduce_last_count * 32) /
+           static_cast<float>(target.get_max_threads_per_sm()));
   // Set Num_max_threads to 32 is Warp Reduce
   if (target.get_multi_processor_count() < warp_reduce_need_sm_count) {
     max_num_threads = 32;
...
@@ -805,7 +806,8 @@ void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,
   }
   LoopOrderAssignReduce(ir_sch, block_name, first_axes, target, true);
   // fuse axis before reduce to bind blockidx.
-  for (int idx = 0; idx < int(inshape.size() - axes.size()) - 1; ++idx) {
+  for (int idx = 0; idx < static_cast<int>(inshape.size() - axes.size()) - 1;
+       ++idx) {
     ir_sch.Fuse(block_name, {0, 1});
   }
 }
paddle/cinn/hlir/pass/alterlayout_test.cc
@@ -210,7 +210,7 @@ TEST(conv_bn_conv, conv_bn_conv) {
   attrs["data_format"] = src_layout;
   absl::flat_hash_map<std::string, Program::attr_t> attrs1;
-  attrs1["epsilon"] = (float)0.001;
+  attrs1["epsilon"] = 0.001f;
   auto c = program.conv2d(A, B, attrs);
   auto d = program.batchnorm(c, Scale, Bias, Mean, Variance, attrs1);
...
@@ -317,7 +317,7 @@ TEST(conv_softmax_conv, conv_softmax_conv) {
   attrs["data_format"] = src_layout;
   absl::flat_hash_map<std::string, Program::attr_t> attrs1;
-  attrs1["axis"] = (int)-1;
+  attrs1["axis"] = static_cast<int>(-1);
   auto c = program.conv2d(A, B, attrs);
   auto d = program.softmax(c, attrs1);
...
@@ -417,7 +417,7 @@ TEST(conv_mul_conv, conv_mul_conv) {
   attrs["data_format"] = src_layout;
   absl::flat_hash_map<std::string, Program::attr_t> attrs1;
-  attrs1["axis"] = (int)-1;
+  attrs1["axis"] = static_cast<int>(-1);
   auto c = program.conv2d(A, B, attrs);
   auto d = program.mul(c, C, 1, 1);
paddle/cinn/hlir/pe/nn.cc
@@ -806,15 +806,15 @@ std::vector<Tensor> Depthwise_Conv2d_NCHW(const Tensor &input,
   CHECK(weight->shape[1].is_constant());
   CHECK(weight->shape[2].is_constant());
   CHECK(weight->shape[3].is_constant());
-  int B = (int)input->shape[0].get_constant();
-  int O = (int)weight->shape[1].get_constant() *
-          (int)input->shape[1].get_constant();
-  int H = ((int)input->shape[2].get_constant() -
-           (int)weight->shape[2].get_constant() + 2 * pad_h) /
+  int B = static_cast<int>(input->shape[0].get_constant());
+  int O = static_cast<int>(weight->shape[1].get_constant()) *
+          static_cast<int>(input->shape[1].get_constant());
+  int H = (static_cast<int>(input->shape[2].get_constant()) -
+           static_cast<int>(weight->shape[2].get_constant()) + 2 * pad_h) /
               stride_h +
           1;
-  int W = ((int)input->shape[3].get_constant() -
-           (int)weight->shape[3].get_constant() + 2 * pad_w) /
+  int W = (static_cast<int>(input->shape[3].get_constant()) -
+           static_cast<int>(weight->shape[3].get_constant()) + 2 * pad_w) /
              stride_w +
          1;
   output_shape = {
paddle/cinn/hlir/pe/reduction.cc
@@ -380,7 +380,8 @@ std::vector<ir::Tensor> BlockReduceInternal(const ir::Tensor& A,
   // compute the reduce dimension stride.
   std::vector<Expr> last_reduce_stride(A->shape.size() - axes.front(), Expr(1));
-  for (int idx = A->shape.size(), index = int(last_reduce_stride.size()) - 2;
+  for (int idx = A->shape.size(),
+           index = static_cast<int>(last_reduce_stride.size()) - 2;
        index >= 0;
        --index) {
     last_reduce_stride[index] = last_reduce_stride[index + 1] * A->shape[--idx];
...
@@ -407,8 +408,8 @@ std::vector<ir::Tensor> BlockReduceInternal(const ir::Tensor& A,
   // compute output shape.
   std::vector<Expr> out_shape(A->shape.begin(), A->shape.begin() + axes.front());
-  int tailf = keep_dim ? (int(A->shape.size()) - axes.front())
-                       : (int(A->shape.size()) - axes.back() - 1);
+  int tailf = keep_dim ? (static_cast<int>(A->shape.size()) - axes.front())
+                       : (static_cast<int>(A->shape.size()) - axes.back() - 1);
   for (int idx = 0; idx < tailf; ++idx) {
     out_shape.push_back(Expr(1));
   }
...
@@ -538,8 +539,8 @@ std::vector<ir::Tensor> BlockReduce(const ir::Tensor& A,
   // compute output tensor shape.
   std::vector<Expr> out_shape(A->shape.begin(), A->shape.begin() + axes.front());
-  int tailf = keep_dim ? (int(A->shape.size()) - axes.front())
-                       : (int(A->shape.size()) - axes.back() - 1);
+  int tailf = keep_dim ? (static_cast<int>(A->shape.size()) - axes.front())
+                       : (static_cast<int>(A->shape.size()) - axes.back() - 1);
   for (int idx = 0; idx < tailf; ++idx) {
     out_shape.push_back(Expr(1));
   }
...
@@ -832,7 +833,8 @@ std::vector<ir::Tensor> TwoStepBlockReduceInternal(
   }
   int warp_reduce_need_sm_count =
       ceil((need_reduce_last_count * 32) /
-           float(common::DefaultNVGPUTarget().get_max_threads_per_sm()));
+           static_cast<float>(
+               common::DefaultNVGPUTarget().get_max_threads_per_sm()));
   // Set Num_max_threads to 32 is Warp Reduce
   if (common::DefaultNVGPUTarget().get_multi_processor_count() <
       warp_reduce_need_sm_count) {
paddle/cinn/hlir/pe/schedule.cc
@@ -2662,7 +2662,7 @@ int gcd(int a, int b) {
 int MaxFactorLessThan(int a, int b) {
   CHECK_GT(a, b);
   int res = 1;
-  for (int i = 2; i <= (int)sqrt((double)a); i++) {
+  for (int i = 2; i <= static_cast<int>(sqrt(static_cast<double>(a))); i++) {
     if (a % i == 0) {
       if (i <= b) res = std::max(res, i);
       if (a / i <= b) res = std::max(res, a / i);
paddle/cinn/ir/ir_schedule.cc
@@ -2189,7 +2189,7 @@ void ScheduleImpl::CopyTransformAndLoopInfo(const Expr& block,
   Expr new_loop;
   VLOG(3) << "changed_loop_num is : " << changed_loop_num;
   VLOG(3) << "old_iter_values.size() is : " << old_iter_values.size();
-  if (changed_loop_num >= (int)old_iter_values.size()) {
+  if (changed_loop_num >= static_cast<int>(old_iter_values.size())) {
     new_loop = optim::IRCopy(block);
     new_loop.As<ir::ScheduleBlockRealize>()->iter_values = new_iter_values;
   } else {
paddle/cinn/ir/ir_schedule_util.cc
@@ -74,7 +74,7 @@ int GetLoopExtent(const Expr& loop) {
   CHECK(loop.As<ir::For>());
   CHECK(common::is_zero(loop.As<ir::For>()->min));
   CHECK(loop.As<ir::For>()->extent.is_constant());
-  return (int)loop.As<ir::For>()->extent.get_constant();
+  return static_cast<int>(loop.As<ir::For>()->extent.get_constant());
 }
 void SetCudaAxisInfo(Expr* lowered_func) {
...
@@ -249,7 +249,8 @@ std::vector<int> ValidateFactors(const std::vector<int>& factors,
       << "In Split, when there is -1 in factors, the other factors' product "
          "should be <= "
          "original loop's extent! Please check.";
-  int minus_one_candidate = (int)ceil((double)total_extent / (double)product);
+  int minus_one_candidate = static_cast<int>(
+      ceil(static_cast<double>(total_extent) / static_cast<double>(product)));
   for (int i = 0; i < validated_factors.size(); ++i) {
     if (validated_factors[i] == -1) {
       validated_factors[i] = minus_one_candidate;
...
@@ -490,7 +491,7 @@ Expr MakeCacheBlock(const std::vector<IterRange>& buffer_ranges,
       ir::ScheduleBlock::Make(
           block_vars, {}, {}, new_tensor->name, Block::Make({body})));
   Expr new_body = block;
-  for (int i = (int)loop_vars.size() - 1; i >= 0; i--) {
+  for (int i = static_cast<int>(loop_vars.size()) - 1; i >= 0; i--) {
     new_body = For::Make(loop_vars[i],
                          Expr(0),
                          common::AutoSimplify(buffer_ranges[i].extent),
...
@@ -531,7 +532,9 @@ void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write) {
             ->body.As<Block>());
   info->loc_block = root.As<ScheduleBlockRealize>()
                         ->schedule_block.As<ScheduleBlock>()
                         ->body;
-  for (int i = 0; i < (int)info->loc_block.As<Block>()->stmts.size(); ++i) {
+  for (int i = 0;
+       i < static_cast<int>(info->loc_block.As<Block>()->stmts.size());
+       ++i) {
     if (Contains(info->loc_block.As<Block>()->stmts[i], producer)) {
       info->loc_pos = i + 1;
       break;
...
@@ -1075,9 +1078,8 @@ std::vector<IterRange> CalculateRequiredRegions(
               (*find_for_loops.begin()).As<ir::For>()->min,
               (*find_for_loops.begin()).As<ir::For>()->extent);
         } else {
-          int cons = (int)block.As<ir::ScheduleBlockRealize>()
-                         ->iter_values[i]
-                         .is_constant();
+          int cons = static_cast<int>(
+              block.As<ir::ScheduleBlockRealize>()->iter_values[i].is_constant());
           required_buffer_range.emplace_back(Expr(cons), Expr(1));
         }
       }
paddle/cinn/ir/schedule_desc_test.cc
@@ -863,32 +863,29 @@ TEST_F(TestScheduleDesc, StepKind_Annotate) {
   auto block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k1", int(64));
-  trace.Append(ScheduleDesc::Step("AnnotateIntAttr",
-                                  {{"block", std::vector<Expr>({block_b})}},
-                                  {{"key", std::string("k1")}, {"value", int(64)}},
-                                  {}));
+  ir_sch.Annotate(block_b, "k1", 64);
+  trace.Append(ScheduleDesc::Step("AnnotateIntAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k1")}, {"value", 64}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k2", bool(true));
-  trace.Append(ScheduleDesc::Step("AnnotateBoolAttr",
-                                  {{"block", std::vector<Expr>({block_b})}},
-                                  {{"key", std::string("k2")}, {"value", bool(true)}},
-                                  {}));
+  ir_sch.Annotate(block_b, "k2", true);
+  trace.Append(ScheduleDesc::Step("AnnotateBoolAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k2")}, {"value", true}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k3", float(2.0));
-  trace.Append(ScheduleDesc::Step("AnnotateFloatAttr",
-                                  {{"block", std::vector<Expr>({block_b})}},
-                                  {{"key", std::string("k3")}, {"value", float(2.0)}},
-                                  {}));
+  ir_sch.Annotate(block_b, "k3", 2.0f);
+  trace.Append(ScheduleDesc::Step("AnnotateFloatAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k3")}, {"value", 2.0f}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
...
@@ -911,22 +908,20 @@ TEST_F(TestScheduleDesc, StepKind_Unannotate) {
   auto block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k1", int(64));
-  trace.Append(ScheduleDesc::Step("AnnotateIntAttr",
-                                  {{"block", std::vector<Expr>({block_b})}},
-                                  {{"key", std::string("k1")}, {"value", int(64)}},
-                                  {}));
+  ir_sch.Annotate(block_b, "k1", 64);
+  trace.Append(ScheduleDesc::Step("AnnotateIntAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k1")}, {"value", 64}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k2", bool(true));
-  trace.Append(ScheduleDesc::Step("AnnotateBoolAttr",
-                                  {{"block", std::vector<Expr>({block_b})}},
-                                  {{"key", std::string("k2")}, {"value", bool(true)}},
-                                  {}));
+  ir_sch.Annotate(block_b, "k2", true);
+  trace.Append(ScheduleDesc::Step("AnnotateBoolAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k2")}, {"value", true}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
paddle/cinn/lang/lower_impl.cc
@@ -741,14 +741,15 @@ std::vector<Expr> LowerImpl::GenerateFunctionBody(
         << "'s shape is : " << utils::Join(tensor->shape, ",");
     for (auto& expr : tensor->shape) {
       CHECK(expr.is_constant());
-      int_shape.push_back((int)expr.get_constant());
+      int_shape.push_back(static_cast<int>(expr.get_constant()));
     }
     for (auto& var : tensor->reduce_axis) {
       CHECK(var->lower_bound.defined());
       CHECK(var->upper_bound.defined());
       CHECK(common::is_zero(var->lower_bound));
       CHECK(var->upper_bound.is_constant());
-      int_shape.push_back((int)var->upper_bound.get_constant());
+      int_shape.push_back(
+          static_cast<int>(var->upper_bound.get_constant()));
     }
     // create block itervars, i0,i1...
     std::vector<Var> block_vars;
paddle/cinn/optim/compute_inline_expand.cc
@@ -79,7 +79,7 @@ struct TensorInlineExpandMutator : public ir::IRMutator<> {
   void Visit(const ir::For* op, Expr* expr) override {
     CHECK(op->extent.is_constant());
-    int cons_extent = (int)op->extent.get_constant();
+    int cons_extent = static_cast<int>(op->extent.get_constant());
     var_to_extent[op->loop_var->name] = op->extent;
     ir::IRMutator<>::Visit(op, expr);
   }
paddle/cinn/optim/unroll_loops_test.cc
@@ -69,7 +69,7 @@ TEST(UnrollLoops, auto_unroll) {
   Expr M(100);
   Expr N(4);
   Expr O(5);
-  Expr const_value(float(2.11));
+  Expr const_value(2.11f);
   Placeholder<float> A("A", {M, N, O});
paddle/cinn/optim/vectorize_loops_test.cc
@@ -274,7 +274,7 @@ TEST(Vectorize, cuda_vectorize_with_constant) {
   Expr M(100);
   Expr N(500);
   Placeholder<float> A("A", {M, N});
-  Expr const_value(float(2.11));
+  Expr const_value(2.11f);
   Tensor C = Compute(
       {M, N}, [&](Var i, Var j) { return const_value * A(i, j); }, "C");
paddle/cinn/poly/stage.cc
@@ -923,8 +923,8 @@ Iterator Stage::Fuse(const std::vector<Iterator> &levels) {
         "%s = floor(%s / %d)",
         levels.front().id.c_str(),
         new_iter_name.c_str(),
-        (int)std::accumulate(iterator_max_val.begin() + 1,
-                             iterator_max_val.end(),
-                             1,
-                             my_prod)));
+        static_cast<int>(std::accumulate(
+            iterator_max_val.begin() + 1, iterator_max_val.end(), 1, my_prod))));
     conds.emplace_back(utils::StringFormat(
         "%s = %s mod %d",
         levels.back().id.c_str(),
         new_iter_name.c_str(),
...
@@ -935,10 +935,10 @@ Iterator Stage::Fuse(const std::vector<Iterator> &levels) {
           "%s = floor(%s / %d) mod %d",
           levels[i].id.c_str(),
           new_iter_name.c_str(),
-          (int)std::accumulate(iterator_max_val.begin() + i + 1,
-                               iterator_max_val.end(),
-                               1,
-                               my_prod),
+          static_cast<int>(std::accumulate(
+              iterator_max_val.begin() + i + 1, iterator_max_val.end(), 1,
+              my_prod)),
           iterator_max_val[i]));
     }
paddle/cinn/runtime/cinn_runtime.cc
@@ -122,7 +122,7 @@ cinn_buffer_t* cinn_buffer_new_default(int target,
     fprintf(stderr, "Not supported device type");
     abort();
   }
-  cinn_buffer_malloc((void*)(0), buf);
+  cinn_buffer_malloc(reinterpret_cast<void*>(0), buf);
   return buf;
 }
paddle/cinn/runtime/cpu/host_intrinsics.cc
@@ -31,8 +31,8 @@ extern "C" {
 void __cinn_host_tanh_v(const cinn_buffer_t* x, cinn_buffer_t* out) {
   CINN_CHECK_EQ(x->num_elements(), out->num_elements());
   int xn = x->num_elements();
-  auto* x_data = (float*)(x->memory);
-  auto* out_data = (float*)(out->memory);
+  auto* x_data = reinterpret_cast<float*>(x->memory);
+  auto* out_data = reinterpret_cast<float*>(out->memory);
   for (int i = 0; i < x->num_elements(); i++) {
     out_data[i] = tanhf(x_data[i]);
   }
paddle/cinn/runtime/cuda/float16.h
@@ -599,7 +599,7 @@ __host__ __device__ inline float16(abs)(const float16& a) {
 #if defined(CINN_CUDA_FP16) && (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530)
   return float16(__habs(a.to_half()));
 #else
-  return float16(fabsf(float(a)));
+  return float16(fabsf(static_cast<float>(a)));
 #endif
 }
paddle/cinn/runtime/tiny_runtime.cc
@@ -62,13 +62,13 @@ void *load_program(const char *paramfile) {
     return nullptr;
   }
   // TODO(hp03): check param file version
-  ctx->major_v = *(int *)(buf + 4);
-  ctx->minor_v = *(int *)(buf + 8);
+  ctx->major_v = *reinterpret_cast<int *>(buf + 4);
+  ctx->minor_v = *reinterpret_cast<int *>(buf + 8);
-  int *namelist_pos = (int *)(buf + 16);
-  int *podvalue_pos = (int *)(buf + *namelist_pos);
-  int *persistent_pos = (int *)(buf + *podvalue_pos);
-  int *inst_pos = (int *)(buf + *persistent_pos);
+  int *namelist_pos = reinterpret_cast<int *>(buf + 16);
+  int *podvalue_pos = reinterpret_cast<int *>(buf + *namelist_pos);
+  int *persistent_pos = reinterpret_cast<int *>(buf + *podvalue_pos);
+  int *inst_pos = reinterpret_cast<int *>(buf + *persistent_pos);
   if (fsize < *inst_pos) {
     return nullptr;
   }
...
@@ -78,11 +78,11 @@ void *load_program(const char *paramfile) {
   std::map<std::string, int> name2index;
   for (int i = 0; i < namelen; i++) {
     int offset = (namelist_pos + 2)[i];
-    namev[i] = (char *)(buf + offset);
+    namev[i] = reinterpret_cast<char *>(buf + offset);
     name2index[namev[i]] = i;
   }
-  cinn_buffer_t *cb = (cinn_buffer_t *)(buf + podvalue_pos[1]);
+  cinn_buffer_t *cb = reinterpret_cast<cinn_buffer_t *>(buf + podvalue_pos[1]);
   for (int i = 0; i < namelen; i++) {
     // currently only CPU device is supported, so just use malloc
     if (cb[i].memory) {
...
@@ -107,9 +107,9 @@ void *load_program(const char *paramfile) {
     int instargc = inst_pos[2 + i * 3 + 1];
     ctx->inst_argc.push_back(instargc);
-    cinn_pod_value_t *argv =
-        (cinn_pod_value_t *)(buf + inst_pos[2 + i * 3 + 2]);
+    cinn_pod_value_t *argv =
+        reinterpret_cast<cinn_pod_value_t *>(buf + inst_pos[2 + i * 3 + 2]);
     for (int i = 0; i < instargc; i++) {
-      int idx = (uintptr_t)((cinn_buffer_t *)argv[i]);
+      int idx = (uintptr_t)((cinn_buffer_t *)(argv[i]));  // NOLINT
       cinn_value_t tmp_v;
       tmp_v.v_handle = &cb[idx];
       argv[i].set_value(tmp_v);
...
@@ -127,7 +127,7 @@ int set_maxconcurrency(int c) {
 typedef void (*func_t)(cinn_pod_value_t *, int);
 void run_program(void *ctx) {
-  param_context_t *pc = (param_context_t *)ctx;
+  param_context_t *pc = reinterpret_cast<param_context_t *>(ctx);
   for (int i = 0; i < pc->instructions.size(); i++) {
     const char *sym = pc->instructions[i].c_str();
     void *p = dlsym(RTLD_DEFAULT, sym);
...
@@ -137,7 +137,7 @@ void run_program(void *ctx) {
 }
 cinn_pod_value_t *get_pod_value(void *ctx, const char *tname) {
-  param_context_t *pc = (param_context_t *)ctx;
+  param_context_t *pc = reinterpret_cast<param_context_t *>(ctx);
   if (pc->name2podvalue.find(tname) != pc->name2podvalue.end()) {
     return &pc->name2podvalue[tname];
   }