diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc
index 751c4f931d6d12b3f2e789e5d2460222ea571c2d..35dc5374b96647b6269627a9a99f26bb475c0819 100644
--- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc
+++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_bind_test.cc
@@ -68,8 +68,8 @@ class TestAutoBind : public TestAutoGenRuleBase {
   } else if (total_num <= kMaxBlocks * kMaxThreadsPerBlock) {
     ASSERT_EQ(all_loops.size(), 2);
     EXPECT_EQ(all_loops[0].As<ir::For>()->extent.as_int32(),
-              static_cast<int>(
-                  std::ceil(double(total_num) / kMaxThreadsPerBlock)));
+              static_cast<int>(std::ceil(static_cast<double>(total_num) /
+                                         kMaxThreadsPerBlock)));
     EXPECT_TRUE(all_loops[0].As<ir::For>()->is_gpu_block_binded());
     EXPECT_EQ(all_loops[1].As<ir::For>()->extent.as_int32(),
               kMaxThreadsPerBlock);
@@ -81,9 +81,10 @@ class TestAutoBind : public TestAutoGenRuleBase {
     EXPECT_EQ(all_loops[1].As<ir::For>()->extent.as_int32(),
               kMaxThreadsPerBlock);
     EXPECT_TRUE(all_loops[1].As<ir::For>()->is_gpu_thread_binded());
-    EXPECT_EQ(all_loops[2].As<ir::For>()->extent.as_int32(),
-              static_cast<int>(std::ceil(
-                  double(total_num) / (kMaxBlocks * kMaxThreadsPerBlock))));
+    EXPECT_EQ(
+        all_loops[2].As<ir::For>()->extent.as_int32(),
+        static_cast<int>(std::ceil(static_cast<double>(total_num) /
+                                   (kMaxBlocks * kMaxThreadsPerBlock))));
     EXPECT_FALSE(all_loops[2].As<ir::For>()->is_binded());
   }
diff --git a/paddle/cinn/backends/codegen_c.cc b/paddle/cinn/backends/codegen_c.cc
index 239ba9cf59226c1c5a595e87d53b25ce9e6484e5..8263aab3eb9a45f366b7957743b7d0888a6d257e 100644
--- a/paddle/cinn/backends/codegen_c.cc
+++ b/paddle/cinn/backends/codegen_c.cc
@@ -162,7 +162,7 @@ void CodeGenC::Visit(const ir::Mod *op) {
   auto copied = op->b();
   optim::Simplify(&copied);
   if (copied.is_constant()) {
-    int temp = (int)(copied.get_constant());
+    int temp = static_cast<int>(copied.get_constant());
     if ((temp & (temp - 1)) == 0) {
       os() << "(";
       Print(op->a());
diff --git a/paddle/cinn/backends/ir_schedule_test.cc b/paddle/cinn/backends/ir_schedule_test.cc
index 2207a3d9cd07353f3b96fbdb89ce17cae516c7bc..427d4e0767c060e58150b8b1ff0118e464af15e6 100644
--- a/paddle/cinn/backends/ir_schedule_test.cc
+++ b/paddle/cinn/backends/ir_schedule_test.cc
@@ -2871,11 +2871,11 @@ TEST(IrSchedule, Annotate) {
   ir::IRSchedule ir_sch(ir::ModuleExpr({funcs[0]->body}));
   auto fused = ir_sch.Fuse("B", {0, 1});
   auto block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k1", int(64));
+  ir_sch.Annotate(block_b, "k1", 64);
   block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k2", bool(true));
+  ir_sch.Annotate(block_b, "k2", true);
   block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k3", float(2.0));
+  ir_sch.Annotate(block_b, "k3", 2.0f);
   block_b = ir_sch.GetBlock("B");
   ir_sch.Annotate(block_b, "k4", std::string("v4"));
   std::string expected_expr = R"ROC({
@@ -2915,11 +2915,11 @@ TEST(IrSchedule, Unannotate) {
   ir::IRSchedule ir_sch(ir::ModuleExpr({funcs[0]->body}));
   auto fused = ir_sch.Fuse("B", {0, 1});
   auto block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k1", int(64));
+  ir_sch.Annotate(block_b, "k1", 64);
   block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k2", bool(true));
+  ir_sch.Annotate(block_b, "k2", true);
   block_b = ir_sch.GetBlock("B");
-  ir_sch.Annotate(block_b, "k3", float(2.0));
+  ir_sch.Annotate(block_b, "k3", 2.0f);
   block_b = ir_sch.GetBlock("B");
   ir_sch.Annotate(block_b, "k4", std::string("v4"));
   block_b = ir_sch.GetBlock("B");
diff --git a/paddle/cinn/common/float16.h b/paddle/cinn/common/float16.h
index 15bd2cee3fc69f269acc10c4804209f9e8c00dba..3cdaa8b2f4c39c5fcee277b100b94c18ad62d9c9 100644
--- a/paddle/cinn/common/float16.h
+++ b/paddle/cinn/common/float16.h
@@ -597,9 +597,9 @@ __host__ __device__ inline bool(isfinite)(const float16& a) {
 __host__ __device__ inline float16(abs)(const float16& a) {
 #if defined(CINN_CUDA_FP16) && (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530)
-  return float16(__habs(a.to_half()));
+  return static_cast<float16>(__habs(a.to_half()));
 #else
-  return float16(fabsf(float(a)));
+  return static_cast<float16>(fabsf(static_cast<float>(a)));
 #endif
 }
diff --git a/paddle/cinn/common/float16_bfloat16_cuda_test.cu b/paddle/cinn/common/float16_bfloat16_cuda_test.cu
index 932208b1a9d6906ca6d111104a118101da1b59c2..e8d9c7f534cc124b33b3ce9a92148273fef098f4 100644
--- a/paddle/cinn/common/float16_bfloat16_cuda_test.cu
+++ b/paddle/cinn/common/float16_bfloat16_cuda_test.cu
@@ -100,7 +100,7 @@ __global__ void cast_fp16_to_fp32_cuda_kernel(const float16* input,
                                               float* out) {
   int idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (idx < num) {
-    out[idx] = float(input[idx]);
+    out[idx] = static_cast<float>(input[idx]);
   }
 }
@@ -131,7 +131,7 @@ __global__ void cast_bf16_to_fp32_cuda_kernel(const bfloat16* input,
                                               float* out) {
   int idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (idx < num) {
-    out[idx] = float(input[idx]);
+    out[idx] = static_cast<float>(input[idx]);
   }
 }
diff --git a/paddle/cinn/frontend/net_builder_test.cc b/paddle/cinn/frontend/net_builder_test.cc
index ba104d642f2a2e686328f1801f2f69340186270a..8ae273b8321756ba90d86d73a69e5d5f74e1ad65 100644
--- a/paddle/cinn/frontend/net_builder_test.cc
+++ b/paddle/cinn/frontend/net_builder_test.cc
@@ -484,7 +484,7 @@ TEST(net_build, program_execute_cast) {
     for (int h = 0; h < H; ++h) {
       std::string line;
       int index = h + H * b;
-      float in_data = (float)input_data[index];
+      float in_data = static_cast<float>(input_data[index]);
       float out_data = output_data[index];
       line += (std::to_string(out_data) + ", ");
       EXPECT_EQ(in_data, out_data);
@@ -1339,7 +1339,8 @@ TEST(net_build, program_execute_repeat_axis_0) {
   std::vector<float> output_data = GetTensorData<float>(output_tensor, target);
   for (int m = 0; m < new_M; ++m) {
     for (int n = 0; n < new_N; ++n) {
-      int in_index = n + N * static_cast<int>(std::floor((float)m / repeats));
+      int in_index =
+          n + N * static_cast<int>(std::floor(static_cast<float>(m) / repeats));
       int out_index = n + new_N * m;
       float in_data = input_data[in_index];
       float out_data = output_data[out_index];
@@ -1393,7 +1394,8 @@ TEST(net_build, program_execute_repeat_axis_1) {
   std::vector<float> output_data = GetTensorData<float>(output_tensor, target);
   for (int m = 0; m < new_M; ++m) {
     for (int n = 0; n < new_N; ++n) {
-      int in_index = N * m + static_cast<int>(std::floor((float)n / repeats));
+      int in_index =
+          N * m + static_cast<int>(std::floor(static_cast<float>(n) / repeats));
       int out_index = n + new_N * m;
       float in_data = input_data[in_index];
       float out_data = output_data[out_index];
diff --git a/paddle/cinn/hlir/framework/graph_compiler.cc b/paddle/cinn/hlir/framework/graph_compiler.cc
index 575283987cd5a9f42cc0aee9156182ed5026df5d..22c54891ee507ed06308f8d94b3ec01dfc6687f6 100644
--- a/paddle/cinn/hlir/framework/graph_compiler.cc
+++ b/paddle/cinn/hlir/framework/graph_compiler.cc
@@ -155,7 +155,7 @@ void Program::Export(const std::vector<std::string>& persistent_vars,
     std::string name = (std::string)varname;
     auto t = scope_->GetTensor(name);
     cinn_buffer_t buffer = *t->buffer();
-    buffer.memory = (uint8_t*)0;
+    buffer.memory = reinterpret_cast<uint8_t*>(0);
     if (std::find(persistent_vars.begin(),
                  persistent_vars.end(),
                  name) != persistent_vars.end()) {
      pvars.emplace_back(t->buffer(),
@@ -206,7 +206,7 @@ void Program::Export(const std::vector<std::string>& persistent_vars,
   tellplaceholder(instplaceholder + findex * 12 + 8, f);
   for (auto& arg : all_args) {
     uintptr_t bufindex = varindex[arg];
-    cinn_pod_value_t v((cinn_buffer_t*)bufindex);
+    cinn_pod_value_t v(reinterpret_cast<cinn_buffer_t*>(bufindex));
     fwrite(&v, sizeof(cinn_pod_value_t), 1, f);
   }
 }
diff --git a/paddle/cinn/hlir/framework/op_lowering_util.cc b/paddle/cinn/hlir/framework/op_lowering_util.cc
index 88963f5b989f7bbb6ef1bb6adafc4910469b304c..24a90a6d138898af1f8dc90970a8667a409e267e 100644
--- a/paddle/cinn/hlir/framework/op_lowering_util.cc
+++ b/paddle/cinn/hlir/framework/op_lowering_util.cc
@@ -722,8 +722,9 @@ void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,
       need_reduce_last_count *= inshape[i];
     }
   }
-  int warp_reduce_need_sm_count = ceil((need_reduce_last_count * 32) /
-                                       float(target.get_max_threads_per_sm()));
+  int warp_reduce_need_sm_count =
+      ceil((need_reduce_last_count * 32) /
+           static_cast<float>(target.get_max_threads_per_sm()));
   // Set Num_max_threads to 32 is Warp Reduce
   if (target.get_multi_processor_count() < warp_reduce_need_sm_count) {
     max_num_threads = 32;
@@ -805,7 +806,8 @@ void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,
     }
     LoopOrderAssignReduce(ir_sch, block_name, first_axes, target, true);
     // fuse axis before reduce to bind blockidx.
-    for (int idx = 0; idx < int(inshape.size() - axes.size()) - 1; ++idx) {
+    for (int idx = 0; idx < static_cast<int>(inshape.size() - axes.size()) - 1;
+         ++idx) {
       ir_sch.Fuse(block_name, {0, 1});
     }
   }
diff --git a/paddle/cinn/hlir/pass/alterlayout_test.cc b/paddle/cinn/hlir/pass/alterlayout_test.cc
index 45ec29d061c74c1bf4acfe9a02cf72e6ff10c7ef..2d91fb2f308c3d85a503d2f08f3ac52e59ca39bb 100755
--- a/paddle/cinn/hlir/pass/alterlayout_test.cc
+++ b/paddle/cinn/hlir/pass/alterlayout_test.cc
@@ -210,7 +210,7 @@ TEST(conv_bn_conv, conv_bn_conv) {
   attrs["data_format"] = src_layout;
 
   absl::flat_hash_map<std::string, Program::attr_t> attrs1;
-  attrs1["epsilon"] = (float)0.001;
+  attrs1["epsilon"] = 0.001f;
   auto c = program.conv2d(A, B, attrs);
   auto d = program.batchnorm(c, Scale, Bias, Mean, Variance, attrs1);
@@ -317,7 +317,7 @@ TEST(conv_softmax_conv, conv_softmax_conv) {
   attrs["data_format"] = src_layout;
 
   absl::flat_hash_map<std::string, Program::attr_t> attrs1;
-  attrs1["axis"] = (int)-1;
+  attrs1["axis"] = static_cast<int>(-1);
   auto c = program.conv2d(A, B, attrs);
   auto d = program.softmax(c, attrs1);
@@ -417,7 +417,7 @@ TEST(conv_mul_conv, conv_mul_conv) {
   attrs["data_format"] = src_layout;
 
   absl::flat_hash_map<std::string, Program::attr_t> attrs1;
-  attrs1["axis"] = (int)-1;
+  attrs1["axis"] = static_cast<int>(-1);
   auto c = program.conv2d(A, B, attrs);
   auto d = program.mul(c, C, 1, 1);
diff --git a/paddle/cinn/hlir/pe/nn.cc b/paddle/cinn/hlir/pe/nn.cc
index 152eb1bc3847b097a0da7272f8a0376608e139f2..6a3f46f6bf0d295ef9342a6f9d33bf9c3faa570a 100644
--- a/paddle/cinn/hlir/pe/nn.cc
+++ b/paddle/cinn/hlir/pe/nn.cc
@@ -806,15 +806,15 @@ std::vector<Tensor> Depthwise_Conv2d_NCHW(const Tensor &input,
   CHECK(weight->shape[1].is_constant());
   CHECK(weight->shape[2].is_constant());
   CHECK(weight->shape[3].is_constant());
-  int B = (int)input->shape[0].get_constant();
-  int O = (int)weight->shape[1].get_constant() *
-          (int)input->shape[1].get_constant();
-  int H = ((int)input->shape[2].get_constant() -
-           (int)weight->shape[2].get_constant() + 2 * pad_h) /
+  int B = static_cast<int>(input->shape[0].get_constant());
+  int O = static_cast<int>(weight->shape[1].get_constant()) *
+          static_cast<int>(input->shape[1].get_constant());
+  int H =
+      (static_cast<int>(input->shape[2].get_constant()) -
+       static_cast<int>(weight->shape[2].get_constant()) + 2 * pad_h) /
           stride_h +
       1;
-  int W = ((int)input->shape[3].get_constant() -
-           (int)weight->shape[3].get_constant() + 2 * pad_w) /
+  int W = (static_cast<int>(input->shape[3].get_constant()) -
+           static_cast<int>(weight->shape[3].get_constant()) + 2 * pad_w) /
              stride_w +
          1;
   output_shape = {
diff --git a/paddle/cinn/hlir/pe/reduction.cc b/paddle/cinn/hlir/pe/reduction.cc
index 2feac6104b4341fd4a0913c4de8b4fc61f65ea14..8edce3c153bbb3538b6bbb1a2a7265bf75c69509 100644
--- a/paddle/cinn/hlir/pe/reduction.cc
+++ b/paddle/cinn/hlir/pe/reduction.cc
@@ -380,7 +380,8 @@ std::vector<ir::Tensor> BlockReduceInternal(const ir::Tensor& A,
   // compute the reduce dimension stride.
   std::vector<Expr> last_reduce_stride(A->shape.size() - axes.front(),
                                        Expr(1));
-  for (int idx = A->shape.size(), index = int(last_reduce_stride.size()) - 2;
+  for (int idx = A->shape.size(),
+           index = static_cast<int>(last_reduce_stride.size()) - 2;
        index >= 0;
        --index) {
     last_reduce_stride[index] = last_reduce_stride[index + 1] * A->shape[--idx];
@@ -407,8 +408,8 @@ std::vector<ir::Tensor> BlockReduceInternal(const ir::Tensor& A,
   // compute output shape.
   std::vector<Expr> out_shape(A->shape.begin(),
                               A->shape.begin() + axes.front());
-  int tailf = keep_dim ? (int(A->shape.size()) - axes.front())
-                       : (int(A->shape.size()) - axes.back() - 1);
+  int tailf = keep_dim ? (static_cast<int>(A->shape.size()) - axes.front())
+                       : (static_cast<int>(A->shape.size()) - axes.back() - 1);
   for (int idx = 0; idx < tailf; ++idx) {
     out_shape.push_back(Expr(1));
   }
@@ -538,8 +539,8 @@ std::vector<ir::Tensor> BlockReduce(const ir::Tensor& A,
   // compute output tensor shape.
   std::vector<Expr> out_shape(A->shape.begin(),
                               A->shape.begin() + axes.front());
-  int tailf = keep_dim ? (int(A->shape.size()) - axes.front())
-                       : (int(A->shape.size()) - axes.back() - 1);
+  int tailf = keep_dim ? (static_cast<int>(A->shape.size()) - axes.front())
+                       : (static_cast<int>(A->shape.size()) - axes.back() - 1);
   for (int idx = 0; idx < tailf; ++idx) {
     out_shape.push_back(Expr(1));
   }
@@ -832,7 +833,8 @@ std::vector<ir::Tensor> TwoStepBlockReduceInternal(
   }
   int warp_reduce_need_sm_count =
       ceil((need_reduce_last_count * 32) /
-           float(common::DefaultNVGPUTarget().get_max_threads_per_sm()));
+           static_cast<float>(
+               common::DefaultNVGPUTarget().get_max_threads_per_sm()));
   // Set Num_max_threads to 32 is Warp Reduce
   if (common::DefaultNVGPUTarget().get_multi_processor_count() <
       warp_reduce_need_sm_count) {
diff --git a/paddle/cinn/hlir/pe/schedule.cc b/paddle/cinn/hlir/pe/schedule.cc
index 39dfd2360cca19cfbb64d91f1a14a4a08b341ab1..2547a43862a9f28f3cd13afb87336cb48358c863 100644
--- a/paddle/cinn/hlir/pe/schedule.cc
+++ b/paddle/cinn/hlir/pe/schedule.cc
@@ -2662,7 +2662,7 @@ int gcd(int a, int b) {
 int MaxFactorLessThan(int a, int b) {
   CHECK_GT(a, b);
   int res = 1;
-  for (int i = 2; i <= (int)sqrt((double)a); i++) {
+  for (int i = 2; i <= static_cast<int>(sqrt(static_cast<double>(a))); i++) {
     if (a % i == 0) {
       if (i <= b) res = std::max(res, i);
       if (a / i <= b) res = std::max(res, a / i);
diff --git a/paddle/cinn/ir/ir_schedule.cc b/paddle/cinn/ir/ir_schedule.cc
index 04041c9d8c39c3bb85bf8d6185db9620df859cef..3489b84a45e505f706c16af6aa15be579c1be59f 100644
--- a/paddle/cinn/ir/ir_schedule.cc
+++ b/paddle/cinn/ir/ir_schedule.cc
@@ -2189,7 +2189,7 @@ void ScheduleImpl::CopyTransformAndLoopInfo(const Expr& block,
   Expr new_loop;
   VLOG(3) << "changed_loop_num is : " << changed_loop_num;
   VLOG(3) << "old_iter_values.size() is : " << old_iter_values.size();
-  if (changed_loop_num >= (int)old_iter_values.size()) {
+  if (changed_loop_num >= static_cast<int>(old_iter_values.size())) {
     new_loop = optim::IRCopy(block);
     new_loop.As<ir::ScheduleBlockRealize>()->iter_values = new_iter_values;
   } else {
diff --git a/paddle/cinn/ir/ir_schedule_util.cc b/paddle/cinn/ir/ir_schedule_util.cc
index a518ad7e7860f96f336abd13175b492cbb006a31..34e6cf6e7cd2385981ecbe76cc977acad1b59c13 100644
--- a/paddle/cinn/ir/ir_schedule_util.cc
+++ b/paddle/cinn/ir/ir_schedule_util.cc
@@ -74,7 +74,7 @@ int GetLoopExtent(const Expr& loop) {
   CHECK(loop.As<ir::For>());
   CHECK(common::is_zero(loop.As<ir::For>()->min));
   CHECK(loop.As<ir::For>()->extent.is_constant());
-  return (int)loop.As<ir::For>()->extent.get_constant();
+  return static_cast<int>(loop.As<ir::For>()->extent.get_constant());
 }
 
 void SetCudaAxisInfo(Expr* lowered_func) {
@@ -249,7 +249,8 @@ std::vector<int> ValidateFactors(const std::vector<int>& factors,
      << "In Split, when there is -1 in factors, the other factors' product "
         "should be <= "
        "original loop's extent! Please check.";
-  int minus_one_candidate = (int)ceil((double)total_extent / (double)product);
+  int minus_one_candidate = static_cast<int>(
+      ceil(static_cast<double>(total_extent) / static_cast<double>(product)));
   for (int i = 0; i < validated_factors.size(); ++i) {
     if (validated_factors[i] == -1) {
       validated_factors[i] = minus_one_candidate;
@@ -490,7 +491,7 @@ Expr MakeCacheBlock(const std::vector<IterRange>& buffer_ranges,
       ir::ScheduleBlock::Make(
           block_vars, {}, {}, new_tensor->name, Block::Make({body})));
   Expr new_body = block;
-  for (int i = (int)loop_vars.size() - 1; i >= 0; i--) {
+  for (int i = static_cast<int>(loop_vars.size()) - 1; i >= 0; i--) {
     new_body = For::Make(loop_vars[i],
                          Expr(0),
                          common::AutoSimplify(buffer_ranges[i].extent),
@@ -531,7 +532,9 @@ void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write) {
                  ->body.As<Block>());
   info->loc_block =
       root.As<ScheduleBlockRealize>()->schedule_block.As<ScheduleBlock>()->body;
-  for (int i = 0; i < (int)info->loc_block.As<Block>()->stmts.size(); ++i) {
+  for (int i = 0;
+       i < static_cast<int>(info->loc_block.As<Block>()->stmts.size());
+       ++i) {
     if (Contains(info->loc_block.As<Block>()->stmts[i], producer)) {
       info->loc_pos = i + 1;
       break;
@@ -1075,9 +1078,8 @@ std::vector<IterRange> CalculateRequiredRegions(
               (*find_for_loops.begin()).As<ir::For>()->min,
               (*find_for_loops.begin()).As<ir::For>()->extent);
         } else {
-          int cons = (int)block.As<ir::ScheduleBlockRealize>()
-                         ->iter_values[i]
-                         .is_constant();
+          int cons = static_cast<int>(
+              block.As<ir::ScheduleBlockRealize>()->iter_values[i].is_constant());
           required_buffer_range.emplace_back(Expr(cons), Expr(1));
         }
       }
diff --git a/paddle/cinn/ir/schedule_desc_test.cc b/paddle/cinn/ir/schedule_desc_test.cc
index efbfe0603dd54910a7ae5a10ab17ad309059f6d6..a58a322afdf0a3ffaa9f8527f9fbfbe6e46936b3 100644
--- a/paddle/cinn/ir/schedule_desc_test.cc
+++ b/paddle/cinn/ir/schedule_desc_test.cc
@@ -863,32 +863,29 @@ TEST_F(TestScheduleDesc, StepKind_Annotate) {
   auto block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k1", int(64));
-  trace.Append(
-      ScheduleDesc::Step("AnnotateIntAttr",
-                         {{"block", std::vector<Expr>({block_b})}},
-                         {{"key", std::string("k1")}, {"value", int(64)}},
-                         {}));
+  ir_sch.Annotate(block_b, "k1", 64);
+  trace.Append(ScheduleDesc::Step("AnnotateIntAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k1")}, {"value", 64}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k2", bool(true));
-  trace.Append(
-      ScheduleDesc::Step("AnnotateBoolAttr",
-                         {{"block", std::vector<Expr>({block_b})}},
-                         {{"key", std::string("k2")}, {"value", bool(true)}},
-                         {}));
+  ir_sch.Annotate(block_b, "k2", true);
+  trace.Append(ScheduleDesc::Step("AnnotateBoolAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k2")}, {"value", true}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k3", float(2.0));
-  trace.Append(
-      ScheduleDesc::Step("AnnotateFloatAttr",
-                         {{"block", std::vector<Expr>({block_b})}},
-                         {{"key", std::string("k3")}, {"value", float(2.0)}},
-                         {}));
+  ir_sch.Annotate(block_b, "k3", 2.0f);
+  trace.Append(ScheduleDesc::Step("AnnotateFloatAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k3")}, {"value", 2.0f}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
@@ -911,22 +908,20 @@ TEST_F(TestScheduleDesc, StepKind_Unannotate) {
   auto block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k1", int(64));
-  trace.Append(
-      ScheduleDesc::Step("AnnotateIntAttr",
-                         {{"block", std::vector<Expr>({block_b})}},
-                         {{"key", std::string("k1")}, {"value", int(64)}},
-                         {}));
+  ir_sch.Annotate(block_b, "k1", 64);
+  trace.Append(ScheduleDesc::Step("AnnotateIntAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k1")}, {"value", 64}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
       "GetBlock", {}, {{"block_name", std::string("B")}}, {block_b}));
-  ir_sch.Annotate(block_b, "k2", bool(true));
-  trace.Append(
-      ScheduleDesc::Step("AnnotateBoolAttr",
-                         {{"block", std::vector<Expr>({block_b})}},
-                         {{"key", std::string("k2")}, {"value", bool(true)}},
-                         {}));
+  ir_sch.Annotate(block_b, "k2", true);
+  trace.Append(ScheduleDesc::Step("AnnotateBoolAttr",
+                                  {{"block", std::vector<Expr>({block_b})}},
+                                  {{"key", std::string("k2")}, {"value", true}},
+                                  {}));
   block_b = ir_sch.GetBlock("B");
   trace.Append(ScheduleDesc::Step(
diff --git a/paddle/cinn/lang/lower_impl.cc b/paddle/cinn/lang/lower_impl.cc
index 247888246c2cfa32af0585a087451517854e7786..fcc6c12ce11ddd7c6d945c0645838c82ff583ee9 100644
--- a/paddle/cinn/lang/lower_impl.cc
+++ b/paddle/cinn/lang/lower_impl.cc
@@ -741,14 +741,15 @@ std::vector<Expr> LowerImpl::GenerateFunctionBody(
             << "'s shape is : " << utils::Join(tensor->shape, ",");
     for (auto& expr : tensor->shape) {
       CHECK(expr.is_constant());
-      int_shape.push_back((int)expr.get_constant());
+      int_shape.push_back(static_cast<int>(expr.get_constant()));
     }
     for (auto& var : tensor->reduce_axis) {
       CHECK(var->lower_bound.defined());
       CHECK(var->upper_bound.defined());
       CHECK(common::is_zero(var->lower_bound));
       CHECK(var->upper_bound.is_constant());
-      int_shape.push_back((int)var->upper_bound.get_constant());
+      int_shape.push_back(
+          static_cast<int>(var->upper_bound.get_constant()));
     }
     // create block itervars, i0,i1...
     std::vector<Var> block_vars;
diff --git a/paddle/cinn/optim/compute_inline_expand.cc b/paddle/cinn/optim/compute_inline_expand.cc
index aef64af01e011d7333ff3b78ca5487561038145a..8c38090db94ddaa0a6284669ca765589c10579fb 100644
--- a/paddle/cinn/optim/compute_inline_expand.cc
+++ b/paddle/cinn/optim/compute_inline_expand.cc
@@ -79,7 +79,7 @@ struct TensorInlineExpandMutator : public ir::IRMutator<> {
   void Visit(const ir::For *op, Expr *expr) override {
     CHECK(op->extent.is_constant());
-    int cons_extent = (int)op->extent.get_constant();
+    int cons_extent = static_cast<int>(op->extent.get_constant());
     var_to_extent[op->loop_var->name] = op->extent;
     ir::IRMutator<>::Visit(op, expr);
   }
diff --git a/paddle/cinn/optim/unroll_loops_test.cc b/paddle/cinn/optim/unroll_loops_test.cc
index 0923d9eb8569b33f0e9cb50462ccb3f50615e5c1..56014524514a67fdfc2681048d564bee5a222f94 100644
--- a/paddle/cinn/optim/unroll_loops_test.cc
+++ b/paddle/cinn/optim/unroll_loops_test.cc
@@ -69,7 +69,7 @@ TEST(UnrollLoops, auto_unroll) {
   Expr M(100);
   Expr N(4);
   Expr O(5);
-  Expr const_value(float(2.11));
+  Expr const_value(2.11f);
 
   Placeholder<float> A("A", {M, N, O});
diff --git a/paddle/cinn/optim/vectorize_loops_test.cc b/paddle/cinn/optim/vectorize_loops_test.cc
index 55f8b3097d91e292c5edea1ea4177e6cbd5a3213..8c589e23027d57e922c7868eda87c77f705f434e 100644
--- a/paddle/cinn/optim/vectorize_loops_test.cc
+++ b/paddle/cinn/optim/vectorize_loops_test.cc
@@ -274,7 +274,7 @@ TEST(Vectorize, cuda_vectorize_with_constant) {
   Expr M(100);
   Expr N(500);
   Placeholder<float> A("A", {M, N});
-  Expr const_value(float(2.11));
+  Expr const_value(2.11f);
 
   Tensor C = Compute(
       {M, N}, [&](Var i, Var j) { return const_value * A(i, j); }, "C");
diff --git a/paddle/cinn/poly/stage.cc b/paddle/cinn/poly/stage.cc
index 7b6f18c7acc60a555afdc7314ace1485b586b866..5af88bf251ed8d59dc5ac5c2ea247c396cd03a18 100644
--- a/paddle/cinn/poly/stage.cc
+++ b/paddle/cinn/poly/stage.cc
@@ -923,8 +923,8 @@ Iterator Stage::Fuse(const std::vector<Iterator> &levels) {
       "%s = floor(%s / %d)",
       levels.front().id.c_str(),
       new_iter_name.c_str(),
-      (int)std::accumulate(
-          iterator_max_val.begin() + 1, iterator_max_val.end(), 1, my_prod)));
+      static_cast<int>(std::accumulate(
+          iterator_max_val.begin() + 1, iterator_max_val.end(), 1, my_prod))));
   conds.emplace_back(utils::StringFormat("%s = %s mod %d",
                                          levels.back().id.c_str(),
                                          new_iter_name.c_str(),
@@ -935,10 +935,10 @@ Iterator Stage::Fuse(const std::vector<Iterator> &levels) {
         "%s = floor(%s / %d) mod %d",
         levels[i].id.c_str(),
         new_iter_name.c_str(),
-        (int)std::accumulate(iterator_max_val.begin() + i + 1,
-                             iterator_max_val.end(),
-                             1,
-                             my_prod),
+        static_cast<int>(std::accumulate(iterator_max_val.begin() + i + 1,
+                                         iterator_max_val.end(),
+                                         1,
+                                         my_prod)),
         iterator_max_val[i]));
   }
diff --git a/paddle/cinn/runtime/cinn_runtime.cc b/paddle/cinn/runtime/cinn_runtime.cc
index 51c9ac0866cfacfeacf5153d6c9545394936365d..b8bc96d508877ba21aab385ccd0df7cd464bd8d8 100644
--- a/paddle/cinn/runtime/cinn_runtime.cc
+++ b/paddle/cinn/runtime/cinn_runtime.cc
@@ -122,7 +122,7 @@ cinn_buffer_t* cinn_buffer_new_default(int target,
       fprintf(stderr, "Not supported device type");
       abort();
   }
-  cinn_buffer_malloc((void*)(0), buf);
+  cinn_buffer_malloc(reinterpret_cast<void*>(0), buf);
   return buf;
 }
diff --git a/paddle/cinn/runtime/cpu/host_intrinsics.cc b/paddle/cinn/runtime/cpu/host_intrinsics.cc
index 5f8d68e8d9ceeef0ad9b8a47f6892233570d8616..58c95f7e52fb549c8ace2a2bf6f5e991f5ce069d 100644
--- a/paddle/cinn/runtime/cpu/host_intrinsics.cc
+++ b/paddle/cinn/runtime/cpu/host_intrinsics.cc
@@ -31,8 +31,8 @@ extern "C" {
 void __cinn_host_tanh_v(const cinn_buffer_t* x, cinn_buffer_t* out) {
   CINN_CHECK_EQ(x->num_elements(), out->num_elements());
   int xn = x->num_elements();
-  auto* x_data = (float*)(x->memory);
-  auto* out_data = (float*)(out->memory);
+  auto* x_data = reinterpret_cast<float*>(x->memory);
+  auto* out_data = reinterpret_cast<float*>(out->memory);
   for (int i = 0; i < x->num_elements(); i++) {
     out_data[i] = tanhf(x_data[i]);
   }
diff --git a/paddle/cinn/runtime/cuda/float16.h b/paddle/cinn/runtime/cuda/float16.h
index 15bd2cee3fc69f269acc10c4804209f9e8c00dba..be847bfc2e7cd183ea663bd05486aea3df0c8ab7 100644
--- a/paddle/cinn/runtime/cuda/float16.h
+++ b/paddle/cinn/runtime/cuda/float16.h
@@ -599,7 +599,7 @@ __host__ __device__ inline float16(abs)(const float16& a) {
 #if defined(CINN_CUDA_FP16) && (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530)
   return float16(__habs(a.to_half()));
 #else
-  return float16(fabsf(float(a)));
+  return float16(fabsf(static_cast<float>(a)));
 #endif
 }
diff --git a/paddle/cinn/runtime/tiny_runtime.cc b/paddle/cinn/runtime/tiny_runtime.cc
index 65bb07759c0342c415aa745914804948c534afbb..8fb0238cbc31ded0330ff9d679e286af253ac4af 100644
--- a/paddle/cinn/runtime/tiny_runtime.cc
+++ b/paddle/cinn/runtime/tiny_runtime.cc
@@ -62,13 +62,13 @@ void *load_program(const char *paramfile) {
     return nullptr;
   }
   // TODO(hp03): check param file version
-  ctx->major_v = *(int *)(buf + 4);
-  ctx->minor_v = *(int *)(buf + 8);
+  ctx->major_v = *reinterpret_cast<int *>(buf + 4);
+  ctx->minor_v = *reinterpret_cast<int *>(buf + 8);
 
-  int *namelist_pos = (int *)(buf + 16);
-  int *podvalue_pos = (int *)(buf + *namelist_pos);
-  int *persistent_pos = (int *)(buf + *podvalue_pos);
-  int *inst_pos = (int *)(buf + *persistent_pos);
+  int *namelist_pos = reinterpret_cast<int *>(buf + 16);
+  int *podvalue_pos = reinterpret_cast<int *>(buf + *namelist_pos);
+  int *persistent_pos = reinterpret_cast<int *>(buf + *podvalue_pos);
+  int *inst_pos = reinterpret_cast<int *>(buf + *persistent_pos);
   if (fsize < *inst_pos) {
     return nullptr;
   }
@@ -78,11 +78,11 @@ void *load_program(const char *paramfile) {
   std::map<std::string, int> name2index;
   for (int i = 0; i < namelen; i++) {
     int offset = (namelist_pos + 2)[i];
-    namev[i] = (char *)(buf + offset);
+    namev[i] = reinterpret_cast<char *>(buf + offset);
     name2index[namev[i]] = i;
   }
 
-  cinn_buffer_t *cb = (cinn_buffer_t *)(buf + podvalue_pos[1]);
+  cinn_buffer_t *cb = reinterpret_cast<cinn_buffer_t *>(buf + podvalue_pos[1]);
   for (int i = 0; i < namelen; i++) {
     // currently only CPU device is supported, so just use malloc
     if (cb[i].memory) {
@@ -107,9 +107,9 @@ void *load_program(const char *paramfile) {
     int instargc = inst_pos[2 + i * 3 + 1];
     ctx->inst_argc.push_back(instargc);
     cinn_pod_value_t *argv =
-        (cinn_pod_value_t *)(buf + inst_pos[2 + i * 3 + 2]);
+        reinterpret_cast<cinn_pod_value_t *>(buf + inst_pos[2 + i * 3 + 2]);
     for (int i = 0; i < instargc; i++) {
-      int idx = (uintptr_t)((cinn_buffer_t *)argv[i]);
+      int idx = (uintptr_t)((cinn_buffer_t *)(argv[i]));  // NOLINT
       cinn_value_t tmp_v;
       tmp_v.v_handle = &cb[idx];
       argv[i].set_value(tmp_v);
@@ -127,7 +127,7 @@ int set_maxconcurrency(int c) {
 typedef void (*func_t)(cinn_pod_value_t *, int);
 
 void run_program(void *ctx) {
-  param_context_t *pc = (param_context_t *)ctx;
+  param_context_t *pc = reinterpret_cast<param_context_t *>(ctx);
   for (int i = 0; i < pc->instructions.size(); i++) {
     const char *sym = pc->instructions[i].c_str();
     void *p = dlsym(RTLD_DEFAULT, sym);
@@ -137,7 +137,7 @@ void run_program(void *ctx) {
 
 cinn_pod_value_t *get_pod_value(void *ctx, const char *tname) {
-  param_context_t *pc = (param_context_t *)ctx;
+  param_context_t *pc = reinterpret_cast<param_context_t *>(ctx);
   if (pc->name2podvalue.find(tname) != pc->name2podvalue.end()) {
     return &pc->name2podvalue[tname];
   }