diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir.cc b/paddle/cinn/auto_schedule/analysis/analyze_ir.cc index cf4b17747ad3b3981b446be1cd36bac8b47ff6e2..62cef5e2694c1c39515bd3ecc2d74a86d905d8a0 100644 --- a/paddle/cinn/auto_schedule/analysis/analyze_ir.cc +++ b/paddle/cinn/auto_schedule/analysis/analyze_ir.cc @@ -145,7 +145,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) { ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::LoweredFunc& old_func, - ir::Expr& body) { + ir::Expr& body) { // NOLINT ir::ModuleExpr mod_expr(std::vector({body})); ir::IRSchedule ir_sch(mod_expr); diff --git a/paddle/cinn/auto_schedule/analysis/analyze_ir.h b/paddle/cinn/auto_schedule/analysis/analyze_ir.h index 4e48be04ee5fc11c431cf9adbea45cc5d712a400..f59aa06b2dcaf6db2dc4c236c3dc13a0c16a6218 100644 --- a/paddle/cinn/auto_schedule/analysis/analyze_ir.h +++ b/paddle/cinn/auto_schedule/analysis/analyze_ir.h @@ -46,7 +46,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize); */ ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target, const ir::LoweredFunc& old_func, - ir::Expr& body); + ir::Expr& body); // NOLINT } // namespace auto_schedule } // namespace cinn diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.h index 02090467049a08b3cf01b497bd53b358d0c45270..8a556dfdf488fbbec463841c654fe7aa8872f99c 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/auto_inline.h @@ -64,7 +64,7 @@ class AutoInline : public AutoGenRule { const std::string& block_name) override; private: - void Apply(ir::IRSchedule* ir_schedule, ir::Expr& block_expr); + void Apply(ir::IRSchedule* ir_schedule, ir::Expr& block_expr); // NOLINT private: std::vector all_block_realizes_; diff --git a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h index 378a67c1620d08d36dcc4c6b7caef3dc0c724d2a..cd26bc69b8531938589c4e699ac4ef78ffe131cd 100644 --- a/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h +++ b/paddle/cinn/auto_schedule/search_space/auto_gen_rule/multi_level_tiling.h @@ -120,9 +120,12 @@ class MultiLevelTiling : public AutoGenRule { } private: - void ApplyTiling(ir::IRSchedule* ir_schedule, ir::Expr& block_expr); - void ApplyCacheRead(ir::IRSchedule* ir_schedule, ir::Expr& block_expr); - void ApplyCacheWrite(ir::IRSchedule* ir_schedule, ir::Expr& block_expr); + void ApplyTiling(ir::IRSchedule* ir_schedule, + ir::Expr& block_expr); // NOLINT + void ApplyCacheRead(ir::IRSchedule* ir_schedule, + ir::Expr& block_expr); // NOLINT + void ApplyCacheWrite(ir::IRSchedule* ir_schedule, + ir::Expr& block_expr); // NOLINT private: std::vector all_block_realizes_; diff --git a/paddle/cinn/auto_schedule/task/task_optimizer.cc b/paddle/cinn/auto_schedule/task/task_optimizer.cc index f988a03eb301b6a35974ae7ec046ff20dee4e677..cc658934dd4da4ddd36ad8ea1e9155e9f4daf592 100644 --- a/paddle/cinn/auto_schedule/task/task_optimizer.cc +++ b/paddle/cinn/auto_schedule/task/task_optimizer.cc @@ -51,7 +51,7 @@ using cinn::hlir::op::ExternalApiRegistry; // *** update a scheduled function with several post-processors ir::LoweredFunc FuncWithUpdatedBody(const common::Target& target, const ir::LoweredFunc& old_func, - ir::Expr& body); + ir::Expr& body); // NOLINT // check whether a scheduled lowered function is valid bool PruneInvalid(const ir::LoweredFunc& lowered_func, const common::Target& target); diff --git a/paddle/cinn/backends/llvm/codegen_llvm.cc b/paddle/cinn/backends/llvm/codegen_llvm.cc index 5e49c36525b307c7ab4d78041a5a797ab308b32f..57dc97ecc94f9108a4dc62e04986d90a5ed62047 100644 --- a/paddle/cinn/backends/llvm/codegen_llvm.cc +++ b/paddle/cinn/backends/llvm/codegen_llvm.cc @@ -1038,13 +1038,13 @@ llvm::Value *CodeGenLLVM::Visit(const ir::_Tensor_ *op) { template ::value, int> = 0> -void appendBody(std::vector &new_body, T &&v) { +void appendBody(std::vector &new_body, T &&v) { // NOLINT new_body.push_back(v); } template ::value, int> = 1> -void appendBody(std::vector &new_body, T &&v) { +void appendBody(std::vector &new_body, T &&v) { // NOLINT new_body.insert(new_body.end(), v.begin(), v.end()); } diff --git a/paddle/cinn/backends/llvm/codegen_llvm.h b/paddle/cinn/backends/llvm/codegen_llvm.h index facf13d05147b90795b5e1920fee951e0a4cbd02..bf5be73adcb11fc36bdfd006d546d4d95d851c36 100644 --- a/paddle/cinn/backends/llvm/codegen_llvm.h +++ b/paddle/cinn/backends/llvm/codegen_llvm.h @@ -98,7 +98,7 @@ class SymbolTable { }; struct SymbolTableGuard { - explicit SymbolTableGuard(SymbolTable &symbol_table) + explicit SymbolTableGuard(SymbolTable &symbol_table) // NOLINT : symbol_table_(symbol_table) { symbol_table.PushScope(); } diff --git a/paddle/cinn/common/arithmatic.cc b/paddle/cinn/common/arithmatic.cc index 44ed4846e782eabcce2df48b40d6c3e41f07ad03..6dff837ca10173178cccab5a219b1cfbec9c04b8 100644 --- a/paddle/cinn/common/arithmatic.cc +++ b/paddle/cinn/common/arithmatic.cc @@ -180,7 +180,8 @@ class GiNaCToExprVisitor : public GiNaC::symbol::visitor, ir::Expr cur; public: - explicit GiNaCToExprVisitor(std::map& repr_to_expr) + explicit GiNaCToExprVisitor( + std::map& repr_to_expr) // NOLINT : repr_to_expr(repr_to_expr) {} Expr operator()(GiNaC::ex ex) { diff --git a/paddle/cinn/common/float16.h b/paddle/cinn/common/float16.h index 3cdaa8b2f4c39c5fcee277b100b94c18ad62d9c9..d64731387d596aa668aa39294a28f6c1e21cea04 100644 --- a/paddle/cinn/common/float16.h +++ b/paddle/cinn/common/float16.h @@ -500,25 +500,25 @@ __host__ __device__ inline float16 operator-(const float16& a) { #endif } -__host__ __device__ inline float16& operator+=(float16& a, +__host__ __device__ inline float16& operator+=(float16& a, // NOLINT const float16& b) { // NOLINT a = a + b; return a; } -__host__ __device__ inline float16& operator-=(float16& a, +__host__ __device__ inline float16& operator-=(float16& a, // NOLINT const float16& b) { // NOLINT a = a - b; return a; } -__host__ __device__ inline float16& operator*=(float16& a, +__host__ __device__ inline float16& operator*=(float16& a, // NOLINT const float16& b) { // NOLINT a = a * b; return a; } -__host__ __device__ inline float16& operator/=(float16& a, +__host__ __device__ inline float16& operator/=(float16& a, // NOLINT const float16& b) { // NOLINT a = a / b; return a; diff --git a/paddle/cinn/frontend/computation.cc b/paddle/cinn/frontend/computation.cc index 868dc50807e9e4e83fbdc4112d432664e9b83715..ab6ea29ee92434615ad4fa49e937ab8b7be22b2c 100644 --- a/paddle/cinn/frontend/computation.cc +++ b/paddle/cinn/frontend/computation.cc @@ -42,7 +42,7 @@ struct ComputationContext { std::shared_ptr CompileProgram( const Target &target, - Program &program, + Program &program, // NOLINT const std::vector &outputs, std::shared_ptr scope, const CinnComputation::CompileOptions &options, diff --git a/paddle/cinn/frontend/computation.h b/paddle/cinn/frontend/computation.h index 8b16f9483320f768bcfeaa65a3273198650444f0..7ad4e381b24f4a038328eda87c308dd021831987 100644 --- a/paddle/cinn/frontend/computation.h +++ b/paddle/cinn/frontend/computation.h @@ -59,7 +59,7 @@ class CinnComputation { */ static std::shared_ptr BuildAndCompile( const Target &target, - NetBuilder &builder, + NetBuilder &builder, // NOLINT const CompileOptions &options = DefaultCompileOptions(), const std::vector &outputs = {}, void *stream = nullptr); @@ -77,7 +77,7 @@ class CinnComputation { */ static std::shared_ptr Compile( const Target &target, - Program &program, + Program &program, // NOLINT const CompileOptions &options = DefaultCompileOptions(), const std::vector &outputs = {}, void *stream = nullptr); @@ -130,7 +130,9 @@ class CinnComputation { * @param data address of the memory buffer to store tensor's data * @param size size of the memory buffer */ - void SetTensorData(hlir::framework::Tensor &t, void *data, size_t size); + void SetTensorData(hlir::framework::Tensor &t, // NOLINT + void *data, + size_t size); /** * set the data of a tensor (specified by it's name) from user specified @@ -148,7 +150,9 @@ class CinnComputation { * @param data address of the memory buffer to store tensor's data * @param size size of the memory buffer */ - void GetTensorData(hlir::framework::Tensor &t, void *data, size_t size); + void GetTensorData(hlir::framework::Tensor &t, // NOLINT + void *data, + size_t size); /** * copy the data of a tensor (specified by it's name) to user specified * buffer. if tensor is in NVGPU device memory, cudaMemcpy is used. diff --git a/paddle/cinn/frontend/pass/expand_zero_dim_pass_test.cc b/paddle/cinn/frontend/pass/expand_zero_dim_pass_test.cc index 51f80a25035d0bf94f5359b2906eadefcc207946..58eb8a01e39c8f70dd0cf8ac344163cd9f6ae94f 100644 --- a/paddle/cinn/frontend/pass/expand_zero_dim_pass_test.cc +++ b/paddle/cinn/frontend/pass/expand_zero_dim_pass_test.cc @@ -33,7 +33,7 @@ namespace cinn { namespace frontend { -int GetSize(std::vector& shape) { +int GetSize(const std::vector& shape) { return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); } diff --git a/paddle/cinn/frontend/syntax.cc b/paddle/cinn/frontend/syntax.cc index b2d4e9a54e9689283787f651a33dca160af588c7..9b534d8d146c015969d9929616c8af9c4b70faa8 100644 --- a/paddle/cinn/frontend/syntax.cc +++ b/paddle/cinn/frontend/syntax.cc @@ -296,12 +296,12 @@ std::tuple, absl::flat_hash_map, absl::flat_hash_map, absl::flat_hash_set> -LoadPaddleProgram( - const std::string& model_dir, - Scope* scope, - std::unordered_map>& input_shape_map, - bool is_combined, - const common::Target& target) { +LoadPaddleProgram(const std::string& model_dir, + Scope* scope, + std::unordered_map>& + input_shape_map, // NOLINT + bool is_combined, + const common::Target& target) { VLOG(1) << "Loading Paddle model from " << model_dir; PaddleModelToProgram paddle_to_program(scope, input_shape_map, target); return std::make_tuple(paddle_to_program(model_dir, is_combined), diff --git a/paddle/cinn/frontend/syntax.h b/paddle/cinn/frontend/syntax.h index 2d5b0223d58343c92ce7c0d1396abae6fdff852e..a405e22ddb565c2db5a967640e7ff592b48c8edf 100644 --- a/paddle/cinn/frontend/syntax.h +++ b/paddle/cinn/frontend/syntax.h @@ -528,12 +528,12 @@ std::tuple, absl::flat_hash_map, absl::flat_hash_map, absl::flat_hash_set> -LoadPaddleProgram( - const std::string& model_dir, - hlir::framework::Scope* scope, - std::unordered_map>& input_shape_map, - bool is_combined, - const common::Target& target = common::DefaultHostTarget()); +LoadPaddleProgram(const std::string& model_dir, + hlir::framework::Scope* scope, + std::unordered_map>& + input_shape_map, // NOLINT + bool is_combined, + const common::Target& target = common::DefaultHostTarget()); std::ostream& operator<<(std::ostream& os, const Variable& x); std::ostream& operator<<(std::ostream& os, const Instruction& instr); diff --git a/paddle/cinn/hlir/framework/graph_compiler.cc b/paddle/cinn/hlir/framework/graph_compiler.cc index 22c54891ee507ed06308f8d94b3ec01dfc6687f6..2d959353c52b8c32b90683f0bf46167ce9ccda42 100644 --- a/paddle/cinn/hlir/framework/graph_compiler.cc +++ b/paddle/cinn/hlir/framework/graph_compiler.cc @@ -1643,7 +1643,7 @@ std::shared_ptr BuildScope(Target target, std::vector GetFuncFromImpl( const std::shared_ptr& impl, const common::CINNValuePack& cinn_inputs, - std::vector& all_arg_tensors, + std::vector& all_arg_tensors, // NOLINT const std::vector& input_output_nodes, const std::string& node_id, const Target& target) { diff --git a/paddle/cinn/hlir/framework/graph_compiler.h b/paddle/cinn/hlir/framework/graph_compiler.h index ae482e1165ff9919aea2c7b20c6fe45e05f4854c..6e72b6b97bbdb1824ecd2b7a592af3f4c5c7070f 100644 --- a/paddle/cinn/hlir/framework/graph_compiler.h +++ b/paddle/cinn/hlir/framework/graph_compiler.h @@ -222,7 +222,7 @@ std::shared_ptr BuildScope(Target target, std::vector GetFuncFromImpl( const std::shared_ptr& impl, const common::CINNValuePack& cinn_inputs, - std::vector& tensor_inputs, + std::vector& tensor_inputs, // NOLINT const std::vector& input_output_nodes, const std::string& node_id, const Target& target); diff --git a/paddle/cinn/hlir/framework/op_lowering.cc b/paddle/cinn/hlir/framework/op_lowering.cc index 12ed8311afe19a626b629d494aba4c265e64dcb3..bf6099cc9a6bf4caacd2c6510a8376bb268b6328 100644 --- a/paddle/cinn/hlir/framework/op_lowering.cc +++ b/paddle/cinn/hlir/framework/op_lowering.cc @@ -45,7 +45,7 @@ OpLowerer::OpLowerer( const Target& target) : type_dict_(type_dict), shape_dict_(shape_dict), target_(target) {} -std::vector OpLowerer::Lower(GroupPtr& group) { +std::vector OpLowerer::Lower(GroupPtr& group) { // NOLINT VLOG(3) << "Lowering Group : " << group->group_id << " , Op Pattern : " << group->op_pattern_kind; group->input_names.clear(); diff --git a/paddle/cinn/hlir/framework/op_lowering.h b/paddle/cinn/hlir/framework/op_lowering.h old mode 100755 new mode 100644 index 97bdaeb485883daf52f2fe5ae5101ee07b293eb8..5e909d1196bbc7c21891ef2f3843214e0bdcec90 --- a/paddle/cinn/hlir/framework/op_lowering.h +++ b/paddle/cinn/hlir/framework/op_lowering.h @@ -52,8 +52,8 @@ class OpLowerer { OpLowerer(const absl::flat_hash_map&, const absl::flat_hash_map&, const Target&); - std::vector Lower(GroupPtr& group); - std::vector LowerWithoutSchedule(GroupPtr& group); + std::vector Lower(GroupPtr& group); // NOLINT + std::vector LowerWithoutSchedule(GroupPtr& group); // NOLINT private: std::vector IRLowerOp(IRComputeFunction, GroupPtr&); @@ -75,7 +75,7 @@ class OpLowerer { DEFINE_IR_COMPUTE(OutEWiseFusable); void IRSchedule( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT const GroupPtr& group, const std::unordered_map& tensor_map); diff --git a/paddle/cinn/hlir/framework/op_lowering_test.cc b/paddle/cinn/hlir/framework/op_lowering_test.cc index c5d615782c5308b54701df1557f557c9cd272167..3cd063a07e6346042c9554c227afb51d7d3796a7 100644 --- a/paddle/cinn/hlir/framework/op_lowering_test.cc +++ b/paddle/cinn/hlir/framework/op_lowering_test.cc @@ -32,7 +32,7 @@ namespace framework { using frontend::NetBuilder; using frontend::RunDecomposer; -void CodeGen(ir::LoweredFunc& func) { +void CodeGen(const ir::LoweredFunc& func) { #ifdef CINN_WITH_CUDA auto target = common::DefaultNVGPUTarget(); Module::Builder builder("module_builder", target); @@ -56,7 +56,7 @@ void CodeGen(ir::LoweredFunc& func) { #endif } -void Compile(NetBuilder& net_builder) { +void Compile(NetBuilder& net_builder) { // NOLINT auto program = net_builder.Build(); auto target = common::DefaultTarget(); RunDecomposer(&program, target); diff --git a/paddle/cinn/hlir/framework/op_lowering_util.cc b/paddle/cinn/hlir/framework/op_lowering_util.cc index 24a90a6d138898af1f8dc90970a8667a409e267e..06ec4488203621f1b45ebaab3e415fd686e0134c 100644 --- a/paddle/cinn/hlir/framework/op_lowering_util.cc +++ b/paddle/cinn/hlir/framework/op_lowering_util.cc @@ -92,8 +92,8 @@ ir::Tensor GetTensor( std::vector CollectInputTensor( const Node* node, - std::vector& func_args, - std::unordered_map& tensor_map, + std::vector& func_args, // NOLINT + std::unordered_map& tensor_map, // NOLINT const absl::flat_hash_map& type_dict, const absl::flat_hash_map& shape_dict) { std::vector tensors; @@ -543,7 +543,7 @@ bool WithoutLastDimInReduce(const std::vector& shape, } } -void LoopOrderAssignReduce(ir::IRSchedule& ir_sch, +void LoopOrderAssignReduce(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& axes, const common::Target& target, @@ -593,7 +593,7 @@ void LoopOrderAssignReduce(ir::IRSchedule& ir_sch, } } -void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch, +void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& inshape, const std::vector& axes, @@ -707,7 +707,7 @@ void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch, ir_sch.Reorder(block_name, new_order); } -void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch, +void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch, // NOLINT const std::string& block_name, const std::vector& inshape, const std::vector& axes, @@ -974,7 +974,7 @@ Node* GetMasterToComputeAt( } void LoopAssignReduce( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT const Node* node, const Node* reducer, const Target& target, @@ -1217,8 +1217,8 @@ class RemoveExpr : public ir::IRMutator<> { }; void MergeLoops(ir::Expr root, - std::vector& src, - std::vector& dst, + std::vector& src, // NOLINT + std::vector& dst, // NOLINT int index) { if (index < 0) { return; @@ -1247,7 +1247,7 @@ void MergeLoops(ir::Expr root, } void InsertSyncThread( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT const Node* node, const absl::flat_hash_map& shape_dict, const std::unordered_map& tensor_map) { @@ -1318,7 +1318,7 @@ class InsertExpr : public ir::IRMutator<> { }; void MergeReduceToReduce( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT const Node* node, const Node* master, const absl::flat_hash_map& shape_dict, @@ -1506,7 +1506,7 @@ void MergeReduceToReduce( } void MergeReduceLoop( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT Node* node, const Node* master, const absl::flat_hash_map& shape_dict, @@ -1611,7 +1611,7 @@ class FindExprInBlock : public ir::IRMutator<> { }; void LoopComputeAt( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT Node* node, const Node* master, const GroupPtr& group, @@ -1712,7 +1712,7 @@ std::unordered_set GetMasters( } void SyncThreadWithShared( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT const GroupPtr& group, const std::unordered_set& nodes_inline, const std::unordered_set& nodes_set, diff --git a/paddle/cinn/hlir/framework/op_lowering_util.h b/paddle/cinn/hlir/framework/op_lowering_util.h index 02741820db85ce26bb252dfb64a0ae30592ba430..504ee0600479d584f45ac3e35ebed394a691d133 100644 --- a/paddle/cinn/hlir/framework/op_lowering_util.h +++ b/paddle/cinn/hlir/framework/op_lowering_util.h @@ -31,8 +31,8 @@ ir::Tensor GetTensor( std::vector CollectInputTensor( const Node* node, - std::vector& func_args, - std::unordered_map& tensor_map, + std::vector& func_args, // NOLINT + std::unordered_map& tensor_map, // NOLINT const absl::flat_hash_map& type_dict, const absl::flat_hash_map& shape_dict); @@ -87,7 +87,7 @@ std::unordered_set GetMasters( const std::unordered_set& nodes_set); void LoopAssignReduce( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT const Node* node, const Node* reducer, const Target& target, @@ -95,7 +95,7 @@ void LoopAssignReduce( const absl::flat_hash_map& shape_dict); void LoopComputeAt( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT Node* node, const Node* master, const GroupPtr& group, @@ -103,7 +103,7 @@ void LoopComputeAt( const std::unordered_map& tensor_map); void SyncThreadWithShared( - ir::IRSchedule& ir_sch, + ir::IRSchedule& ir_sch, // NOLINT const GroupPtr& group, const std::unordered_set& nodes_inline, const std::unordered_set& nodes_set, diff --git a/paddle/cinn/hlir/framework/parallel_compiler.h b/paddle/cinn/hlir/framework/parallel_compiler.h index 8638d07230386ffd7b3092c2b0460002dd1cb00d..45fc4fef77a8d2a363a0e6dd50b82bc5104986c6 100644 --- a/paddle/cinn/hlir/framework/parallel_compiler.h +++ b/paddle/cinn/hlir/framework/parallel_compiler.h @@ -36,8 +36,8 @@ class ParallelCompiler { }; public: - explicit ParallelCompiler(std::shared_ptr& scope, - std::shared_ptr& graph, + explicit ParallelCompiler(std::shared_ptr& scope, // NOLINT + std::shared_ptr& graph, // NOLINT const CompileOptions& option, const common::Target& target) : scope_(scope), graph_(graph), option_(option), target_(target) {} @@ -53,8 +53,8 @@ class ParallelCompiler { struct Task { public: Task(ParallelCompiler* p, - std::shared_ptr& s, - std::shared_ptr& g, + std::shared_ptr& s, // NOLINT + std::shared_ptr& g, // NOLINT const CompileOptions& cp, const Target& t) : compiler(p), scope(s), graph(g), options(cp), target(t) {} diff --git a/paddle/cinn/hlir/op/op_broadcast_test.cc b/paddle/cinn/hlir/op/op_broadcast_test.cc old mode 100755 new mode 100644 index 086cb43528aa6212fbd39c22083024e87989a20c..4af7108f383614726b957414ffac2c5bed054ed9 --- a/paddle/cinn/hlir/op/op_broadcast_test.cc +++ b/paddle/cinn/hlir/op/op_broadcast_test.cc @@ -258,7 +258,7 @@ TEST(Operator, Operator_BroadcastTo) { common::CINNValuePack GetComputeResult( const std::shared_ptr &impl, - std::vector &cinn_inputs, + std::vector &cinn_inputs, // NOLINT const std::string &output_name = "") { if (FLAGS_cinn_ir_schedule) { cinn_inputs.emplace_back(output_name); diff --git a/paddle/cinn/hlir/op/op_nn_test.cc b/paddle/cinn/hlir/op/op_nn_test.cc index b2dff5cfdb7ee956d20cb9cee15aa0188f477dbc..7f0b287579b010cfefccdd1f78c2d426c136cbb3 100644 --- a/paddle/cinn/hlir/op/op_nn_test.cc +++ b/paddle/cinn/hlir/op/op_nn_test.cc @@ -44,7 +44,7 @@ Module LowerToModule(const std::string test_name, const std::shared_ptr &impl, std::vector input_names, const std::string &output_name, - std::vector &inputs, + std::vector &inputs, // NOLINT std::vector cinn_inputs, const Target &target) { Module::Builder builder("module", target); diff --git a/paddle/cinn/hlir/pass/common_subexpression_elimination.cc b/paddle/cinn/hlir/pass/common_subexpression_elimination.cc index 3c14e1d03b6804157e3fad5ba3e399adcd05fc21..e595783c7b11b8f2be3dfccfbc384c98d8111bb4 100644 --- a/paddle/cinn/hlir/pass/common_subexpression_elimination.cc +++ b/paddle/cinn/hlir/pass/common_subexpression_elimination.cc @@ -67,7 +67,9 @@ std::unordered_map special_attrs = { {"axes", 2}, {"perm", 2}}; -bool IsSameSubexpression(Node* op1, Node* op2, shape_dict_t& shape_dict) { +bool IsSameSubexpression(Node* op1, + Node* op2, + shape_dict_t& shape_dict) { // NOLINT // Get the input edges for op1 and op2 in order. auto op1_in_edges = op1->inlinks_in_order(); auto op2_in_edges = op2->inlinks_in_order(); @@ -201,13 +203,14 @@ void RemoveNodes(framework::Graph* graph, GraphNode* node) { graph->DropNode(node); } -void RemoveNodes(framework::Graph* graph, std::vector& nodes) { +void RemoveNodes(framework::Graph* graph, const std::vector& nodes) { for (auto* node : nodes) { RemoveNodes(graph, node); } } -void RemoveNodes(framework::Graph* graph, std::vector& nodes_data) { +void RemoveNodes(framework::Graph* graph, + const std::vector& nodes_data) { for (auto* data : nodes_data) { if (std::find(graph->outputs.begin(), graph->outputs.end(), data) != graph->outputs.end()) { diff --git a/paddle/cinn/hlir/pass/constant_folding_pass_test.cc b/paddle/cinn/hlir/pass/constant_folding_pass_test.cc index 2698d035d30d92b7416fa35fa49a6992d2c9785e..7729f4a49f1583ed25f0bfbcce7f210bfcedde06 100644 --- a/paddle/cinn/hlir/pass/constant_folding_pass_test.cc +++ b/paddle/cinn/hlir/pass/constant_folding_pass_test.cc @@ -19,7 +19,7 @@ namespace cinn { namespace frontend { -int GetSize(std::vector& shape) { +int GetSize(const std::vector& shape) { return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); } @@ -36,7 +36,7 @@ std::unordered_map> GetInputRandom( } std::unordered_map> RunModelTest( - Program& program, + Program& program, // NOLINT const std::vector&& passes, const std::unordered_map>& input_data, const std::unordered_set& fetch_ids) { diff --git a/paddle/cinn/hlir/pass/dense_merge_pass_test.cc b/paddle/cinn/hlir/pass/dense_merge_pass_test.cc index 05ee12558f7cac786ecdb352f609033f8902c0d2..07e16556729bdb3b9c75068ec08577f450aced9a 100644 --- a/paddle/cinn/hlir/pass/dense_merge_pass_test.cc +++ b/paddle/cinn/hlir/pass/dense_merge_pass_test.cc @@ -19,11 +19,11 @@ namespace cinn { namespace frontend { -int GetSize(std::vector& shape) { +int GetSize(const std::vector& shape) { return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); } -void RunModelTest(Program& program, +void RunModelTest(Program& program, // NOLINT const std::vector&& inputs, const std::unordered_set& fetch_ids) { // init input data. diff --git a/paddle/cinn/hlir/pass/dot_merger.cc b/paddle/cinn/hlir/pass/dot_merger.cc index 30cc12c083676e015fc6461eee93a53ecf675ff4..8638200180f66ef16eeea31375456b3921986467 100644 --- a/paddle/cinn/hlir/pass/dot_merger.cc +++ b/paddle/cinn/hlir/pass/dot_merger.cc @@ -130,7 +130,7 @@ class DotBuilder { const shape_dict_t& shape_dict() const { return shape_dict_; } // Currently the constructor of `NodeData` needs to pass in `Shared`. - NodeData* Var(common::Shared& producer) { + NodeData* Var(common::Shared& producer) { // NOLINT auto* res = new NodeData(producer, 0, 0, node_name("var"), false); graph_->RegisterNode(producer->id(), res); graph_->RegisterNode(res->id(), producer.get()); diff --git a/paddle/cinn/hlir/pass/dot_merger_test.cc b/paddle/cinn/hlir/pass/dot_merger_test.cc index 17258623f0d346f3375518e43a7e3d9734f9f611..77f02e709625aaa34beaaba9d874c229c589a6ca 100644 --- a/paddle/cinn/hlir/pass/dot_merger_test.cc +++ b/paddle/cinn/hlir/pass/dot_merger_test.cc @@ -19,11 +19,11 @@ namespace cinn { namespace frontend { -int GetSize(std::vector& shape) { +int GetSize(const std::vector& shape) { return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); } -void RunModelTest(Program& program, +void RunModelTest(Program& program, // NOLINT const std::vector&& inputs, const std::unordered_set& fetch_ids) { // init input data. diff --git a/paddle/cinn/hlir/pass/infershape.cc b/paddle/cinn/hlir/pass/infershape.cc old mode 100755 new mode 100644 index 87d8e263567c0156de6e4ec12e393774025cc5d8..b082c98a0fcf69b96ff26750a5dbaa650966162e --- a/paddle/cinn/hlir/pass/infershape.cc +++ b/paddle/cinn/hlir/pass/infershape.cc @@ -38,8 +38,8 @@ using dtype_dict_t = absl::flat_hash_map; using shape_dict_t = absl::flat_hash_map; void InferShape(Node* node, - dtype_dict_t& dtype_dict, - shape_dict_t& shape_dict) { + dtype_dict_t& dtype_dict, // NOLINT + shape_dict_t& shape_dict) { // NOLINT VLOG(3) << "Begin InferShape of node " << node->id(); auto op_infershape = Operator::GetAttrs("infershape"); auto op_inferdtype = Operator::GetAttrs("inferdtype"); diff --git a/paddle/cinn/hlir/pass/infershape.h b/paddle/cinn/hlir/pass/infershape.h index 7ed0ca5fe78ae6263471bb4b0fee92180bf5a19a..db9b8e21e7020818f93c777378ae2e7e91c3511d 100644 --- a/paddle/cinn/hlir/pass/infershape.h +++ b/paddle/cinn/hlir/pass/infershape.h @@ -24,8 +24,9 @@ namespace pass { void InferShape( framework::Node* node, - absl::flat_hash_map& dtype_dict, - absl::flat_hash_map& shape_dict); + absl::flat_hash_map& dtype_dict, // NOLINT + absl::flat_hash_map& + shape_dict); // NOLINT } // namespace pass } // namespace hlir diff --git a/paddle/cinn/hlir/pass/reduce_split_pass_test.cc b/paddle/cinn/hlir/pass/reduce_split_pass_test.cc index 688d6a0bd607ca8eff530ee78624ab7f05a1a910..5d9973209d0be8c3c396eddb1612862e7e6d3729 100644 --- a/paddle/cinn/hlir/pass/reduce_split_pass_test.cc +++ b/paddle/cinn/hlir/pass/reduce_split_pass_test.cc @@ -20,7 +20,7 @@ namespace cinn { namespace frontend { std::unordered_map> RunModelTest( - Program& program, + Program& program, // NOLINT const std::vector&& passes, const std::unordered_map>& input_data, const std::unordered_set& fetch_ids) { diff --git a/paddle/cinn/hlir/pe/ir_schedule_pe.cc b/paddle/cinn/hlir/pe/ir_schedule_pe.cc index 784f7ff85d5aba990410b9cc26ac07d957561931..a97b248c1223b677e36f5689bc3cc3f7e5788270 100644 --- a/paddle/cinn/hlir/pe/ir_schedule_pe.cc +++ b/paddle/cinn/hlir/pe/ir_schedule_pe.cc @@ -39,7 +39,7 @@ namespace cinn { namespace hlir { namespace pe { -void IRElementwiseSchedule(ir::IRSchedule &ir_sch, +void IRElementwiseSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target) { VLOG(3) << "Before IRElementwiseSchedule, new ir is : " @@ -67,7 +67,7 @@ void IRElementwiseSchedule(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRInjectiveSchedule(ir::IRSchedule &ir_sch, +void IRInjectiveSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target) { VLOG(3) << "Before IRInjectiveSchedule, new ir is : " @@ -95,7 +95,7 @@ void IRInjectiveSchedule(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch, +void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target, bool vectorizable) { @@ -132,7 +132,7 @@ void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaScheduleInjective(ir::IRSchedule &ir_sch, +void IRCudaScheduleInjective(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target) { VLOG(3) << "Begin IRCudaScheduleInjective "; @@ -208,7 +208,7 @@ std::vector IRCudaScheduleMatMul( return {common::CINNValue(ir_sch.GetModule().GetExprs().at(0))}; } -void IRCudaScheduleMul(ir::IRSchedule &ir_sch, +void IRCudaScheduleMul(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target) { auto all_blocks = ir_sch.GetAllBlocks(); @@ -221,7 +221,7 @@ void IRCudaScheduleMul(ir::IRSchedule &ir_sch, ir_sch.Bind(loops[1], "threadIdx.x"); } -void IRMulScheduleCPU(ir::IRSchedule &ir_sch, +void IRMulScheduleCPU(ir::IRSchedule &ir_sch, // NOLINT const std::vector &reduce_first_shape, const common::Target &target) { ir_sch.MergeExprs(); @@ -238,7 +238,7 @@ void IRMulScheduleCPU(ir::IRSchedule &ir_sch, } } -void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, +void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector> &output_shapes, int axis, const common::Target &target) { @@ -334,7 +334,7 @@ void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, +void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor output, int last_dimension_num, const common::Target &target) { @@ -390,7 +390,7 @@ void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, +void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor tmp_out, ir::Tensor out, const common::Target &target) { @@ -478,7 +478,7 @@ void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, +void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reduce_tmp_out, ir::Tensor tmp_out, ir::Tensor out, @@ -621,7 +621,7 @@ void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, +void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reshape, ir::Tensor internal, ir::Tensor reduce_out, @@ -880,7 +880,7 @@ void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, +void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reshape, ir::Tensor internal, ir::Tensor tmp_out, @@ -991,7 +991,7 @@ void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, // ir_sch.GetLoops(out->name)[0]); } -void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) { +void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) { // NOLINT ir_sch.MergeExprs(); auto all_blocks = ir_sch.GetAllBlocks(); CHECK_EQ(all_blocks.size(), 3U); @@ -1010,7 +1010,7 @@ void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) { ir_sch.ComputeAt(all_blocks[1], loops[0]); } -void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, +void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT const common::Target &target, int arg_pack_size) { VLOG(3) << "Before IRPoolScheduleGPU: " @@ -1028,7 +1028,7 @@ void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, VLOG(3) << "End IRPoolScheduleGPU: " << ir_sch.GetModule().GetExprs().at(0); } -void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, +void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT const common::Target &target) { VLOG(3) << "Before IRGlobalPoolScheduleGPU: " << ir_sch.GetModule().GetExprs().at(0); @@ -1071,7 +1071,7 @@ void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, +void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, // NOLINT const std::vector &tensors) { if (tensors.size() == 3U) { CHECK(tensors[1].as_tensor()); @@ -1097,7 +1097,8 @@ void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target) { +void IRCudaScheduleConv(ir::IRSchedule &ir_sch, // NOLINT + const common::Target &target) { VLOG(3) << "Begin IRCudaScheduleConv with expr: " << ir_sch.GetModule().GetExprs().at(0); auto &res = ScheduleParam::get_cuda_instance().GetParam(); @@ -1238,10 +1239,10 @@ void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target) { << ir_sch.GetModule().GetExprs().at(0); } -void IRCudaScheduleConv2(ir::IRSchedule &ir_sch, - ir::Tensor &input_pad, - ir::Tensor &weights, - ir::Tensor &output, +void IRCudaScheduleConv2(ir::IRSchedule &ir_sch, // NOLINT + ir::Tensor &input_pad, // NOLINT + ir::Tensor &weights, // NOLINT + ir::Tensor &output, // NOLINT const common::Target &target, const std::string &key) { auto &res = ScheduleParam::get_cuda_instance().GetParam(); diff --git a/paddle/cinn/hlir/pe/ir_schedule_pe.h b/paddle/cinn/hlir/pe/ir_schedule_pe.h index e7839fcc1ae570b2101c7c8ff5d8de1776a655b0..82967f3d9f536a0015f92423b2191f01ff43da1a 100644 --- a/paddle/cinn/hlir/pe/ir_schedule_pe.h +++ b/paddle/cinn/hlir/pe/ir_schedule_pe.h @@ -31,20 +31,20 @@ namespace cinn { namespace hlir { namespace pe { -void IRElementwiseSchedule(ir::IRSchedule &ir_sch, +void IRElementwiseSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target); -void IRInjectiveSchedule(ir::IRSchedule &ir_sch, +void IRInjectiveSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target); -void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch, +void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target, bool vectorizable = true); -void IRCudaScheduleInjective(ir::IRSchedule &ir_sch, +void IRCudaScheduleInjective(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target); @@ -53,68 +53,69 @@ std::vector IRCudaScheduleMatMul( const std::vector &output_shape, const common::Target &target); -void IRCudaScheduleMul(ir::IRSchedule &ir_sch, +void IRCudaScheduleMul(ir::IRSchedule &ir_sch, // NOLINT const std::vector &output_shape, const common::Target &target); -void IRMulScheduleCPU(ir::IRSchedule &ir_sch, +void IRMulScheduleCPU(ir::IRSchedule &ir_sch, // NOLINT const std::vector &reduce_first_shape, const common::Target &target); -void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, +void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT const std::vector> &output_shapes, int axis, const common::Target &target); -void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, +void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor out, int last_dimension_num, const common::Target &target); -void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, +void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reduce_tmp_out, ir::Tensor tmp_out, ir::Tensor out, const common::Target &target); -void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, +void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor tmp_out, ir::Tensor out, const common::Target &target); -void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, +void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reshape, ir::Tensor internal, ir::Tensor out, const common::Target &target); -void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, +void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, // NOLINT ir::Tensor reshape, ir::Tensor internal, ir::Tensor tmp_out, ir::Tensor out, const common::Target &target); -void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis = -1); +void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis = -1); // NOLINT -void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, +void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT const common::Target &target, int arg_pack_size = 3); -void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, +void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, // NOLINT const std::vector &tensors); -void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, +void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT const common::Target &target); -void IRCudaScheduleConv2(ir::IRSchedule &ir_sch, - ir::Tensor &input_pad, - ir::Tensor &weights, - ir::Tensor &output, +void IRCudaScheduleConv2(ir::IRSchedule &ir_sch, // NOLINT + ir::Tensor &input_pad, // NOLINT + ir::Tensor &weights, // NOLINT + ir::Tensor &output, // NOLINT const common::Target &target, const std::string &key); -void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target); +void IRCudaScheduleConv(ir::IRSchedule &ir_sch, // NOLINT + const common::Target &target); } // namespace pe } // namespace hlir diff --git a/paddle/cinn/hlir/pe/nn_util.cc b/paddle/cinn/hlir/pe/nn_util.cc index 30d316bcaf845d1c82c144e9da47864bb8467dd6..22ae26d03aea7a78e7824d129cc5fa87e71ace3e 100644 --- a/paddle/cinn/hlir/pe/nn_util.cc +++ b/paddle/cinn/hlir/pe/nn_util.cc @@ -456,8 +456,8 @@ int GetTailSize(const std::vector& inshape, const std::vector& axes) { std::vector GetFirstStepReduceShape(const std::vector& shape, const std::vector& axes, - bool& inbound, - int& tail) { + bool& inbound, // NOLINT + int& tail) { // NOLINT // post parallel size int post_parallel_size = GetPostParallelSize(shape, axes); // the size to unfold las reduce axis diff --git a/paddle/cinn/hlir/pe/nn_util.h b/paddle/cinn/hlir/pe/nn_util.h index 7ea201e0461ab0655d90fadc4387a41238168d42..b5b35cb2e58d664c91ff1913c2afa736f3a59f59 100644 --- a/paddle/cinn/hlir/pe/nn_util.h +++ b/paddle/cinn/hlir/pe/nn_util.h @@ -41,8 +41,8 @@ std::vector winograd_transform_matrices(const int& tile_size, std::vector GetFirstStepReduceShape(const std::vector& shape, const std::vector& axes, - bool& inbound, - int& tail); + bool& inbound, // NOLINT + int& tail); // NOLINT } // namespace pe } // namespace hlir diff --git a/paddle/cinn/hlir/pe/schedule.cc b/paddle/cinn/hlir/pe/schedule.cc index 2547a43862a9f28f3cd13afb87336cb48358c863..45022f740d7eb44c796b9f3dcc29f7df3fe5fd70 100644 --- a/paddle/cinn/hlir/pe/schedule.cc +++ b/paddle/cinn/hlir/pe/schedule.cc @@ -624,7 +624,7 @@ void PoolScheduleCPU(poly::StageMap stages, } void PoolScheduleGPU(poly::StageMap stages, - ir::Tensor &output, + const ir::Tensor &output, const common::Target &target) { CHECK_GE(stages[output]->axis_names().size(), 4); stages[output]->Fuse({0, 1, 2, 3}); @@ -866,7 +866,7 @@ void CreateX86SerialData(const std::string &file_name) { void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -1017,7 +1017,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -1139,7 +1139,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -1244,7 +1244,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -1377,7 +1377,7 @@ void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages, void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -2300,7 +2300,7 @@ void SaveSerialData( } void CudaScheduleDepthwiseConv(poly::StageMap stages, - ir::Tensor &output, + ir::Tensor &output, // NOLINT const common::Target &target) { auto OL = stages[output]->CacheWrite("local", stages, output); stages[output]->Bind(0, "blockIdx.x"); @@ -2313,9 +2313,9 @@ void CudaScheduleDepthwiseConv(poly::StageMap stages, } void CudaScheduleConv(poly::StageMap stages, - ir::Tensor &input_pad, - ir::Tensor &weights, - ir::Tensor &output, + ir::Tensor &input_pad, // NOLINT + ir::Tensor &weights, // NOLINT + ir::Tensor &output, // NOLINT const common::Target &target) { auto &res = ScheduleParam::get_cuda_instance().GetParam(); int n = output->shape[0].as_int32(); @@ -2382,9 +2382,9 @@ void CudaScheduleConv(poly::StageMap stages, } void CudaScheduleConv2(poly::StageMap stages, - ir::Tensor &input_pad, - ir::Tensor &weights, - ir::Tensor &output, + ir::Tensor &input_pad, // NOLINT + ir::Tensor &weights, // NOLINT + ir::Tensor &output, // NOLINT const common::Target &target, const std::string &key) { auto &res = ScheduleParam::get_cuda_instance().GetParam(); @@ -2516,7 +2516,7 @@ void CudaScheduleConv2(poly::StageMap stages, } void CudaScheduleWinogradConv(poly::StageMap wino_stages, - std::vector &all_tensors, + std::vector &all_tensors, // NOLINT const common::Target &target) { auto &res = ScheduleParam::get_cuda_instance().GetParam(); auto &wino_weights_dilation = all_tensors[0]; diff --git a/paddle/cinn/hlir/pe/schedule.h b/paddle/cinn/hlir/pe/schedule.h index c22c8fbb7a93a449c161c9c1c1bdaea15b6e04c6..cd3262cf3ad18e668cd677e584f9e658e846d9ce 100644 --- a/paddle/cinn/hlir/pe/schedule.h +++ b/paddle/cinn/hlir/pe/schedule.h @@ -124,7 +124,7 @@ void GetConv2d1x1Factors(absl::flat_hash_map *factors, void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -138,12 +138,12 @@ void PoolScheduleCPU(poly::StageMap stages, const ir::Tensor &output, const common::Target &target); void PoolScheduleGPU(poly::StageMap stages, - ir::Tensor &output, + const ir::Tensor &output, const common::Target &target); void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -151,7 +151,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages, void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -161,7 +161,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages, void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -170,7 +170,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages, void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse( poly::StageMap stages, const ir::Tensor &res, - ir::Tensor &packed_out, + ir::Tensor &packed_out, // NOLINT const ir::Tensor &input_pad, const ir::Tensor &weights_dilation, const ir::Tensor &data, @@ -218,23 +218,23 @@ void CudaTwoStepReduceSchedule(poly::StageMap stages, const common::Target &target); void CudaScheduleDepthwiseConv(poly::StageMap stages, - ir::Tensor &output, + ir::Tensor &output, // NOLINT const common::Target &target); void CudaScheduleConv(poly::StageMap stages, - ir::Tensor &input_pad, - ir::Tensor &weights, - ir::Tensor &output, + ir::Tensor &input_pad, // NOLINT + ir::Tensor &weights, // NOLINT + ir::Tensor &output, // NOLINT const common::Target &target); void CudaScheduleWinogradConv(poly::StageMap wino_stages, - std::vector &all_tensors, + std::vector &all_tensors, // NOLINT const common::Target &target); void CudaScheduleConv2(poly::StageMap stages, - ir::Tensor &input_pad, - ir::Tensor &weights, - ir::Tensor &output, + ir::Tensor &input_pad, // NOLINT + ir::Tensor &weights, // NOLINT + ir::Tensor &output, // NOLINT const common::Target &target, const std::string &key); diff --git a/paddle/cinn/ir/ir_schedule.cc b/paddle/cinn/ir/ir_schedule.cc index 3489b84a45e505f706c16af6aa15be579c1be59f..de48a7c28d8f149448cf4421da74ee62d5ba2644 100644 --- a/paddle/cinn/ir/ir_schedule.cc +++ b/paddle/cinn/ir/ir_schedule.cc @@ -96,7 +96,7 @@ class ScheduleImpl { int write_buffer_index, const std::string& memory_type); void SyncThreads(const Expr& ir_node, bool after_node = true); - void SetBuffer(Expr& block, + void SetBuffer(Expr& block, // NOLINT const std::string& memory_type, bool fixed = false); Expr Reorder(const std::vector& loops); @@ -114,7 +114,7 @@ class ScheduleImpl { Expr Rfactor(const Expr& rf_loop, int rf_axis); Expr AddUnitLoop(const Expr& block) const; void Annotate(const Expr& block, const std::string& key, const attr_t& value); - void Unannotate(Expr& block, const std::string& key); + void Unannotate(Expr& block, const std::string& key); // NOLINT void FlattenLoops(const std::vector& loops, const bool force_flat = false); void CopyTransformAndLoopInfo(const Expr& block, const Expr& block_target); diff --git a/paddle/cinn/ir/ir_schedule.h b/paddle/cinn/ir/ir_schedule.h index d847e933eb54daaec1b2bc26cf1484a173b0ae04..2689eb48a27e5b0062e7cc9df611a332b2e370c3 100644 --- a/paddle/cinn/ir/ir_schedule.h +++ b/paddle/cinn/ir/ir_schedule.h @@ -247,9 +247,9 @@ class IRSchedule { * \param memory_type The memory type we want to set. Should be "local", * "shared" or "global". */ - void SetBuffer(Expr& block, + void SetBuffer(Expr& block, // NOLINT const std::string& memory_type, - bool fixed = false); + bool fixed = false); // NOLINT /** * \brief Reorder the loops in the order of vector. @@ -391,7 +391,7 @@ class IRSchedule { * \param block The block to be unannotated * \param key The attribute key */ - void Unannotate(Expr& block, const std::string& key); + void Unannotate(Expr& block, const std::string& key); // NOLINT /*! * \brief flatten the loops in one dim. @@ -620,7 +620,7 @@ class LeafBlockRemovalPlan : public ir::IRMutator<> { class ComputeInlineChecker : public ir::IRMutator<> { public: - ComputeInlineChecker(IRSchedule& schedule, Expr& block) + ComputeInlineChecker(IRSchedule& schedule, Expr& block) // NOLINT : ir_schedule_(schedule), block_(block) {} bool Check(); diff --git a/paddle/cinn/ir/ir_schedule_util.cc b/paddle/cinn/ir/ir_schedule_util.cc index 34e6cf6e7cd2385981ecbe76cc977acad1b59c13..4b7ca206487422a9ca8bc85d461489c97ac84752 100644 --- a/paddle/cinn/ir/ir_schedule_util.cc +++ b/paddle/cinn/ir/ir_schedule_util.cc @@ -503,7 +503,7 @@ Expr MakeCacheBlock(const std::vector& buffer_ranges, return block; } -void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write) { +void FindInsertionPoint(const Expr& root, CacheBlockInfo* info, bool is_write) { Expr find_tensor = is_write ? Expr(info->write_tensor) : Expr(info->read_tensor); auto find_produce_read = @@ -651,7 +651,7 @@ Expr ConstructOtherStmtChain(const std::vector& stmts, Expr ConstructNewLoopChain(const std::vector& chain, const std::vector& ordered_loops, const std::set& loop_set, - std::vector& if_nodes) { + std::vector& if_nodes) { // NOLINT std::vector> condition_vars; // In each IfThenElse node, find the vars its condition depends on. for (auto& if_expr : if_nodes) { @@ -923,7 +923,7 @@ void CheckComputeAtValidation(const Expr& block, CHECK(find_block_in_loop.empty()) << "loop should not be block's ancestor!"; } -void InsertBlock(Expr& for_loop, const Expr& insertion, int index) { +void InsertBlock(Expr& for_loop, const Expr& insertion, int index) { // NOLINT CHECK(for_loop.As()); CHECK(for_loop.As()->body.As()); ir::Block* dst_block = for_loop.As()->body.As(); diff --git a/paddle/cinn/ir/ir_schedule_util.h b/paddle/cinn/ir/ir_schedule_util.h index 0107054e413767e5dc7895bf624963947a238dce..762cd166d2004de9e2523c36ec252558968684c4 100644 --- a/paddle/cinn/ir/ir_schedule_util.h +++ b/paddle/cinn/ir/ir_schedule_util.h @@ -326,7 +326,7 @@ Expr MakeCacheBlock(const std::vector& buffer_ranges, * @param info The information of cache block. * @param is_write Are we inserting a write cache tensor or a read cache tensor. */ -void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write); +void FindInsertionPoint(const Expr& root, CacheBlockInfo* info, bool is_write); /** * \brief Given a vector of For loops, return a set of them. @@ -359,7 +359,7 @@ std::vector GetLoopsInRange(const Expr& top, const Expr& bottom); Expr ConstructNewLoopChain(const std::vector& chain, const std::vector& ordered_loops, const std::set& loop_set, - std::vector& if_nodes); + std::vector& if_nodes); // NOLINT /*! * \brief Find producers of block in root. @@ -395,7 +395,9 @@ void CheckComputeAtValidation(const Expr& block, * - `index = -1` means inserted into the tail * - otherwise, it should be a index between [0, stmts size) */ -void InsertBlock(Expr& for_loop, const Expr& insertion, int index = 0); +void InsertBlock(Expr& for_loop, // NOLINT + const Expr& insertion, + int index = 0); // NOLINT /*! * \brief Make a union of two range. The detailed function is : diff --git a/paddle/cinn/lang/lower_impl.cc b/paddle/cinn/lang/lower_impl.cc index fcc6c12ce11ddd7c6d945c0645838c82ff583ee9..f68396a3a3ab107fa1fa170bc280d91339347cf7 100644 --- a/paddle/cinn/lang/lower_impl.cc +++ b/paddle/cinn/lang/lower_impl.cc @@ -49,7 +49,7 @@ void CheckNoIslCallRemains(Expr* expr) { } } -void BindBuffer(StageMap& stages) { +void BindBuffer(StageMap& stages) { // NOLINT absl::flat_hash_map tensor_map; for (auto& stage : stages) { tensor_map[stage.second->tensor()->name] = stage.second->tensor(); @@ -71,13 +71,13 @@ void BindBuffer(StageMap& stages) { } } -Expr LowerGroup( - const poly::ScheduleGroup& group, - const std::map& tuple_to_expr, - std::map* global_tensor_map, - std::unordered_map>& resized_buffer_cache, - StageMap stage_map, - ir::CudaAxisInfo* cuda_axis_info) { +Expr LowerGroup(const poly::ScheduleGroup& group, + const std::map& tuple_to_expr, + std::map* global_tensor_map, + std::unordered_map>& + resized_buffer_cache, // NOLINT + StageMap stage_map, + ir::CudaAxisInfo* cuda_axis_info) { BindBuffer(stage_map); std::vector stages; for (auto& node : group.nodes) { diff --git a/paddle/cinn/lang/lower_impl.h b/paddle/cinn/lang/lower_impl.h index 505e80ca6a49e7ed650522791113bc8b5be471f3..bc7494f57f6696388d2fb6b1c5808b9693c57303 100644 --- a/paddle/cinn/lang/lower_impl.h +++ b/paddle/cinn/lang/lower_impl.h @@ -69,7 +69,7 @@ void CheckNoIslCallRemains(const Expr* expr); Expr LowerGroup(const poly::ScheduleGroup& group, const std::map& tuple_to_expr, std::map* global_tensor_map, - std::unordered_set& resized_buffer, + std::unordered_set& resized_buffer, // NOLINT StageMap stage_map, ir::CudaAxisInfo* cuda_axis_info = nullptr); diff --git a/paddle/cinn/optim/ir_simplify.cc b/paddle/cinn/optim/ir_simplify.cc index 48645690f9de7ac42457cd471e398e817052574d..51915c925007e95901a672c32e7104c4bf062387 100644 --- a/paddle/cinn/optim/ir_simplify.cc +++ b/paddle/cinn/optim/ir_simplify.cc @@ -55,7 +55,8 @@ void PartialSimplify( //! Simplify the expression but Load. struct SimplifyButStoreLoadMutator : public ir::IRMutator { common::cas_intervals_t& var_intervals; - explicit SimplifyButStoreLoadMutator(common::cas_intervals_t& var_intervals) + explicit SimplifyButStoreLoadMutator( + common::cas_intervals_t& var_intervals) // NOLINT : var_intervals(var_intervals) {} void operator()(Expr* x) { ir::IRMutator::Visit(x, x); } diff --git a/paddle/cinn/poly/poly_scheduler.cc b/paddle/cinn/poly/poly_scheduler.cc old mode 100755 new mode 100644 index d3a34e4544507187bee03b12f723324faf573e5b..b916e5952ffe4b7ce928db900376dff5f50c37cb --- a/paddle/cinn/poly/poly_scheduler.cc +++ b/paddle/cinn/poly/poly_scheduler.cc @@ -177,7 +177,7 @@ bool IsBetween(const common::GraphNode* x, return false; } -std::vector TopoSortGroups(std::vector& groups) { +std::vector TopoSortGroups(std::vector& groups) { // NOLINT // collect indegree. absl::flat_hash_map group_indegree; std::vector start_groups; diff --git a/paddle/cinn/poly/stage.cc b/paddle/cinn/poly/stage.cc index 5af88bf251ed8d59dc5ac5c2ea247c396cd03a18..149a285f77558fbd0cfe695b2c6ed2094395376c 100644 --- a/paddle/cinn/poly/stage.cc +++ b/paddle/cinn/poly/stage.cc @@ -40,7 +40,7 @@ namespace cinn { namespace poly { -void RemoveDuplicate(std::vector> &indices) { +void RemoveDuplicate(std::vector> &indices) { // NOLINT std::set temp; for (int i = 0; i < indices.size(); i++) { std::string index_str = ""; @@ -309,7 +309,7 @@ int Minus(const Expr &a, const Expr &b) { } // Return the range = max - min among all indices[i][axis](i = 0,1,2,...) -int GetRange(std::vector> &indices, int axis) { +int GetRange(std::vector> &indices, int axis) { // NOLINT Expr max_expr = indices[0][axis]; Expr min_expr = indices[0][axis]; for (auto i = 1; i < indices.size(); i++) { @@ -1420,7 +1420,7 @@ struct CacheReplaceMutator : public ir::IRMutator<> { }; } // namespace -void CacheReadWriteReplace(std::vector &readers, +void CacheReadWriteReplace(const std::vector &readers, ir::Tensor cache_tensor, std::string origin_tensor_name) { for (auto k : readers) { diff --git a/paddle/cinn/poly/stage.h b/paddle/cinn/poly/stage.h old mode 100755 new mode 100644 index 869f8f038de5e9cc06ab9ec4fa95e3e24ae5762f..cf0586710bc9c733525765d955d151249de3a315 --- a/paddle/cinn/poly/stage.h +++ b/paddle/cinn/poly/stage.h @@ -270,7 +270,7 @@ class Stage : public Object { * @param readers the readers of the \p tensor */ ir::Tensor CacheRead(const std::string& memory_type, - std::vector& readers, + std::vector& readers, // NOLINT poly::StageMap stages); /** @@ -303,7 +303,8 @@ class Stage : public Object { void ShowISL() const; - void AddForLoopInTransform(std::vector>& indices); + void AddForLoopInTransform( + std::vector>& indices); // NOLINT /** * Create a cache for write to the original tensor. * @param tensor the tensor to create the cache for. @@ -312,7 +313,7 @@ class Stage : public Object { */ ir::Tensor CacheWrite(const std::string& memory_type, poly::StageMap stages, - ir::Tensor& key_tensor); + ir::Tensor& key_tensor); // NOLINT /** * Generate the `syncthreads()` code to sync all threads on CUDA backends. diff --git a/paddle/cinn/pybind/common.cc b/paddle/cinn/pybind/common.cc index 994308433bc7f81632bbc5de1a6d4d88fc94e6ee..a39d51628df09002d3d18de7143be3286f3eb8a7 100644 --- a/paddle/cinn/pybind/common.cc +++ b/paddle/cinn/pybind/common.cc @@ -236,12 +236,12 @@ inline auto __binary_op_fn_dispatch(T1 x, T2 y, F fn, std::false_type) { template inline void __binary_op_visitor_dispatch( - CINNValue &v, T1 lhs, T2 rhs, F fn, std::true_type) { + CINNValue &v, T1 lhs, T2 rhs, F fn, std::true_type) { // NOLINT v = CINNValue(); } template inline void __binary_op_visitor_dispatch( - CINNValue &v, T1 lhs, T2 rhs, F fn, std::false_type) { + CINNValue &v, T1 lhs, T2 rhs, F fn, std::false_type) { // NOLINT v.Set(fn(lhs, rhs)); } diff --git a/paddle/cinn/runtime/cuda/float16.h b/paddle/cinn/runtime/cuda/float16.h index be847bfc2e7cd183ea663bd05486aea3df0c8ab7..cae59186dc83221993e5f720480087a90e647810 100644 --- a/paddle/cinn/runtime/cuda/float16.h +++ b/paddle/cinn/runtime/cuda/float16.h @@ -500,25 +500,25 @@ __host__ __device__ inline float16 operator-(const float16& a) { #endif } -__host__ __device__ inline float16& operator+=(float16& a, +__host__ __device__ inline float16& operator+=(float16& a, // NOLINT const float16& b) { // NOLINT a = a + b; return a; } -__host__ __device__ inline float16& operator-=(float16& a, +__host__ __device__ inline float16& operator-=(float16& a, // NOLINT const float16& b) { // NOLINT a = a - b; return a; } -__host__ __device__ inline float16& operator*=(float16& a, +__host__ __device__ inline float16& operator*=(float16& a, // NOLINT const float16& b) { // NOLINT a = a * b; return a; } -__host__ __device__ inline float16& operator/=(float16& a, +__host__ __device__ inline float16& operator/=(float16& a, // NOLINT const float16& b) { // NOLINT a = a / b; return a;