Unverified commit 3e5018df, authored by W Wang Xin, committed by GitHub

[CodeStyle][CINN] fix cpplint codestyle for [runtime/references] (#55068)

Parent 13c2342a
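For context, cpplint's `runtime/references` check flags non-const reference parameters. The hunks below resolve each warning in one of two ways: append a trailing `// NOLINT` where the function genuinely needs a mutable reference (IR schedules, tensors, builders), or change the parameter to a `const` reference where the argument is only read (e.g. `GetSize`, `PoolScheduleGPU`, `FindInsertionPoint`). A minimal sketch of both fixes; `Normalize` is a hypothetical name, not a function from this commit:

```cpp
#include <functional>
#include <numeric>
#include <vector>

// Fix 1: the function intentionally mutates its argument, so the mutable
// reference stays and the cpplint warning is suppressed explicitly.
// (Normalize is a hypothetical example, not part of this diff.)
void Normalize(std::vector<float>& data) {  // NOLINT
  for (auto& v : data) v *= 0.5f;
}

// Fix 2: the function only reads its argument, so the parameter becomes a
// const reference and no suppression is needed (as done for GetSize below).
int GetSize(const std::vector<int>& shape) {
  return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
```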
......@@ -145,7 +145,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) {
ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
const ir::LoweredFunc& old_func,
ir::Expr& body) {
ir::Expr& body) { // NOLINT
ir::ModuleExpr mod_expr(std::vector<ir::Expr>({body}));
ir::IRSchedule ir_sch(mod_expr);
......
......@@ -46,7 +46,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize);
*/
ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
const ir::LoweredFunc& old_func,
ir::Expr& body);
ir::Expr& body); // NOLINT
} // namespace auto_schedule
} // namespace cinn
......@@ -64,7 +64,7 @@ class AutoInline : public AutoGenRule {
const std::string& block_name) override;
private:
void Apply(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);
void Apply(ir::IRSchedule* ir_schedule, ir::Expr& block_expr); // NOLINT
private:
std::vector<ir::Expr> all_block_realizes_;
......
......@@ -120,9 +120,12 @@ class MultiLevelTiling : public AutoGenRule {
}
private:
void ApplyTiling(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);
void ApplyCacheRead(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);
void ApplyCacheWrite(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);
void ApplyTiling(ir::IRSchedule* ir_schedule,
ir::Expr& block_expr); // NOLINT
void ApplyCacheRead(ir::IRSchedule* ir_schedule,
ir::Expr& block_expr); // NOLINT
void ApplyCacheWrite(ir::IRSchedule* ir_schedule,
ir::Expr& block_expr); // NOLINT
private:
std::vector<ir::Expr> all_block_realizes_;
......
......@@ -51,7 +51,7 @@ using cinn::hlir::op::ExternalApiRegistry;
// *** update a scheduled function with several post-processors
ir::LoweredFunc FuncWithUpdatedBody(const common::Target& target,
const ir::LoweredFunc& old_func,
ir::Expr& body);
ir::Expr& body); // NOLINT
// check whether a scheduled lowered function is valid
bool PruneInvalid(const ir::LoweredFunc& lowered_func,
const common::Target& target);
......
......@@ -1038,13 +1038,13 @@ llvm::Value *CodeGenLLVM::Visit(const ir::_Tensor_ *op) {
template <typename T,
std::enable_if_t<std::is_same<const ir::Expr &, T>::value, int> = 0>
void appendBody(std::vector<Expr> &new_body, T &&v) {
void appendBody(std::vector<Expr> &new_body, T &&v) { // NOLINT
new_body.push_back(v);
}
template <typename T,
std::enable_if_t<!std::is_same<const ir::Expr &, T>::value, int> = 1>
void appendBody(std::vector<Expr> &new_body, T &&v) {
void appendBody(std::vector<Expr> &new_body, T &&v) { // NOLINT
new_body.insert(new_body.end(), v.begin(), v.end());
}
......
......@@ -98,7 +98,7 @@ class SymbolTable {
};
struct SymbolTableGuard {
explicit SymbolTableGuard(SymbolTable &symbol_table)
explicit SymbolTableGuard(SymbolTable &symbol_table) // NOLINT
: symbol_table_(symbol_table) {
symbol_table.PushScope();
}
......
......@@ -180,7 +180,8 @@ class GiNaCToExprVisitor : public GiNaC::symbol::visitor,
ir::Expr cur;
public:
explicit GiNaCToExprVisitor(std::map<std::string, ir::Expr>& repr_to_expr)
explicit GiNaCToExprVisitor(
std::map<std::string, ir::Expr>& repr_to_expr) // NOLINT
: repr_to_expr(repr_to_expr) {}
Expr operator()(GiNaC::ex ex) {
......
......@@ -500,25 +500,25 @@ __host__ __device__ inline float16 operator-(const float16& a) {
#endif
}
__host__ __device__ inline float16& operator+=(float16& a,
__host__ __device__ inline float16& operator+=(float16& a, // NOLINT
const float16& b) { // NOLINT
a = a + b;
return a;
}
__host__ __device__ inline float16& operator-=(float16& a,
__host__ __device__ inline float16& operator-=(float16& a, // NOLINT
const float16& b) { // NOLINT
a = a - b;
return a;
}
__host__ __device__ inline float16& operator*=(float16& a,
__host__ __device__ inline float16& operator*=(float16& a, // NOLINT
const float16& b) { // NOLINT
a = a * b;
return a;
}
__host__ __device__ inline float16& operator/=(float16& a,
__host__ __device__ inline float16& operator/=(float16& a, // NOLINT
const float16& b) { // NOLINT
a = a / b;
return a;
......
......@@ -42,7 +42,7 @@ struct ComputationContext {
std::shared_ptr<ComputationContext> CompileProgram(
const Target &target,
Program &program,
Program &program, // NOLINT
const std::vector<Variable> &outputs,
std::shared_ptr<hlir::framework::Scope> scope,
const CinnComputation::CompileOptions &options,
......
......@@ -59,7 +59,7 @@ class CinnComputation {
*/
static std::shared_ptr<CinnComputation> BuildAndCompile(
const Target &target,
NetBuilder &builder,
NetBuilder &builder, // NOLINT
const CompileOptions &options = DefaultCompileOptions(),
const std::vector<Variable> &outputs = {},
void *stream = nullptr);
......@@ -77,7 +77,7 @@ class CinnComputation {
*/
static std::shared_ptr<CinnComputation> Compile(
const Target &target,
Program &program,
Program &program, // NOLINT
const CompileOptions &options = DefaultCompileOptions(),
const std::vector<Variable> &outputs = {},
void *stream = nullptr);
......@@ -130,7 +130,9 @@ class CinnComputation {
* @param data address of the memory buffer to store tensor's data
* @param size size of the memory buffer
*/
void SetTensorData(hlir::framework::Tensor &t, void *data, size_t size);
void SetTensorData(hlir::framework::Tensor &t, // NOLINT
void *data,
size_t size);
/**
* set the data of a tensor (specified by it's name) from user specified
......@@ -148,7 +150,9 @@ class CinnComputation {
* @param data address of the memory buffer to store tensor's data
* @param size size of the memory buffer
*/
void GetTensorData(hlir::framework::Tensor &t, void *data, size_t size);
void GetTensorData(hlir::framework::Tensor &t, // NOLINT
void *data,
size_t size);
/**
* copy the data of a tensor (specified by it's name) to user specified
* buffer. if tensor is in NVGPU device memory, cudaMemcpy is used.
......
......@@ -33,7 +33,7 @@
namespace cinn {
namespace frontend {
int GetSize(std::vector<int>& shape) {
int GetSize(const std::vector<int>& shape) {
return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
......
......@@ -296,12 +296,12 @@ std::tuple<std::unique_ptr<Program>,
absl::flat_hash_map<std::string, Variable>,
absl::flat_hash_map<std::string, std::string>,
absl::flat_hash_set<std::string>>
LoadPaddleProgram(
const std::string& model_dir,
Scope* scope,
std::unordered_map<std::string, std::vector<int>>& input_shape_map,
bool is_combined,
const common::Target& target) {
LoadPaddleProgram(const std::string& model_dir,
Scope* scope,
std::unordered_map<std::string, std::vector<int>>&
input_shape_map, // NOLINT
bool is_combined,
const common::Target& target) {
VLOG(1) << "Loading Paddle model from " << model_dir;
PaddleModelToProgram paddle_to_program(scope, input_shape_map, target);
return std::make_tuple(paddle_to_program(model_dir, is_combined),
......
......@@ -528,12 +528,12 @@ std::tuple<std::unique_ptr<Program>,
absl::flat_hash_map<std::string, Variable>,
absl::flat_hash_map<std::string, std::string>,
absl::flat_hash_set<std::string>>
LoadPaddleProgram(
const std::string& model_dir,
hlir::framework::Scope* scope,
std::unordered_map<std::string, std::vector<int>>& input_shape_map,
bool is_combined,
const common::Target& target = common::DefaultHostTarget());
LoadPaddleProgram(const std::string& model_dir,
hlir::framework::Scope* scope,
std::unordered_map<std::string, std::vector<int>>&
input_shape_map, // NOLINT
bool is_combined,
const common::Target& target = common::DefaultHostTarget());
std::ostream& operator<<(std::ostream& os, const Variable& x);
std::ostream& operator<<(std::ostream& os, const Instruction& instr);
......
......@@ -1643,7 +1643,7 @@ std::shared_ptr<Scope> BuildScope(Target target,
std::vector<ir::LoweredFunc> GetFuncFromImpl(
const std::shared_ptr<OpImpl>& impl,
const common::CINNValuePack& cinn_inputs,
std::vector<ir::Tensor>& all_arg_tensors,
std::vector<ir::Tensor>& all_arg_tensors, // NOLINT
const std::vector<std::string>& input_output_nodes,
const std::string& node_id,
const Target& target) {
......
......@@ -222,7 +222,7 @@ std::shared_ptr<Scope> BuildScope(Target target,
std::vector<ir::LoweredFunc> GetFuncFromImpl(
const std::shared_ptr<OpImpl>& impl,
const common::CINNValuePack& cinn_inputs,
std::vector<ir::Tensor>& tensor_inputs,
std::vector<ir::Tensor>& tensor_inputs, // NOLINT
const std::vector<std::string>& input_output_nodes,
const std::string& node_id,
const Target& target);
......
......@@ -45,7 +45,7 @@ OpLowerer::OpLowerer(
const Target& target)
: type_dict_(type_dict), shape_dict_(shape_dict), target_(target) {}
std::vector<ir::LoweredFunc> OpLowerer::Lower(GroupPtr& group) {
std::vector<ir::LoweredFunc> OpLowerer::Lower(GroupPtr& group) { // NOLINT
VLOG(3) << "Lowering Group : " << group->group_id
<< " , Op Pattern : " << group->op_pattern_kind;
group->input_names.clear();
......
......@@ -52,8 +52,8 @@ class OpLowerer {
OpLowerer(const absl::flat_hash_map<std::string, Type>&,
const absl::flat_hash_map<std::string, shape_t>&,
const Target&);
std::vector<ir::LoweredFunc> Lower(GroupPtr& group);
std::vector<ir::LoweredFunc> LowerWithoutSchedule(GroupPtr& group);
std::vector<ir::LoweredFunc> Lower(GroupPtr& group); // NOLINT
std::vector<ir::LoweredFunc> LowerWithoutSchedule(GroupPtr& group); // NOLINT
private:
std::vector<ir::LoweredFunc> IRLowerOp(IRComputeFunction, GroupPtr&);
......@@ -75,7 +75,7 @@ class OpLowerer {
DEFINE_IR_COMPUTE(OutEWiseFusable);
void IRSchedule(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
const GroupPtr& group,
const std::unordered_map<std::string, ir::Tensor>& tensor_map);
......
......@@ -32,7 +32,7 @@ namespace framework {
using frontend::NetBuilder;
using frontend::RunDecomposer;
void CodeGen(ir::LoweredFunc& func) {
void CodeGen(const ir::LoweredFunc& func) {
#ifdef CINN_WITH_CUDA
auto target = common::DefaultNVGPUTarget();
Module::Builder builder("module_builder", target);
......@@ -56,7 +56,7 @@ void CodeGen(ir::LoweredFunc& func) {
#endif
}
void Compile(NetBuilder& net_builder) {
void Compile(NetBuilder& net_builder) { // NOLINT
auto program = net_builder.Build();
auto target = common::DefaultTarget();
RunDecomposer(&program, target);
......
......@@ -92,8 +92,8 @@ ir::Tensor GetTensor(
std::vector<ir::Tensor> CollectInputTensor(
const Node* node,
std::vector<ir::Tensor>& func_args,
std::unordered_map<std::string, ir::Tensor>& tensor_map,
std::vector<ir::Tensor>& func_args, // NOLINT
std::unordered_map<std::string, ir::Tensor>& tensor_map, // NOLINT
const absl::flat_hash_map<std::string, Type>& type_dict,
const absl::flat_hash_map<std::string, shape_t>& shape_dict) {
std::vector<ir::Tensor> tensors;
......@@ -543,7 +543,7 @@ bool WithoutLastDimInReduce(const std::vector<int>& shape,
}
}
void LoopOrderAssignReduce(ir::IRSchedule& ir_sch,
void LoopOrderAssignReduce(ir::IRSchedule& ir_sch, // NOLINT
const std::string& block_name,
const std::vector<int>& axes,
const common::Target& target,
......@@ -593,7 +593,7 @@ void LoopOrderAssignReduce(ir::IRSchedule& ir_sch,
}
}
void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch,
void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch, // NOLINT
const std::string& block_name,
const std::vector<int>& inshape,
const std::vector<int>& axes,
......@@ -707,7 +707,7 @@ void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch,
ir_sch.Reorder(block_name, new_order);
}
void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,
void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch, // NOLINT
const std::string& block_name,
const std::vector<int>& inshape,
const std::vector<int>& axes,
......@@ -974,7 +974,7 @@ Node* GetMasterToComputeAt(
}
void LoopAssignReduce(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
const Node* node,
const Node* reducer,
const Target& target,
......@@ -1217,8 +1217,8 @@ class RemoveExpr : public ir::IRMutator<> {
};
void MergeLoops(ir::Expr root,
std::vector<ir::Expr>& src,
std::vector<ir::Expr>& dst,
std::vector<ir::Expr>& src, // NOLINT
std::vector<ir::Expr>& dst, // NOLINT
int index) {
if (index < 0) {
return;
......@@ -1247,7 +1247,7 @@ void MergeLoops(ir::Expr root,
}
void InsertSyncThread(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
const Node* node,
const absl::flat_hash_map<std::string, shape_t>& shape_dict,
const std::unordered_map<std::string, ir::Tensor>& tensor_map) {
......@@ -1318,7 +1318,7 @@ class InsertExpr : public ir::IRMutator<> {
};
void MergeReduceToReduce(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
const Node* node,
const Node* master,
const absl::flat_hash_map<std::string, shape_t>& shape_dict,
......@@ -1506,7 +1506,7 @@ void MergeReduceToReduce(
}
void MergeReduceLoop(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
Node* node,
const Node* master,
const absl::flat_hash_map<std::string, shape_t>& shape_dict,
......@@ -1611,7 +1611,7 @@ class FindExprInBlock : public ir::IRMutator<> {
};
void LoopComputeAt(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
Node* node,
const Node* master,
const GroupPtr& group,
......@@ -1712,7 +1712,7 @@ std::unordered_set<Node*> GetMasters(
}
void SyncThreadWithShared(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
const GroupPtr& group,
const std::unordered_set<Node*>& nodes_inline,
const std::unordered_set<Node*>& nodes_set,
......
......@@ -31,8 +31,8 @@ ir::Tensor GetTensor(
std::vector<ir::Tensor> CollectInputTensor(
const Node* node,
std::vector<ir::Tensor>& func_args,
std::unordered_map<std::string, ir::Tensor>& tensor_map,
std::vector<ir::Tensor>& func_args, // NOLINT
std::unordered_map<std::string, ir::Tensor>& tensor_map, // NOLINT
const absl::flat_hash_map<std::string, Type>& type_dict,
const absl::flat_hash_map<std::string, shape_t>& shape_dict);
......@@ -87,7 +87,7 @@ std::unordered_set<Node*> GetMasters(
const std::unordered_set<Node*>& nodes_set);
void LoopAssignReduce(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
const Node* node,
const Node* reducer,
const Target& target,
......@@ -95,7 +95,7 @@ void LoopAssignReduce(
const absl::flat_hash_map<std::string, shape_t>& shape_dict);
void LoopComputeAt(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
Node* node,
const Node* master,
const GroupPtr& group,
......@@ -103,7 +103,7 @@ void LoopComputeAt(
const std::unordered_map<std::string, ir::Tensor>& tensor_map);
void SyncThreadWithShared(
ir::IRSchedule& ir_sch,
ir::IRSchedule& ir_sch, // NOLINT
const GroupPtr& group,
const std::unordered_set<Node*>& nodes_inline,
const std::unordered_set<Node*>& nodes_set,
......
......@@ -36,8 +36,8 @@ class ParallelCompiler {
};
public:
explicit ParallelCompiler(std::shared_ptr<Scope>& scope,
std::shared_ptr<Graph>& graph,
explicit ParallelCompiler(std::shared_ptr<Scope>& scope, // NOLINT
std::shared_ptr<Graph>& graph, // NOLINT
const CompileOptions& option,
const common::Target& target)
: scope_(scope), graph_(graph), option_(option), target_(target) {}
......@@ -53,8 +53,8 @@ class ParallelCompiler {
struct Task {
public:
Task(ParallelCompiler* p,
std::shared_ptr<Scope>& s,
std::shared_ptr<Graph>& g,
std::shared_ptr<Scope>& s, // NOLINT
std::shared_ptr<Graph>& g, // NOLINT
const CompileOptions& cp,
const Target& t)
: compiler(p), scope(s), graph(g), options(cp), target(t) {}
......
......@@ -258,7 +258,7 @@ TEST(Operator, Operator_BroadcastTo) {
common::CINNValuePack GetComputeResult(
const std::shared_ptr<OpImpl> &impl,
std::vector<common::CINNValue> &cinn_inputs,
std::vector<common::CINNValue> &cinn_inputs, // NOLINT
const std::string &output_name = "") {
if (FLAGS_cinn_ir_schedule) {
cinn_inputs.emplace_back(output_name);
......
......@@ -44,7 +44,7 @@ Module LowerToModule(const std::string test_name,
const std::shared_ptr<OpImpl> &impl,
std::vector<std::string> input_names,
const std::string &output_name,
std::vector<ir::Tensor> &inputs,
std::vector<ir::Tensor> &inputs, // NOLINT
std::vector<common::CINNValue> cinn_inputs,
const Target &target) {
Module::Builder builder("module", target);
......
......@@ -67,7 +67,9 @@ std::unordered_map<std::string, int> special_attrs = {
{"axes", 2},
{"perm", 2}};
bool IsSameSubexpression(Node* op1, Node* op2, shape_dict_t& shape_dict) {
bool IsSameSubexpression(Node* op1,
Node* op2,
shape_dict_t& shape_dict) { // NOLINT
// Get the input edges for op1 and op2 in order.
auto op1_in_edges = op1->inlinks_in_order();
auto op2_in_edges = op2->inlinks_in_order();
......@@ -201,13 +203,14 @@ void RemoveNodes(framework::Graph* graph, GraphNode* node) {
graph->DropNode(node);
}
void RemoveNodes(framework::Graph* graph, std::vector<Node*>& nodes) {
void RemoveNodes(framework::Graph* graph, const std::vector<Node*>& nodes) {
for (auto* node : nodes) {
RemoveNodes(graph, node);
}
}
void RemoveNodes(framework::Graph* graph, std::vector<NodeData*>& nodes_data) {
void RemoveNodes(framework::Graph* graph,
const std::vector<NodeData*>& nodes_data) {
for (auto* data : nodes_data) {
if (std::find(graph->outputs.begin(), graph->outputs.end(), data) !=
graph->outputs.end()) {
......
......@@ -19,7 +19,7 @@
namespace cinn {
namespace frontend {
int GetSize(std::vector<int>& shape) {
int GetSize(const std::vector<int>& shape) {
return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
......@@ -36,7 +36,7 @@ std::unordered_map<std::string, std::vector<float>> GetInputRandom(
}
std::unordered_map<std::string, std::vector<float>> RunModelTest(
Program& program,
Program& program, // NOLINT
const std::vector<std::string>&& passes,
const std::unordered_map<std::string, std::vector<float>>& input_data,
const std::unordered_set<std::string>& fetch_ids) {
......
......@@ -19,11 +19,11 @@
namespace cinn {
namespace frontend {
int GetSize(std::vector<int>& shape) {
int GetSize(const std::vector<int>& shape) {
return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
void RunModelTest(Program& program,
void RunModelTest(Program& program, // NOLINT
const std::vector<Variable>&& inputs,
const std::unordered_set<std::string>& fetch_ids) {
// init input data.
......
......@@ -130,7 +130,7 @@ class DotBuilder {
const shape_dict_t& shape_dict() const { return shape_dict_; }
// Currently the constructor of `NodeData` needs to pass in `Shared<Node>`.
NodeData* Var(common::Shared<Node>& producer) {
NodeData* Var(common::Shared<Node>& producer) { // NOLINT
auto* res = new NodeData(producer, 0, 0, node_name("var"), false);
graph_->RegisterNode(producer->id(), res);
graph_->RegisterNode(res->id(), producer.get());
......
......@@ -19,11 +19,11 @@
namespace cinn {
namespace frontend {
int GetSize(std::vector<int>& shape) {
int GetSize(const std::vector<int>& shape) {
return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
}
void RunModelTest(Program& program,
void RunModelTest(Program& program, // NOLINT
const std::vector<Variable>&& inputs,
const std::unordered_set<std::string>& fetch_ids) {
// init input data.
......
......@@ -38,8 +38,8 @@ using dtype_dict_t = absl::flat_hash_map<std::string, common::Type>;
using shape_dict_t = absl::flat_hash_map<std::string, framework::shape_t>;
void InferShape(Node* node,
dtype_dict_t& dtype_dict,
shape_dict_t& shape_dict) {
dtype_dict_t& dtype_dict, // NOLINT
shape_dict_t& shape_dict) { // NOLINT
VLOG(3) << "Begin InferShape of node " << node->id();
auto op_infershape = Operator::GetAttrs<infershape_t>("infershape");
auto op_inferdtype = Operator::GetAttrs<inferdtype_t>("inferdtype");
......
......@@ -24,8 +24,9 @@ namespace pass {
void InferShape(
framework::Node* node,
absl::flat_hash_map<std::string, common::Type>& dtype_dict,
absl::flat_hash_map<std::string, framework::shape_t>& shape_dict);
absl::flat_hash_map<std::string, common::Type>& dtype_dict, // NOLINT
absl::flat_hash_map<std::string, framework::shape_t>&
shape_dict); // NOLINT
} // namespace pass
} // namespace hlir
......
......@@ -20,7 +20,7 @@ namespace cinn {
namespace frontend {
std::unordered_map<std::string, std::vector<float>> RunModelTest(
Program& program,
Program& program, // NOLINT
const std::vector<std::string>&& passes,
const std::unordered_map<std::string, std::vector<float>>& input_data,
const std::unordered_set<std::string>& fetch_ids) {
......
......@@ -39,7 +39,7 @@ namespace cinn {
namespace hlir {
namespace pe {
void IRElementwiseSchedule(ir::IRSchedule &ir_sch,
void IRElementwiseSchedule(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target) {
VLOG(3) << "Before IRElementwiseSchedule, new ir is : "
......@@ -67,7 +67,7 @@ void IRElementwiseSchedule(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRInjectiveSchedule(ir::IRSchedule &ir_sch,
void IRInjectiveSchedule(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target) {
VLOG(3) << "Before IRInjectiveSchedule, new ir is : "
......@@ -95,7 +95,7 @@ void IRInjectiveSchedule(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,
void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target,
bool vectorizable) {
......@@ -132,7 +132,7 @@ void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaScheduleInjective(ir::IRSchedule &ir_sch,
void IRCudaScheduleInjective(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target) {
VLOG(3) << "Begin IRCudaScheduleInjective ";
......@@ -208,7 +208,7 @@ std::vector<common::CINNValue> IRCudaScheduleMatMul(
return {common::CINNValue(ir_sch.GetModule().GetExprs().at(0))};
}
void IRCudaScheduleMul(ir::IRSchedule &ir_sch,
void IRCudaScheduleMul(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target) {
auto all_blocks = ir_sch.GetAllBlocks();
......@@ -221,7 +221,7 @@ void IRCudaScheduleMul(ir::IRSchedule &ir_sch,
ir_sch.Bind(loops[1], "threadIdx.x");
}
void IRMulScheduleCPU(ir::IRSchedule &ir_sch,
void IRMulScheduleCPU(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &reduce_first_shape,
const common::Target &target) {
ir_sch.MergeExprs();
......@@ -238,7 +238,7 @@ void IRMulScheduleCPU(ir::IRSchedule &ir_sch,
}
}
void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,
void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<std::vector<int>> &output_shapes,
int axis,
const common::Target &target) {
......@@ -334,7 +334,7 @@ void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaScheduleReduce(ir::IRSchedule &ir_sch,
void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor output,
int last_dimension_num,
const common::Target &target) {
......@@ -390,7 +390,7 @@ void IRCudaScheduleReduce(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch,
void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor tmp_out,
ir::Tensor out,
const common::Target &target) {
......@@ -478,7 +478,7 @@ void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch,
void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor reduce_tmp_out,
ir::Tensor tmp_out,
ir::Tensor out,
......@@ -621,7 +621,7 @@ void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch,
void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor reshape,
ir::Tensor internal,
ir::Tensor reduce_out,
......@@ -880,7 +880,7 @@ void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch,
void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor reshape,
ir::Tensor internal,
ir::Tensor tmp_out,
......@@ -991,7 +991,7 @@ void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch,
// ir_sch.GetLoops(out->name)[0]);
}
void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) {
void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) { // NOLINT
ir_sch.MergeExprs();
auto all_blocks = ir_sch.GetAllBlocks();
CHECK_EQ(all_blocks.size(), 3U);
......@@ -1010,7 +1010,7 @@ void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) {
ir_sch.ComputeAt(all_blocks[1], loops[0]);
}
void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,
void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT
const common::Target &target,
int arg_pack_size) {
VLOG(3) << "Before IRPoolScheduleGPU: "
......@@ -1028,7 +1028,7 @@ void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,
VLOG(3) << "End IRPoolScheduleGPU: " << ir_sch.GetModule().GetExprs().at(0);
}
void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,
void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT
const common::Target &target) {
VLOG(3) << "Before IRGlobalPoolScheduleGPU: "
<< ir_sch.GetModule().GetExprs().at(0);
......@@ -1071,7 +1071,7 @@ void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch,
void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<ir::Expr> &tensors) {
if (tensors.size() == 3U) {
CHECK(tensors[1].as_tensor());
......@@ -1097,7 +1097,8 @@ void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch,
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target) {
void IRCudaScheduleConv(ir::IRSchedule &ir_sch, // NOLINT
const common::Target &target) {
VLOG(3) << "Begin IRCudaScheduleConv with expr: "
<< ir_sch.GetModule().GetExprs().at(0);
auto &res = ScheduleParam::get_cuda_instance().GetParam();
......@@ -1238,10 +1239,10 @@ void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target) {
<< ir_sch.GetModule().GetExprs().at(0);
}
void IRCudaScheduleConv2(ir::IRSchedule &ir_sch,
ir::Tensor &input_pad,
ir::Tensor &weights,
ir::Tensor &output,
void IRCudaScheduleConv2(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor &input_pad, // NOLINT
ir::Tensor &weights, // NOLINT
ir::Tensor &output, // NOLINT
const common::Target &target,
const std::string &key) {
auto &res = ScheduleParam::get_cuda_instance().GetParam();
......
......@@ -31,20 +31,20 @@ namespace cinn {
namespace hlir {
namespace pe {
void IRElementwiseSchedule(ir::IRSchedule &ir_sch,
void IRElementwiseSchedule(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target);
void IRInjectiveSchedule(ir::IRSchedule &ir_sch,
void IRInjectiveSchedule(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target);
void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,
void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target,
bool vectorizable = true);
void IRCudaScheduleInjective(ir::IRSchedule &ir_sch,
void IRCudaScheduleInjective(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target);
......@@ -53,68 +53,69 @@ std::vector<common::CINNValue> IRCudaScheduleMatMul(
const std::vector<int> &output_shape,
const common::Target &target);
void IRCudaScheduleMul(ir::IRSchedule &ir_sch,
void IRCudaScheduleMul(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &output_shape,
const common::Target &target);
void IRMulScheduleCPU(ir::IRSchedule &ir_sch,
void IRMulScheduleCPU(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<int> &reduce_first_shape,
const common::Target &target);
void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,
void IRCudaSplitSchedule(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<std::vector<int>> &output_shapes,
int axis,
const common::Target &target);
void IRCudaScheduleReduce(ir::IRSchedule &ir_sch,
void IRCudaScheduleReduce(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor out,
int last_dimension_num,
const common::Target &target);
void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch,
void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor reduce_tmp_out,
ir::Tensor tmp_out,
ir::Tensor out,
const common::Target &target);
void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch,
void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor tmp_out,
ir::Tensor out,
const common::Target &target);
void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch,
void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor reshape,
ir::Tensor internal,
ir::Tensor out,
const common::Target &target);
void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch,
void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor reshape,
ir::Tensor internal,
ir::Tensor tmp_out,
ir::Tensor out,
const common::Target &target);
void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis = -1);
void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis = -1); // NOLINT
void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,
void IRPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT
const common::Target &target,
int arg_pack_size = 3);
void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch,
void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch, // NOLINT
const std::vector<ir::Expr> &tensors);
void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,
void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch, // NOLINT
const common::Target &target);
void IRCudaScheduleConv2(ir::IRSchedule &ir_sch,
ir::Tensor &input_pad,
ir::Tensor &weights,
ir::Tensor &output,
void IRCudaScheduleConv2(ir::IRSchedule &ir_sch, // NOLINT
ir::Tensor &input_pad, // NOLINT
ir::Tensor &weights, // NOLINT
ir::Tensor &output, // NOLINT
const common::Target &target,
const std::string &key);
void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target);
void IRCudaScheduleConv(ir::IRSchedule &ir_sch, // NOLINT
const common::Target &target);
} // namespace pe
} // namespace hlir
......
......@@ -456,8 +456,8 @@ int GetTailSize(const std::vector<int>& inshape, const std::vector<int>& axes) {
std::vector<int> GetFirstStepReduceShape(const std::vector<int>& shape,
const std::vector<int>& axes,
bool& inbound,
int& tail) {
bool& inbound, // NOLINT
int& tail) { // NOLINT
// post parallel size
int post_parallel_size = GetPostParallelSize(shape, axes);
// the size to unfold las reduce axis
......
......@@ -41,8 +41,8 @@ std::vector<ir::Tensor> winograd_transform_matrices(const int& tile_size,
std::vector<int> GetFirstStepReduceShape(const std::vector<int>& shape,
const std::vector<int>& axes,
bool& inbound,
int& tail);
bool& inbound, // NOLINT
int& tail); // NOLINT
} // namespace pe
} // namespace hlir
......
......@@ -624,7 +624,7 @@ void PoolScheduleCPU(poly::StageMap stages,
}
void PoolScheduleGPU(poly::StageMap stages,
ir::Tensor &output,
const ir::Tensor &output,
const common::Target &target) {
CHECK_GE(stages[output]->axis_names().size(), 4);
stages[output]->Fuse({0, 1, 2, 3});
......@@ -866,7 +866,7 @@ void CreateX86SerialData(const std::string &file_name) {
void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -1017,7 +1017,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -1139,7 +1139,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -1244,7 +1244,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages,
void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -1377,7 +1377,7 @@ void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages,
void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse(
poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -2300,7 +2300,7 @@ void SaveSerialData(
}
void CudaScheduleDepthwiseConv(poly::StageMap stages,
ir::Tensor &output,
ir::Tensor &output, // NOLINT
const common::Target &target) {
auto OL = stages[output]->CacheWrite("local", stages, output);
stages[output]->Bind(0, "blockIdx.x");
......@@ -2313,9 +2313,9 @@ void CudaScheduleDepthwiseConv(poly::StageMap stages,
}
void CudaScheduleConv(poly::StageMap stages,
ir::Tensor &input_pad,
ir::Tensor &weights,
ir::Tensor &output,
ir::Tensor &input_pad, // NOLINT
ir::Tensor &weights, // NOLINT
ir::Tensor &output, // NOLINT
const common::Target &target) {
auto &res = ScheduleParam::get_cuda_instance().GetParam();
int n = output->shape[0].as_int32();
......@@ -2382,9 +2382,9 @@ void CudaScheduleConv(poly::StageMap stages,
}
void CudaScheduleConv2(poly::StageMap stages,
ir::Tensor &input_pad,
ir::Tensor &weights,
ir::Tensor &output,
ir::Tensor &input_pad, // NOLINT
ir::Tensor &weights, // NOLINT
ir::Tensor &output, // NOLINT
const common::Target &target,
const std::string &key) {
auto &res = ScheduleParam::get_cuda_instance().GetParam();
......@@ -2516,7 +2516,7 @@ void CudaScheduleConv2(poly::StageMap stages,
}
void CudaScheduleWinogradConv(poly::StageMap wino_stages,
std::vector<ir::Tensor> &all_tensors,
std::vector<ir::Tensor> &all_tensors, // NOLINT
const common::Target &target) {
auto &res = ScheduleParam::get_cuda_instance().GetParam();
auto &wino_weights_dilation = all_tensors[0];
......
......@@ -124,7 +124,7 @@ void GetConv2d1x1Factors(absl::flat_hash_map<std::string, int> *factors,
void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -138,12 +138,12 @@ void PoolScheduleCPU(poly::StageMap stages,
const ir::Tensor &output,
const common::Target &target);
void PoolScheduleGPU(poly::StageMap stages,
ir::Tensor &output,
const ir::Tensor &output,
const common::Target &target);
void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -151,7 +151,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages,
void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -161,7 +161,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -170,7 +170,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse(
poly::StageMap stages,
const ir::Tensor &res,
ir::Tensor &packed_out,
ir::Tensor &packed_out, // NOLINT
const ir::Tensor &input_pad,
const ir::Tensor &weights_dilation,
const ir::Tensor &data,
......@@ -218,23 +218,23 @@ void CudaTwoStepReduceSchedule(poly::StageMap stages,
const common::Target &target);
void CudaScheduleDepthwiseConv(poly::StageMap stages,
ir::Tensor &output,
ir::Tensor &output, // NOLINT
const common::Target &target);
void CudaScheduleConv(poly::StageMap stages,
ir::Tensor &input_pad,
ir::Tensor &weights,
ir::Tensor &output,
ir::Tensor &input_pad, // NOLINT
ir::Tensor &weights, // NOLINT
ir::Tensor &output, // NOLINT
const common::Target &target);
void CudaScheduleWinogradConv(poly::StageMap wino_stages,
std::vector<ir::Tensor> &all_tensors,
std::vector<ir::Tensor> &all_tensors, // NOLINT
const common::Target &target);
void CudaScheduleConv2(poly::StageMap stages,
ir::Tensor &input_pad,
ir::Tensor &weights,
ir::Tensor &output,
ir::Tensor &input_pad, // NOLINT
ir::Tensor &weights, // NOLINT
ir::Tensor &output, // NOLINT
const common::Target &target,
const std::string &key);
......
......@@ -96,7 +96,7 @@ class ScheduleImpl {
int write_buffer_index,
const std::string& memory_type);
void SyncThreads(const Expr& ir_node, bool after_node = true);
void SetBuffer(Expr& block,
void SetBuffer(Expr& block, // NOLINT
const std::string& memory_type,
bool fixed = false);
Expr Reorder(const std::vector<Expr>& loops);
......@@ -114,7 +114,7 @@ class ScheduleImpl {
Expr Rfactor(const Expr& rf_loop, int rf_axis);
Expr AddUnitLoop(const Expr& block) const;
void Annotate(const Expr& block, const std::string& key, const attr_t& value);
void Unannotate(Expr& block, const std::string& key);
void Unannotate(Expr& block, const std::string& key); // NOLINT
void FlattenLoops(const std::vector<Expr>& loops,
const bool force_flat = false);
void CopyTransformAndLoopInfo(const Expr& block, const Expr& block_target);
......
......@@ -247,9 +247,9 @@ class IRSchedule {
* \param memory_type The memory type we want to set. Should be "local",
* "shared" or "global".
*/
void SetBuffer(Expr& block,
void SetBuffer(Expr& block, // NOLINT
const std::string& memory_type,
bool fixed = false);
bool fixed = false); // NOLINT
/**
* \brief Reorder the loops in the order of vector.
......@@ -391,7 +391,7 @@ class IRSchedule {
* \param block The block to be unannotated
* \param key The attribute key
*/
void Unannotate(Expr& block, const std::string& key);
void Unannotate(Expr& block, const std::string& key); // NOLINT
/*!
* \brief flatten the loops in one dim.
......@@ -620,7 +620,7 @@ class LeafBlockRemovalPlan : public ir::IRMutator<> {
class ComputeInlineChecker : public ir::IRMutator<> {
public:
ComputeInlineChecker(IRSchedule& schedule, Expr& block)
ComputeInlineChecker(IRSchedule& schedule, Expr& block) // NOLINT
: ir_schedule_(schedule), block_(block) {}
bool Check();
......
......@@ -503,7 +503,7 @@ Expr MakeCacheBlock(const std::vector<IterRange>& buffer_ranges,
return block;
}
void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write) {
void FindInsertionPoint(const Expr& root, CacheBlockInfo* info, bool is_write) {
Expr find_tensor =
is_write ? Expr(info->write_tensor) : Expr(info->read_tensor);
auto find_produce_read =
......@@ -651,7 +651,7 @@ Expr ConstructOtherStmtChain(const std::vector<Expr>& stmts,
Expr ConstructNewLoopChain(const std::vector<Expr>& chain,
const std::vector<Expr>& ordered_loops,
const std::set<Expr, CompExpr>& loop_set,
std::vector<Expr>& if_nodes) {
std::vector<Expr>& if_nodes) { // NOLINT
std::vector<std::set<std::string>> condition_vars;
// In each IfThenElse node, find the vars its condition depends on.
for (auto& if_expr : if_nodes) {
......@@ -923,7 +923,7 @@ void CheckComputeAtValidation(const Expr& block,
CHECK(find_block_in_loop.empty()) << "loop should not be block's ancestor!";
}
void InsertBlock(Expr& for_loop, const Expr& insertion, int index) {
void InsertBlock(Expr& for_loop, const Expr& insertion, int index) { // NOLINT
CHECK(for_loop.As<ir::For>());
CHECK(for_loop.As<ir::For>()->body.As<Block>());
ir::Block* dst_block = for_loop.As<ir::For>()->body.As<Block>();
......
......@@ -326,7 +326,7 @@ Expr MakeCacheBlock(const std::vector<IterRange>& buffer_ranges,
* @param info The information of cache block.
* @param is_write Are we inserting a write cache tensor or a read cache tensor.
*/
void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write);
void FindInsertionPoint(const Expr& root, CacheBlockInfo* info, bool is_write);
/**
* \brief Given a vector of For loops, return a set of them.
......@@ -359,7 +359,7 @@ std::vector<Expr> GetLoopsInRange(const Expr& top, const Expr& bottom);
Expr ConstructNewLoopChain(const std::vector<Expr>& chain,
const std::vector<Expr>& ordered_loops,
const std::set<Expr, CompExpr>& loop_set,
std::vector<Expr>& if_nodes);
std::vector<Expr>& if_nodes); // NOLINT
/*!
* \brief Find producers of block in root.
......@@ -395,7 +395,9 @@ void CheckComputeAtValidation(const Expr& block,
* - `index = -1` means inserted into the tail
* - otherwise, it should be a index between [0, stmts size)
*/
void InsertBlock(Expr& for_loop, const Expr& insertion, int index = 0);
void InsertBlock(Expr& for_loop, // NOLINT
const Expr& insertion,
int index = 0); // NOLINT
/*!
* \brief Make a union of two range. The detailed function is :
......
......@@ -49,7 +49,7 @@ void CheckNoIslCallRemains(Expr* expr) {
}
}
void BindBuffer(StageMap& stages) {
void BindBuffer(StageMap& stages) { // NOLINT
absl::flat_hash_map<std::string, ir::_Tensor_*> tensor_map;
for (auto& stage : stages) {
tensor_map[stage.second->tensor()->name] = stage.second->tensor();
......@@ -71,13 +71,13 @@ void BindBuffer(StageMap& stages) {
}
}
Expr LowerGroup(
const poly::ScheduleGroup& group,
const std::map<std::string, Expr>& tuple_to_expr,
std::map<std::string, ir::Tensor>* global_tensor_map,
std::unordered_map<std::string, std::vector<Expr>>& resized_buffer_cache,
StageMap stage_map,
ir::CudaAxisInfo* cuda_axis_info) {
Expr LowerGroup(const poly::ScheduleGroup& group,
const std::map<std::string, Expr>& tuple_to_expr,
std::map<std::string, ir::Tensor>* global_tensor_map,
std::unordered_map<std::string, std::vector<Expr>>&
resized_buffer_cache, // NOLINT
StageMap stage_map,
ir::CudaAxisInfo* cuda_axis_info) {
BindBuffer(stage_map);
std::vector<poly::Stage*> stages;
for (auto& node : group.nodes) {
......
......@@ -69,7 +69,7 @@ void CheckNoIslCallRemains(const Expr* expr);
Expr LowerGroup(const poly::ScheduleGroup& group,
const std::map<std::string, Expr>& tuple_to_expr,
std::map<std::string, Tensor>* global_tensor_map,
std::unordered_set<std::string>& resized_buffer,
std::unordered_set<std::string>& resized_buffer, // NOLINT
StageMap stage_map,
ir::CudaAxisInfo* cuda_axis_info = nullptr);
......
......@@ -55,7 +55,8 @@ void PartialSimplify(
//! Simplify the expression but Load.
struct SimplifyButStoreLoadMutator : public ir::IRMutator<ir::Expr*> {
common::cas_intervals_t& var_intervals;
explicit SimplifyButStoreLoadMutator(common::cas_intervals_t& var_intervals)
explicit SimplifyButStoreLoadMutator(
common::cas_intervals_t& var_intervals) // NOLINT
: var_intervals(var_intervals) {}
void operator()(Expr* x) { ir::IRMutator<ir::Expr*>::Visit(x, x); }
......
......@@ -177,7 +177,7 @@ bool IsBetween(const common::GraphNode* x,
return false;
}
std::vector<Group> TopoSortGroups(std::vector<Group>& groups) {
std::vector<Group> TopoSortGroups(std::vector<Group>& groups) { // NOLINT
// collect indegree.
absl::flat_hash_map<Group*, int> group_indegree;
std::vector<Group*> start_groups;
......
......@@ -40,7 +40,7 @@
namespace cinn {
namespace poly {
void RemoveDuplicate(std::vector<std::vector<Expr>> &indices) {
void RemoveDuplicate(std::vector<std::vector<Expr>> &indices) { // NOLINT
std::set<std::string> temp;
for (int i = 0; i < indices.size(); i++) {
std::string index_str = "";
......@@ -309,7 +309,7 @@ int Minus(const Expr &a, const Expr &b) {
}
// Return the range = max - min among all indices[i][axis](i = 0,1,2,...)
int GetRange(std::vector<std::vector<Expr>> &indices, int axis) {
int GetRange(std::vector<std::vector<Expr>> &indices, int axis) { // NOLINT
Expr max_expr = indices[0][axis];
Expr min_expr = indices[0][axis];
for (auto i = 1; i < indices.size(); i++) {
......@@ -1420,7 +1420,7 @@ struct CacheReplaceMutator : public ir::IRMutator<> {
};
} // namespace
void CacheReadWriteReplace(std::vector<ir::Tensor> &readers,
void CacheReadWriteReplace(const std::vector<ir::Tensor> &readers,
ir::Tensor cache_tensor,
std::string origin_tensor_name) {
for (auto k : readers) {
......
......@@ -270,7 +270,7 @@ class Stage : public Object {
* @param readers the readers of the \p tensor
*/
ir::Tensor CacheRead(const std::string& memory_type,
std::vector<ir::Tensor>& readers,
std::vector<ir::Tensor>& readers, // NOLINT
poly::StageMap stages);
/**
......@@ -303,7 +303,8 @@ class Stage : public Object {
void ShowISL() const;
void AddForLoopInTransform(std::vector<std::vector<Expr>>& indices);
void AddForLoopInTransform(
std::vector<std::vector<Expr>>& indices); // NOLINT
/**
* Create a cache for write to the original tensor.
* @param tensor the tensor to create the cache for.
......@@ -312,7 +313,7 @@ class Stage : public Object {
*/
ir::Tensor CacheWrite(const std::string& memory_type,
poly::StageMap stages,
ir::Tensor& key_tensor);
ir::Tensor& key_tensor); // NOLINT
/**
* Generate the `syncthreads()` code to sync all threads on CUDA backends.
......
......@@ -236,12 +236,12 @@ inline auto __binary_op_fn_dispatch(T1 x, T2 y, F fn, std::false_type) {
template <typename T1, typename T2, typename F>
inline void __binary_op_visitor_dispatch(
CINNValue &v, T1 lhs, T2 rhs, F fn, std::true_type) {
CINNValue &v, T1 lhs, T2 rhs, F fn, std::true_type) { // NOLINT
v = CINNValue();
}
template <typename T1, typename T2, typename F>
inline void __binary_op_visitor_dispatch(
CINNValue &v, T1 lhs, T2 rhs, F fn, std::false_type) {
CINNValue &v, T1 lhs, T2 rhs, F fn, std::false_type) { // NOLINT
v.Set(fn(lhs, rhs));
}
......
......@@ -500,25 +500,25 @@ __host__ __device__ inline float16 operator-(const float16& a) {
#endif
}
__host__ __device__ inline float16& operator+=(float16& a,
__host__ __device__ inline float16& operator+=(float16& a, // NOLINT
const float16& b) { // NOLINT
a = a + b;
return a;
}
__host__ __device__ inline float16& operator-=(float16& a,
__host__ __device__ inline float16& operator-=(float16& a, // NOLINT
const float16& b) { // NOLINT
a = a - b;
return a;
}
__host__ __device__ inline float16& operator*=(float16& a,
__host__ __device__ inline float16& operator*=(float16& a, // NOLINT
const float16& b) { // NOLINT
a = a * b;
return a;
}
__host__ __device__ inline float16& operator/=(float16& a,
__host__ __device__ inline float16& operator/=(float16& a, // NOLINT
const float16& b) { // NOLINT
a = a / b;
return a;
......