Unverified commit 3e5018df, authored by Wang Xin, committed by GitHub

[CodeStyle][CINN] fix cpplint codestyle for [runtime/references] (#55068)

Parent 13c2342a
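This commit addresses cpplint's [runtime/references] check, which flags non-const reference parameters. Two fixes are applied throughout: parameters that are only read become const references, and parameters that are mutated in place on purpose keep the non-const reference with an explicit // NOLINT suppression on that line. A minimal sketch of the two patterns (GetSize mirrors a change in this commit; AppendOne is a hypothetical name used only for illustration):

#include <vector>

// Read-only parameter: cpplint is satisfied by a const reference.
int GetSize(const std::vector<int>& shape);  // was: std::vector<int>& shape

// Intentional in-place mutation: keep the reference, suppress the check.
// (AppendOne is hypothetical, not a function touched by this commit.)
void AppendOne(std::vector<int>& v) {  // NOLINT
  v.push_back(1);
}

The hunks below apply one of these two patterns file by file.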
...
@@ -145,7 +145,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize) {
 ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
                                       const ir::LoweredFunc& old_func,
-                                      ir::Expr& body) {
+                                      ir::Expr& body) {  // NOLINT
   ir::ModuleExpr mod_expr(std::vector<ir::Expr>({body}));
   ir::IRSchedule ir_sch(mod_expr);
...
...
@@ -46,7 +46,7 @@ bool NeedsMultiLevelTiling(const ir::ScheduleBlockRealize& sche_block_realize);
  */
 ir::LoweredFunc UpdateFuncWithNewBody(const common::Target& target,
                                       const ir::LoweredFunc& old_func,
-                                      ir::Expr& body);
+                                      ir::Expr& body);  // NOLINT

 }  // namespace auto_schedule
 }  // namespace cinn
...
@@ -64,7 +64,7 @@ class AutoInline : public AutoGenRule {
                      const std::string& block_name) override;

 private:
-  void Apply(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);
+  void Apply(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);  // NOLINT

 private:
  std::vector<ir::Expr> all_block_realizes_;
...
...
@@ -120,9 +120,12 @@ class MultiLevelTiling : public AutoGenRule {
   }

 private:
-  void ApplyTiling(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);
-  void ApplyCacheRead(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);
-  void ApplyCacheWrite(ir::IRSchedule* ir_schedule, ir::Expr& block_expr);
+  void ApplyTiling(ir::IRSchedule* ir_schedule,
+                   ir::Expr& block_expr);  // NOLINT
+  void ApplyCacheRead(ir::IRSchedule* ir_schedule,
+                      ir::Expr& block_expr);  // NOLINT
+  void ApplyCacheWrite(ir::IRSchedule* ir_schedule,
+                       ir::Expr& block_expr);  // NOLINT

 private:
  std::vector<ir::Expr> all_block_realizes_;
...
...
@@ -51,7 +51,7 @@ using cinn::hlir::op::ExternalApiRegistry;
 // *** update a scheduled function with several post-processors
 ir::LoweredFunc FuncWithUpdatedBody(const common::Target& target,
                                     const ir::LoweredFunc& old_func,
-                                    ir::Expr& body);
+                                    ir::Expr& body);  // NOLINT
 // check whether a scheduled lowered function is valid
 bool PruneInvalid(const ir::LoweredFunc& lowered_func,
                   const common::Target& target);
...
...
@@ -1038,13 +1038,13 @@ llvm::Value *CodeGenLLVM::Visit(const ir::_Tensor_ *op) {
 template <typename T,
           std::enable_if_t<std::is_same<const ir::Expr &, T>::value, int> = 0>
-void appendBody(std::vector<Expr> &new_body, T &&v) {
+void appendBody(std::vector<Expr> &new_body, T &&v) {  // NOLINT
   new_body.push_back(v);
 }

 template <typename T,
           std::enable_if_t<!std::is_same<const ir::Expr &, T>::value, int> = 1>
-void appendBody(std::vector<Expr> &new_body, T &&v) {
+void appendBody(std::vector<Expr> &new_body, T &&v) {  // NOLINT
   new_body.insert(new_body.end(), v.begin(), v.end());
 }
...
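A note on the two appendBody overloads above: they dispatch via std::enable_if_t on whether the deduced forwarding-reference type is exactly const ir::Expr& (append a single expression) or anything else (splice in a whole range). A self-contained sketch of the same dispatch, with int standing in for ir::Expr so it compiles on its own:

#include <type_traits>
#include <vector>

// Selected only when the argument deduces to exactly const int&.
template <typename T,
          std::enable_if_t<std::is_same<const int&, T>::value, int> = 0>
void appendBody(std::vector<int>& new_body, T&& v) {  // NOLINT
  new_body.push_back(v);  // single element
}

// Selected for everything else; the argument is treated as a range.
template <typename T,
          std::enable_if_t<!std::is_same<const int&, T>::value, int> = 1>
void appendBody(std::vector<int>& new_body, T&& v) {  // NOLINT
  new_body.insert(new_body.end(), v.begin(), v.end());
}

int main() {
  std::vector<int> body;
  const int single = 1;
  appendBody(body, single);  // T = const int&        -> push_back overload
  std::vector<int> range = {2, 3};
  appendBody(body, range);   // T = std::vector<int>& -> insert overload
  return 0;
}

The NOLINT stays on the definition lines because new_body is genuinely an in/out parameter.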
...
@@ -98,7 +98,7 @@ class SymbolTable {
 };

 struct SymbolTableGuard {
-  explicit SymbolTableGuard(SymbolTable &symbol_table)
+  explicit SymbolTableGuard(SymbolTable &symbol_table)  // NOLINT
       : symbol_table_(symbol_table) {
     symbol_table.PushScope();
   }
...
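The constructor above keeps its non-const reference by design: SymbolTableGuard is an RAII helper that opens a scope on construction, so a const reference would defeat its purpose and suppression is the right fix. A hedged sketch of the pattern; the destructor's PopScope call is an assumption about the elided part of the class, not code shown in this hunk:

struct SymbolTable {
  void PushScope() { /* open a new innermost scope */ }
  void PopScope() { /* discard the innermost scope */ }
};

struct SymbolTableGuard {
  explicit SymbolTableGuard(SymbolTable& symbol_table)  // NOLINT
      : symbol_table_(symbol_table) {
    symbol_table_.PushScope();
  }
  // Assumed counterpart to the constructor (not shown in the hunk above).
  ~SymbolTableGuard() { symbol_table_.PopScope(); }

 private:
  SymbolTable& symbol_table_;
};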
...
@@ -180,7 +180,8 @@ class GiNaCToExprVisitor : public GiNaC::symbol::visitor,
   ir::Expr cur;

 public:
-  explicit GiNaCToExprVisitor(std::map<std::string, ir::Expr>& repr_to_expr)
+  explicit GiNaCToExprVisitor(
+      std::map<std::string, ir::Expr>& repr_to_expr)  // NOLINT
       : repr_to_expr(repr_to_expr) {}

   Expr operator()(GiNaC::ex ex) {
...
...
@@ -500,25 +500,25 @@ __host__ __device__ inline float16 operator-(const float16& a) {
 #endif
 }

-__host__ __device__ inline float16& operator+=(float16& a,
+__host__ __device__ inline float16& operator+=(float16& a,  // NOLINT
                                                const float16& b) {  // NOLINT
   a = a + b;
   return a;
 }

-__host__ __device__ inline float16& operator-=(float16& a,
+__host__ __device__ inline float16& operator-=(float16& a,  // NOLINT
                                                const float16& b) {  // NOLINT
   a = a - b;
   return a;
 }

-__host__ __device__ inline float16& operator*=(float16& a,
+__host__ __device__ inline float16& operator*=(float16& a,  // NOLINT
                                                const float16& b) {  // NOLINT
   a = a * b;
   return a;
 }

-__host__ __device__ inline float16& operator/=(float16& a,
+__host__ __device__ inline float16& operator/=(float16& a,  // NOLINT
                                                const float16& b) {  // NOLINT
   a = a / b;
   return a;
...
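These four operators illustrate the case where a const reference is not an option: a compound assignment must write through its left operand and return it by reference so that calls can chain. A host-only sketch with a stand-in float16 (the real type packs a 16-bit storage format; this simplification is only for illustration):

struct float16 {
  float v;  // stand-in storage; the real float16 holds 16 bits
};

inline float16 operator+(const float16& a, const float16& b) {
  return {a.v + b.v};
}

// The left operand must be a mutable reference: the operator writes
// through it and returns it, e.g. so that (a += b) += c compiles.
inline float16& operator+=(float16& a, const float16& b) {  // NOLINT
  a = a + b;
  return a;
}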
...
@@ -42,7 +42,7 @@ struct ComputationContext {
 std::shared_ptr<ComputationContext> CompileProgram(
     const Target &target,
-    Program &program,
+    Program &program,  // NOLINT
     const std::vector<Variable> &outputs,
     std::shared_ptr<hlir::framework::Scope> scope,
     const CinnComputation::CompileOptions &options,
...
...
@@ -59,7 +59,7 @@ class CinnComputation {
    */
   static std::shared_ptr<CinnComputation> BuildAndCompile(
       const Target &target,
-      NetBuilder &builder,
+      NetBuilder &builder,  // NOLINT
       const CompileOptions &options = DefaultCompileOptions(),
       const std::vector<Variable> &outputs = {},
       void *stream = nullptr);
...
@@ -77,7 +77,7 @@ class CinnComputation {
    */
   static std::shared_ptr<CinnComputation> Compile(
       const Target &target,
-      Program &program,
+      Program &program,  // NOLINT
       const CompileOptions &options = DefaultCompileOptions(),
       const std::vector<Variable> &outputs = {},
       void *stream = nullptr);
...
@@ -130,7 +130,9 @@ class CinnComputation {
    * @param data address of the memory buffer to store tensor's data
    * @param size size of the memory buffer
    */
-  void SetTensorData(hlir::framework::Tensor &t, void *data, size_t size);
+  void SetTensorData(hlir::framework::Tensor &t,  // NOLINT
+                     void *data,
+                     size_t size);

   /**
    * set the data of a tensor (specified by it's name) from user specified
...
@@ -148,7 +150,9 @@ class CinnComputation {
    * @param data address of the memory buffer to store tensor's data
    * @param size size of the memory buffer
    */
-  void GetTensorData(hlir::framework::Tensor &t, void *data, size_t size);
+  void GetTensorData(hlir::framework::Tensor &t,  // NOLINT
+                     void *data,
+                     size_t size);

   /**
    * copy the data of a tensor (specified by it's name) to user specified
    * buffer. if tensor is in NVGPU device memory, cudaMemcpy is used.
...
...
@@ -33,7 +33,7 @@
 namespace cinn {
 namespace frontend {

-int GetSize(std::vector<int>& shape) {
+int GetSize(const std::vector<int>& shape) {
   return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
 }
...
...
@@ -296,10 +296,10 @@ std::tuple<std::unique_ptr<Program>,
            absl::flat_hash_map<std::string, Variable>,
            absl::flat_hash_map<std::string, std::string>,
            absl::flat_hash_set<std::string>>
-LoadPaddleProgram(
-    const std::string& model_dir,
-    Scope* scope,
-    std::unordered_map<std::string, std::vector<int>>& input_shape_map,
-    bool is_combined,
-    const common::Target& target) {
+LoadPaddleProgram(const std::string& model_dir,
+                  Scope* scope,
+                  std::unordered_map<std::string, std::vector<int>>&
+                      input_shape_map,  // NOLINT
+                  bool is_combined,
+                  const common::Target& target) {
   VLOG(1) << "Loading Paddle model from " << model_dir;
...
...
@@ -528,10 +528,10 @@ std::tuple<std::unique_ptr<Program>,
            absl::flat_hash_map<std::string, Variable>,
            absl::flat_hash_map<std::string, std::string>,
            absl::flat_hash_set<std::string>>
-LoadPaddleProgram(
-    const std::string& model_dir,
-    hlir::framework::Scope* scope,
-    std::unordered_map<std::string, std::vector<int>>& input_shape_map,
-    bool is_combined,
-    const common::Target& target = common::DefaultHostTarget());
+LoadPaddleProgram(const std::string& model_dir,
+                  hlir::framework::Scope* scope,
+                  std::unordered_map<std::string, std::vector<int>>&
+                      input_shape_map,  // NOLINT
+                  bool is_combined,
+                  const common::Target& target = common::DefaultHostTarget());
...
...
@@ -1643,7 +1643,7 @@ std::shared_ptr<Scope> BuildScope(Target target,
 std::vector<ir::LoweredFunc> GetFuncFromImpl(
     const std::shared_ptr<OpImpl>& impl,
     const common::CINNValuePack& cinn_inputs,
-    std::vector<ir::Tensor>& all_arg_tensors,
+    std::vector<ir::Tensor>& all_arg_tensors,  // NOLINT
     const std::vector<std::string>& input_output_nodes,
     const std::string& node_id,
     const Target& target) {
...
...
@@ -222,7 +222,7 @@ std::shared_ptr<Scope> BuildScope(Target target,
 std::vector<ir::LoweredFunc> GetFuncFromImpl(
     const std::shared_ptr<OpImpl>& impl,
     const common::CINNValuePack& cinn_inputs,
-    std::vector<ir::Tensor>& tensor_inputs,
+    std::vector<ir::Tensor>& tensor_inputs,  // NOLINT
     const std::vector<std::string>& input_output_nodes,
     const std::string& node_id,
     const Target& target);
...
...
@@ -45,7 +45,7 @@ OpLowerer::OpLowerer(
     const Target& target)
     : type_dict_(type_dict), shape_dict_(shape_dict), target_(target) {}

-std::vector<ir::LoweredFunc> OpLowerer::Lower(GroupPtr& group) {
+std::vector<ir::LoweredFunc> OpLowerer::Lower(GroupPtr& group) {  // NOLINT
   VLOG(3) << "Lowering Group : " << group->group_id
           << " , Op Pattern : " << group->op_pattern_kind;
   group->input_names.clear();
...
...
@@ -52,8 +52,8 @@ class OpLowerer {
   OpLowerer(const absl::flat_hash_map<std::string, Type>&,
             const absl::flat_hash_map<std::string, shape_t>&,
             const Target&);
-  std::vector<ir::LoweredFunc> Lower(GroupPtr& group);
-  std::vector<ir::LoweredFunc> LowerWithoutSchedule(GroupPtr& group);
+  std::vector<ir::LoweredFunc> Lower(GroupPtr& group);  // NOLINT
+  std::vector<ir::LoweredFunc> LowerWithoutSchedule(GroupPtr& group);  // NOLINT

 private:
  std::vector<ir::LoweredFunc> IRLowerOp(IRComputeFunction, GroupPtr&);
...
@@ -75,7 +75,7 @@ class OpLowerer {
   DEFINE_IR_COMPUTE(OutEWiseFusable);

   void IRSchedule(
-      ir::IRSchedule& ir_sch,
+      ir::IRSchedule& ir_sch,  // NOLINT
       const GroupPtr& group,
       const std::unordered_map<std::string, ir::Tensor>& tensor_map);
...
...
@@ -32,7 +32,7 @@ namespace framework {
 using frontend::NetBuilder;
 using frontend::RunDecomposer;

-void CodeGen(ir::LoweredFunc& func) {
+void CodeGen(const ir::LoweredFunc& func) {
 #ifdef CINN_WITH_CUDA
   auto target = common::DefaultNVGPUTarget();
   Module::Builder builder("module_builder", target);
...
@@ -56,7 +56,7 @@ void CodeGen(ir::LoweredFunc& func) {
 #endif
 }

-void Compile(NetBuilder& net_builder) {
+void Compile(NetBuilder& net_builder) {  // NOLINT
   auto program = net_builder.Build();
   auto target = common::DefaultTarget();
   RunDecomposer(&program, target);
...
...
@@ -92,8 +92,8 @@ ir::Tensor GetTensor(
 std::vector<ir::Tensor> CollectInputTensor(
     const Node* node,
-    std::vector<ir::Tensor>& func_args,
-    std::unordered_map<std::string, ir::Tensor>& tensor_map,
+    std::vector<ir::Tensor>& func_args,  // NOLINT
+    std::unordered_map<std::string, ir::Tensor>& tensor_map,  // NOLINT
     const absl::flat_hash_map<std::string, Type>& type_dict,
     const absl::flat_hash_map<std::string, shape_t>& shape_dict) {
   std::vector<ir::Tensor> tensors;
...
@@ -543,7 +543,7 @@ bool WithoutLastDimInReduce(const std::vector<int>& shape,
   }
 }

-void LoopOrderAssignReduce(ir::IRSchedule& ir_sch,
+void LoopOrderAssignReduce(ir::IRSchedule& ir_sch,  // NOLINT
                            const std::string& block_name,
                            const std::vector<int>& axes,
                            const common::Target& target,
...
@@ -593,7 +593,7 @@ void LoopOrderAssignReduce(ir::IRSchedule& ir_sch,
   }
 }

-void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch,
+void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch,  // NOLINT
                                  const std::string& block_name,
                                  const std::vector<int>& inshape,
                                  const std::vector<int>& axes,
...
@@ -707,7 +707,7 @@ void LoopAssignReduceWithoutLast(ir::IRSchedule& ir_sch,
   ir_sch.Reorder(block_name, new_order);
 }

-void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,
+void LoopAssignReduceWithLast(ir::IRSchedule& ir_sch,  // NOLINT
                               const std::string& block_name,
                               const std::vector<int>& inshape,
                               const std::vector<int>& axes,
...
@@ -974,7 +974,7 @@ Node* GetMasterToComputeAt(
 }

 void LoopAssignReduce(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     const Node* node,
     const Node* reducer,
     const Target& target,
...
@@ -1217,8 +1217,8 @@ class RemoveExpr : public ir::IRMutator<> {
 };

 void MergeLoops(ir::Expr root,
-                std::vector<ir::Expr>& src,
-                std::vector<ir::Expr>& dst,
+                std::vector<ir::Expr>& src,  // NOLINT
+                std::vector<ir::Expr>& dst,  // NOLINT
                 int index) {
   if (index < 0) {
     return;
...
@@ -1247,7 +1247,7 @@ void MergeLoops(ir::Expr root,
 }

 void InsertSyncThread(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     const Node* node,
     const absl::flat_hash_map<std::string, shape_t>& shape_dict,
     const std::unordered_map<std::string, ir::Tensor>& tensor_map) {
...
@@ -1318,7 +1318,7 @@ class InsertExpr : public ir::IRMutator<> {
 };

 void MergeReduceToReduce(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     const Node* node,
     const Node* master,
     const absl::flat_hash_map<std::string, shape_t>& shape_dict,
...
@@ -1506,7 +1506,7 @@ void MergeReduceToReduce(
 }

 void MergeReduceLoop(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     Node* node,
     const Node* master,
     const absl::flat_hash_map<std::string, shape_t>& shape_dict,
...
@@ -1611,7 +1611,7 @@ class FindExprInBlock : public ir::IRMutator<> {
 };

 void LoopComputeAt(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     Node* node,
     const Node* master,
     const GroupPtr& group,
...
@@ -1712,7 +1712,7 @@ std::unordered_set<Node*> GetMasters(
 }

 void SyncThreadWithShared(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     const GroupPtr& group,
     const std::unordered_set<Node*>& nodes_inline,
     const std::unordered_set<Node*>& nodes_set,
...
...
@@ -31,8 +31,8 @@ ir::Tensor GetTensor(
 std::vector<ir::Tensor> CollectInputTensor(
     const Node* node,
-    std::vector<ir::Tensor>& func_args,
-    std::unordered_map<std::string, ir::Tensor>& tensor_map,
+    std::vector<ir::Tensor>& func_args,  // NOLINT
+    std::unordered_map<std::string, ir::Tensor>& tensor_map,  // NOLINT
     const absl::flat_hash_map<std::string, Type>& type_dict,
     const absl::flat_hash_map<std::string, shape_t>& shape_dict);
...
@@ -87,7 +87,7 @@ std::unordered_set<Node*> GetMasters(
     const std::unordered_set<Node*>& nodes_set);

 void LoopAssignReduce(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     const Node* node,
     const Node* reducer,
     const Target& target,
...
@@ -95,7 +95,7 @@ void LoopAssignReduce(
     const absl::flat_hash_map<std::string, shape_t>& shape_dict);

 void LoopComputeAt(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     Node* node,
     const Node* master,
     const GroupPtr& group,
...
@@ -103,7 +103,7 @@ void LoopComputeAt(
     const std::unordered_map<std::string, ir::Tensor>& tensor_map);

 void SyncThreadWithShared(
-    ir::IRSchedule& ir_sch,
+    ir::IRSchedule& ir_sch,  // NOLINT
     const GroupPtr& group,
     const std::unordered_set<Node*>& nodes_inline,
     const std::unordered_set<Node*>& nodes_set,
...
...
@@ -36,8 +36,8 @@ class ParallelCompiler {
   };

 public:
-  explicit ParallelCompiler(std::shared_ptr<Scope>& scope,
-                            std::shared_ptr<Graph>& graph,
+  explicit ParallelCompiler(std::shared_ptr<Scope>& scope,  // NOLINT
+                            std::shared_ptr<Graph>& graph,  // NOLINT
                             const CompileOptions& option,
                             const common::Target& target)
       : scope_(scope), graph_(graph), option_(option), target_(target) {}
...
@@ -53,8 +53,8 @@ class ParallelCompiler {
   struct Task {
    public:
     Task(ParallelCompiler* p,
-         std::shared_ptr<Scope>& s,
-         std::shared_ptr<Graph>& g,
+         std::shared_ptr<Scope>& s,  // NOLINT
+         std::shared_ptr<Graph>& g,  // NOLINT
         const CompileOptions& cp,
         const Target& t)
        : compiler(p), scope(s), graph(g), options(cp), target(t) {}
...
...
@@ -258,7 +258,7 @@ TEST(Operator, Operator_BroadcastTo) {
 common::CINNValuePack GetComputeResult(
     const std::shared_ptr<OpImpl> &impl,
-    std::vector<common::CINNValue> &cinn_inputs,
+    std::vector<common::CINNValue> &cinn_inputs,  // NOLINT
     const std::string &output_name = "") {
   if (FLAGS_cinn_ir_schedule) {
     cinn_inputs.emplace_back(output_name);
...
...
@@ -44,7 +44,7 @@ Module LowerToModule(const std::string test_name,
                      const std::shared_ptr<OpImpl> &impl,
                      std::vector<std::string> input_names,
                      const std::string &output_name,
-                     std::vector<ir::Tensor> &inputs,
+                     std::vector<ir::Tensor> &inputs,  // NOLINT
                      std::vector<common::CINNValue> cinn_inputs,
                      const Target &target) {
   Module::Builder builder("module", target);
...
...
@@ -67,7 +67,9 @@ std::unordered_map<std::string, int> special_attrs = {
     {"axes", 2},
     {"perm", 2}};

-bool IsSameSubexpression(Node* op1, Node* op2, shape_dict_t& shape_dict) {
+bool IsSameSubexpression(Node* op1,
+                         Node* op2,
+                         shape_dict_t& shape_dict) {  // NOLINT
   // Get the input edges for op1 and op2 in order.
   auto op1_in_edges = op1->inlinks_in_order();
   auto op2_in_edges = op2->inlinks_in_order();
...
@@ -201,13 +203,14 @@ void RemoveNodes(framework::Graph* graph, GraphNode* node) {
   graph->DropNode(node);
 }

-void RemoveNodes(framework::Graph* graph, std::vector<Node*>& nodes) {
+void RemoveNodes(framework::Graph* graph, const std::vector<Node*>& nodes) {
   for (auto* node : nodes) {
     RemoveNodes(graph, node);
   }
 }

-void RemoveNodes(framework::Graph* graph, std::vector<NodeData*>& nodes_data) {
+void RemoveNodes(framework::Graph* graph,
+                 const std::vector<NodeData*>& nodes_data) {
   for (auto* data : nodes_data) {
     if (std::find(graph->outputs.begin(), graph->outputs.end(), data) !=
         graph->outputs.end()) {
...
...
@@ -19,7 +19,7 @@
 namespace cinn {
 namespace frontend {

-int GetSize(std::vector<int>& shape) {
+int GetSize(const std::vector<int>& shape) {
   return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
 }
...
@@ -36,7 +36,7 @@ std::unordered_map<std::string, std::vector<float>> GetInputRandom(
 }

 std::unordered_map<std::string, std::vector<float>> RunModelTest(
-    Program& program,
+    Program& program,  // NOLINT
     const std::vector<std::string>&& passes,
     const std::unordered_map<std::string, std::vector<float>>& input_data,
     const std::unordered_set<std::string>& fetch_ids) {
...
...
@@ -19,11 +19,11 @@
 namespace cinn {
 namespace frontend {

-int GetSize(std::vector<int>& shape) {
+int GetSize(const std::vector<int>& shape) {
   return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
 }

-void RunModelTest(Program& program,
+void RunModelTest(Program& program,  // NOLINT
                   const std::vector<Variable>&& inputs,
                   const std::unordered_set<std::string>& fetch_ids) {
   // init input data.
...
...
@@ -130,7 +130,7 @@ class DotBuilder {
   const shape_dict_t& shape_dict() const { return shape_dict_; }

   // Currently the constructor of `NodeData` needs to pass in `Shared<Node>`.
-  NodeData* Var(common::Shared<Node>& producer) {
+  NodeData* Var(common::Shared<Node>& producer) {  // NOLINT
     auto* res = new NodeData(producer, 0, 0, node_name("var"), false);
     graph_->RegisterNode(producer->id(), res);
     graph_->RegisterNode(res->id(), producer.get());
...
...
@@ -19,11 +19,11 @@
 namespace cinn {
 namespace frontend {

-int GetSize(std::vector<int>& shape) {
+int GetSize(const std::vector<int>& shape) {
   return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
 }

-void RunModelTest(Program& program,
+void RunModelTest(Program& program,  // NOLINT
                   const std::vector<Variable>&& inputs,
                   const std::unordered_set<std::string>& fetch_ids) {
   // init input data.
...
...
@@ -38,8 +38,8 @@ using dtype_dict_t = absl::flat_hash_map<std::string, common::Type>;
 using shape_dict_t = absl::flat_hash_map<std::string, framework::shape_t>;

 void InferShape(Node* node,
-                dtype_dict_t& dtype_dict,
-                shape_dict_t& shape_dict) {
+                dtype_dict_t& dtype_dict,  // NOLINT
+                shape_dict_t& shape_dict) {  // NOLINT
   VLOG(3) << "Begin InferShape of node " << node->id();
   auto op_infershape = Operator::GetAttrs<infershape_t>("infershape");
   auto op_inferdtype = Operator::GetAttrs<inferdtype_t>("inferdtype");
...
...
@@ -24,8 +24,9 @@ namespace pass {
 void InferShape(
     framework::Node* node,
-    absl::flat_hash_map<std::string, common::Type>& dtype_dict,
-    absl::flat_hash_map<std::string, framework::shape_t>& shape_dict);
+    absl::flat_hash_map<std::string, common::Type>& dtype_dict,  // NOLINT
+    absl::flat_hash_map<std::string, framework::shape_t>&
+        shape_dict);  // NOLINT

 }  // namespace pass
 }  // namespace hlir
...
...
@@ -20,7 +20,7 @@ namespace cinn {
 namespace frontend {

 std::unordered_map<std::string, std::vector<float>> RunModelTest(
-    Program& program,
+    Program& program,  // NOLINT
     const std::vector<std::string>&& passes,
     const std::unordered_map<std::string, std::vector<float>>& input_data,
     const std::unordered_set<std::string>& fetch_ids) {
...
...
@@ -39,7 +39,7 @@ namespace cinn {
 namespace hlir {
 namespace pe {

-void IRElementwiseSchedule(ir::IRSchedule &ir_sch,
+void IRElementwiseSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                            const std::vector<int> &output_shape,
                            const common::Target &target) {
   VLOG(3) << "Before IRElementwiseSchedule, new ir is : "
...
@@ -67,7 +67,7 @@ void IRElementwiseSchedule(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRInjectiveSchedule(ir::IRSchedule &ir_sch,
+void IRInjectiveSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                          const std::vector<int> &output_shape,
                          const common::Target &target) {
   VLOG(3) << "Before IRInjectiveSchedule, new ir is : "
...
@@ -95,7 +95,7 @@ void IRInjectiveSchedule(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,
+void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,  // NOLINT
                             const std::vector<int> &output_shape,
                             const common::Target &target,
                             bool vectorizable) {
...
@@ -132,7 +132,7 @@ void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaScheduleInjective(ir::IRSchedule &ir_sch,
+void IRCudaScheduleInjective(ir::IRSchedule &ir_sch,  // NOLINT
                              const std::vector<int> &output_shape,
                              const common::Target &target) {
   VLOG(3) << "Begin IRCudaScheduleInjective ";
...
@@ -208,7 +208,7 @@ std::vector<common::CINNValue> IRCudaScheduleMatMul(
   return {common::CINNValue(ir_sch.GetModule().GetExprs().at(0))};
 }

-void IRCudaScheduleMul(ir::IRSchedule &ir_sch,
+void IRCudaScheduleMul(ir::IRSchedule &ir_sch,  // NOLINT
                        const std::vector<int> &output_shape,
                        const common::Target &target) {
   auto all_blocks = ir_sch.GetAllBlocks();
...
@@ -221,7 +221,7 @@ void IRCudaScheduleMul(ir::IRSchedule &ir_sch,
   ir_sch.Bind(loops[1], "threadIdx.x");
 }

-void IRMulScheduleCPU(ir::IRSchedule &ir_sch,
+void IRMulScheduleCPU(ir::IRSchedule &ir_sch,  // NOLINT
                       const std::vector<int> &reduce_first_shape,
                       const common::Target &target) {
   ir_sch.MergeExprs();
...
@@ -238,7 +238,7 @@ void IRMulScheduleCPU(ir::IRSchedule &ir_sch,
   }
 }

-void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,
+void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                          const std::vector<std::vector<int>> &output_shapes,
                          int axis,
                          const common::Target &target) {
...
@@ -334,7 +334,7 @@ void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaScheduleReduce(ir::IRSchedule &ir_sch,
+void IRCudaScheduleReduce(ir::IRSchedule &ir_sch,  // NOLINT
                           ir::Tensor output,
                           int last_dimension_num,
                           const common::Target &target) {
...
@@ -390,7 +390,7 @@ void IRCudaScheduleReduce(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch,
+void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch,  // NOLINT
                                        ir::Tensor tmp_out,
                                        ir::Tensor out,
                                        const common::Target &target) {
...
@@ -478,7 +478,7 @@ void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch,
+void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch,  // NOLINT
                                ir::Tensor reduce_tmp_out,
                                ir::Tensor tmp_out,
                                ir::Tensor out,
...
@@ -621,7 +621,7 @@ void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch,
+void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch,  // NOLINT
                                       ir::Tensor reshape,
                                       ir::Tensor internal,
                                       ir::Tensor reduce_out,
...
@@ -880,7 +880,7 @@ void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch,
+void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                                 ir::Tensor reshape,
                                 ir::Tensor internal,
                                 ir::Tensor tmp_out,
...
@@ -991,7 +991,7 @@ void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch,
   //                  ir_sch.GetLoops(out->name)[0]);
 }

-void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) {
+void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) {  // NOLINT
   ir_sch.MergeExprs();
   auto all_blocks = ir_sch.GetAllBlocks();
   CHECK_EQ(all_blocks.size(), 3U);
...
@@ -1010,7 +1010,7 @@ void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis) {
   ir_sch.ComputeAt(all_blocks[1], loops[0]);
 }

-void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,
+void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,  // NOLINT
                        const common::Target &target,
                        int arg_pack_size) {
   VLOG(3) << "Before IRPoolScheduleGPU: "
...
@@ -1028,7 +1028,7 @@ void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,
   VLOG(3) << "End IRPoolScheduleGPU: " << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,
+void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,  // NOLINT
                              const common::Target &target) {
   VLOG(3) << "Before IRGlobalPoolScheduleGPU: "
           << ir_sch.GetModule().GetExprs().at(0);
...
@@ -1071,7 +1071,7 @@ void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch,
+void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch,  // NOLINT
                                  const std::vector<ir::Expr> &tensors) {
   if (tensors.size() == 3U) {
     CHECK(tensors[1].as_tensor());
...
@@ -1097,7 +1097,8 @@ void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch,
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target) {
+void IRCudaScheduleConv(ir::IRSchedule &ir_sch,  // NOLINT
+                        const common::Target &target) {
   VLOG(3) << "Begin IRCudaScheduleConv with expr: "
           << ir_sch.GetModule().GetExprs().at(0);
   auto &res = ScheduleParam::get_cuda_instance().GetParam();
...
@@ -1238,10 +1239,10 @@ void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target) {
           << ir_sch.GetModule().GetExprs().at(0);
 }

-void IRCudaScheduleConv2(ir::IRSchedule &ir_sch,
-                         ir::Tensor &input_pad,
-                         ir::Tensor &weights,
-                         ir::Tensor &output,
+void IRCudaScheduleConv2(ir::IRSchedule &ir_sch,  // NOLINT
+                         ir::Tensor &input_pad,  // NOLINT
+                         ir::Tensor &weights,  // NOLINT
+                         ir::Tensor &output,  // NOLINT
                          const common::Target &target,
                          const std::string &key) {
   auto &res = ScheduleParam::get_cuda_instance().GetParam();
...
...
@@ -31,20 +31,20 @@ namespace cinn {
 namespace hlir {
 namespace pe {

-void IRElementwiseSchedule(ir::IRSchedule &ir_sch,
+void IRElementwiseSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                            const std::vector<int> &output_shape,
                            const common::Target &target);

-void IRInjectiveSchedule(ir::IRSchedule &ir_sch,
+void IRInjectiveSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                          const std::vector<int> &output_shape,
                          const common::Target &target);

-void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,
+void IRScheduleInjectiveCPU(ir::IRSchedule &ir_sch,  // NOLINT
                             const std::vector<int> &output_shape,
                             const common::Target &target,
                             bool vectorizable = true);

-void IRCudaScheduleInjective(ir::IRSchedule &ir_sch,
+void IRCudaScheduleInjective(ir::IRSchedule &ir_sch,  // NOLINT
                              const std::vector<int> &output_shape,
                              const common::Target &target);
...
@@ -53,68 +53,69 @@ std::vector<common::CINNValue> IRCudaScheduleMatMul(
     const std::vector<int> &output_shape,
     const common::Target &target);

-void IRCudaScheduleMul(ir::IRSchedule &ir_sch,
+void IRCudaScheduleMul(ir::IRSchedule &ir_sch,  // NOLINT
                        const std::vector<int> &output_shape,
                        const common::Target &target);

-void IRMulScheduleCPU(ir::IRSchedule &ir_sch,
+void IRMulScheduleCPU(ir::IRSchedule &ir_sch,  // NOLINT
                       const std::vector<int> &reduce_first_shape,
                       const common::Target &target);

-void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,
+void IRCudaSplitSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                          const std::vector<std::vector<int>> &output_shapes,
                          int axis,
                          const common::Target &target);

-void IRCudaScheduleReduce(ir::IRSchedule &ir_sch,
+void IRCudaScheduleReduce(ir::IRSchedule &ir_sch,  // NOLINT
                           ir::Tensor out,
                           int last_dimension_num,
                           const common::Target &target);

-void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch,
+void IRCudaScheduleBlockReduce(ir::IRSchedule &ir_sch,  // NOLINT
                                ir::Tensor reduce_tmp_out,
                                ir::Tensor tmp_out,
                                ir::Tensor out,
                                const common::Target &target);

-void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch,
+void IRCudaScheduleBlockReduceInternal(ir::IRSchedule &ir_sch,  // NOLINT
                                        ir::Tensor tmp_out,
                                        ir::Tensor out,
                                        const common::Target &target);

-void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch,
+void IRCudaScheduleBlockShuffleReduce(ir::IRSchedule &ir_sch,  // NOLINT
                                       ir::Tensor reshape,
                                       ir::Tensor internal,
                                       ir::Tensor out,
                                       const common::Target &target);

-void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch,
+void IRCudaTwoStepReduceSchedule(ir::IRSchedule &ir_sch,  // NOLINT
                                 ir::Tensor reshape,
                                 ir::Tensor internal,
                                 ir::Tensor tmp_out,
                                 ir::Tensor out,
                                 const common::Target &target);

-void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis = -1);
+void IRSoftmaxScheduleCPU(ir::IRSchedule &ir_sch, int axis = -1);  // NOLINT

-void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,
+void IRPoolScheduleGPU(ir::IRSchedule &ir_sch,  // NOLINT
                        const common::Target &target,
                        int arg_pack_size = 3);

-void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch,
+void IRCudaScheduleDepthwiseConv(ir::IRSchedule &ir_sch,  // NOLINT
                                  const std::vector<ir::Expr> &tensors);

-void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,
+void IRGlobalPoolScheduleGPU(ir::IRSchedule &ir_sch,  // NOLINT
                              const common::Target &target);

-void IRCudaScheduleConv2(ir::IRSchedule &ir_sch,
-                         ir::Tensor &input_pad,
-                         ir::Tensor &weights,
-                         ir::Tensor &output,
+void IRCudaScheduleConv2(ir::IRSchedule &ir_sch,  // NOLINT
+                         ir::Tensor &input_pad,  // NOLINT
+                         ir::Tensor &weights,  // NOLINT
+                         ir::Tensor &output,  // NOLINT
                          const common::Target &target,
                          const std::string &key);

-void IRCudaScheduleConv(ir::IRSchedule &ir_sch, const common::Target &target);
+void IRCudaScheduleConv(ir::IRSchedule &ir_sch,  // NOLINT
+                        const common::Target &target);

 }  // namespace pe
 }  // namespace hlir
...
...
@@ -456,8 +456,8 @@ int GetTailSize(const std::vector<int>& inshape, const std::vector<int>& axes) {
 std::vector<int> GetFirstStepReduceShape(const std::vector<int>& shape,
                                          const std::vector<int>& axes,
-                                         bool& inbound,
-                                         int& tail) {
+                                         bool& inbound,  // NOLINT
+                                         int& tail) {  // NOLINT
   // post parallel size
   int post_parallel_size = GetPostParallelSize(shape, axes);
   // the size to unfold las reduce axis
...
...
@@ -41,8 +41,8 @@ std::vector<ir::Tensor> winograd_transform_matrices(const int& tile_size,
 std::vector<int> GetFirstStepReduceShape(const std::vector<int>& shape,
                                          const std::vector<int>& axes,
-                                         bool& inbound,
-                                         int& tail);
+                                         bool& inbound,  // NOLINT
+                                         int& tail);  // NOLINT

 }  // namespace pe
 }  // namespace hlir
...
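GetFirstStepReduceShape is a case where the style guide's preferred fix would be output pointers rather than suppression; this commit opts for the less invasive NOLINT. The two spellings side by side (GetFirstStepReduceShapePtr is a hypothetical name, shown for comparison only):

#include <vector>

// As kept in this commit: reference out-parameters, warning suppressed.
std::vector<int> GetFirstStepReduceShape(const std::vector<int>& shape,
                                         const std::vector<int>& axes,
                                         bool& inbound,  // NOLINT
                                         int& tail);     // NOLINT

// The pointer alternative makes mutation visible at the call site:
//   GetFirstStepReduceShapePtr(shape, axes, &inbound, &tail);
std::vector<int> GetFirstStepReduceShapePtr(const std::vector<int>& shape,
                                            const std::vector<int>& axes,
                                            bool* inbound,
                                            int* tail);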
...
@@ -624,7 +624,7 @@ void PoolScheduleCPU(poly::StageMap stages,
 }

 void PoolScheduleGPU(poly::StageMap stages,
-                     ir::Tensor &output,
+                     const ir::Tensor &output,
                      const common::Target &target) {
   CHECK_GE(stages[output]->axis_names().size(), 4);
   stages[output]->Fuse({0, 1, 2, 3});
...
@@ -866,7 +866,7 @@ void CreateX86SerialData(const std::string &file_name) {
 void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
                                    const ir::Tensor &res,
-                                   ir::Tensor &packed_out,
+                                   ir::Tensor &packed_out,  // NOLINT
                                    const ir::Tensor &input_pad,
                                    const ir::Tensor &weights_dilation,
                                    const ir::Tensor &data,
...
@@ -1017,7 +1017,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
 void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
                                           const ir::Tensor &res,
-                                          ir::Tensor &packed_out,
+                                          ir::Tensor &packed_out,  // NOLINT
                                           const ir::Tensor &input_pad,
                                           const ir::Tensor &weights_dilation,
                                           const ir::Tensor &data,
...
@@ -1139,7 +1139,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
 void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages,
                                       const ir::Tensor &res,
-                                      ir::Tensor &packed_out,
+                                      ir::Tensor &packed_out,  // NOLINT
                                       const ir::Tensor &input_pad,
                                       const ir::Tensor &weights_dilation,
                                       const ir::Tensor &data,
...
@@ -1244,7 +1244,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages,
 void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages,
                                const ir::Tensor &res,
-                               ir::Tensor &packed_out,
+                               ir::Tensor &packed_out,  // NOLINT
                                const ir::Tensor &input_pad,
                                const ir::Tensor &weights_dilation,
                                const ir::Tensor &data,
...
@@ -1377,7 +1377,7 @@ void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages,
 void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse(
     poly::StageMap stages,
     const ir::Tensor &res,
-    ir::Tensor &packed_out,
+    ir::Tensor &packed_out,  // NOLINT
     const ir::Tensor &input_pad,
     const ir::Tensor &weights_dilation,
     const ir::Tensor &data,
...
@@ -2300,7 +2300,7 @@ void SaveSerialData(
 }

 void CudaScheduleDepthwiseConv(poly::StageMap stages,
-                               ir::Tensor &output,
+                               ir::Tensor &output,  // NOLINT
                                const common::Target &target) {
   auto OL = stages[output]->CacheWrite("local", stages, output);
   stages[output]->Bind(0, "blockIdx.x");
...
@@ -2313,9 +2313,9 @@ void CudaScheduleDepthwiseConv(poly::StageMap stages,
 }

 void CudaScheduleConv(poly::StageMap stages,
-                      ir::Tensor &input_pad,
-                      ir::Tensor &weights,
-                      ir::Tensor &output,
+                      ir::Tensor &input_pad,  // NOLINT
+                      ir::Tensor &weights,  // NOLINT
+                      ir::Tensor &output,  // NOLINT
                       const common::Target &target) {
   auto &res = ScheduleParam::get_cuda_instance().GetParam();
   int n = output->shape[0].as_int32();
...
@@ -2382,9 +2382,9 @@ void CudaScheduleConv(poly::StageMap stages,
 }

 void CudaScheduleConv2(poly::StageMap stages,
-                       ir::Tensor &input_pad,
-                       ir::Tensor &weights,
-                       ir::Tensor &output,
+                       ir::Tensor &input_pad,  // NOLINT
+                       ir::Tensor &weights,  // NOLINT
+                       ir::Tensor &output,  // NOLINT
                        const common::Target &target,
                        const std::string &key) {
   auto &res = ScheduleParam::get_cuda_instance().GetParam();
...
@@ -2516,7 +2516,7 @@ void CudaScheduleConv2(poly::StageMap stages,
 }

 void CudaScheduleWinogradConv(poly::StageMap wino_stages,
-                              std::vector<ir::Tensor> &all_tensors,
+                              std::vector<ir::Tensor> &all_tensors,  // NOLINT
                               const common::Target &target) {
   auto &res = ScheduleParam::get_cuda_instance().GetParam();
   auto &wino_weights_dilation = all_tensors[0];
...
...
@@ -124,7 +124,7 @@ void GetConv2d1x1Factors(absl::flat_hash_map<std::string, int> *factors,
 void Conv2d_NCHWc_Schedule_CPU(poly::StageMap stages,
                                const ir::Tensor &res,
-                               ir::Tensor &packed_out,
+                               ir::Tensor &packed_out,  // NOLINT
                                const ir::Tensor &input_pad,
                                const ir::Tensor &weights_dilation,
                                const ir::Tensor &data,
...
@@ -138,12 +138,12 @@ void PoolScheduleCPU(poly::StageMap stages,
                      const ir::Tensor &output,
                      const common::Target &target);

 void PoolScheduleGPU(poly::StageMap stages,
-                     ir::Tensor &output,
+                     const ir::Tensor &output,
                      const common::Target &target);

 void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages,
                                       const ir::Tensor &res,
-                                      ir::Tensor &packed_out,
+                                      ir::Tensor &packed_out,  // NOLINT
                                       const ir::Tensor &input_pad,
                                       const ir::Tensor &weights_dilation,
                                       const ir::Tensor &data,
...
@@ -151,7 +151,7 @@ void Conv2d_NCHWc_Schedule_CPU_Nofuse(poly::StageMap stages,
 void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
                                    const ir::Tensor &res,
-                                   ir::Tensor &packed_out,
+                                   ir::Tensor &packed_out,  // NOLINT
                                    const ir::Tensor &input_pad,
                                    const ir::Tensor &weights_dilation,
                                    const ir::Tensor &data,
...
@@ -161,7 +161,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU(poly::StageMap stages,
 void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
                                           const ir::Tensor &res,
-                                          ir::Tensor &packed_out,
+                                          ir::Tensor &packed_out,  // NOLINT
                                           const ir::Tensor &input_pad,
                                           const ir::Tensor &weights_dilation,
                                           const ir::Tensor &data,
...
@@ -170,7 +170,7 @@ void Conv2d_NCHWc_1X1_Schedule_CPU_Nofuse(poly::StageMap stages,
 void Depthwise_Conv2d_NCHWc_Schedule_CPU_Nofuse(
     poly::StageMap stages,
     const ir::Tensor &res,
-    ir::Tensor &packed_out,
+    ir::Tensor &packed_out,  // NOLINT
     const ir::Tensor &input_pad,
     const ir::Tensor &weights_dilation,
     const ir::Tensor &data,
...
@@ -218,23 +218,23 @@ void CudaTwoStepReduceSchedule(poly::StageMap stages,
                                const common::Target &target);

 void CudaScheduleDepthwiseConv(poly::StageMap stages,
-                               ir::Tensor &output,
+                               ir::Tensor &output,  // NOLINT
                                const common::Target &target);

 void CudaScheduleConv(poly::StageMap stages,
-                      ir::Tensor &input_pad,
-                      ir::Tensor &weights,
-                      ir::Tensor &output,
+                      ir::Tensor &input_pad,  // NOLINT
+                      ir::Tensor &weights,  // NOLINT
+                      ir::Tensor &output,  // NOLINT
                       const common::Target &target);

 void CudaScheduleWinogradConv(poly::StageMap wino_stages,
-                              std::vector<ir::Tensor> &all_tensors,
+                              std::vector<ir::Tensor> &all_tensors,  // NOLINT
                               const common::Target &target);

 void CudaScheduleConv2(poly::StageMap stages,
-                       ir::Tensor &input_pad,
-                       ir::Tensor &weights,
-                       ir::Tensor &output,
+                       ir::Tensor &input_pad,  // NOLINT
+                       ir::Tensor &weights,  // NOLINT
+                       ir::Tensor &output,  // NOLINT
                        const common::Target &target,
                        const std::string &key);
...
@@ -96,7 +96,7 @@ class ScheduleImpl {
                   int write_buffer_index,
                   const std::string& memory_type);
   void SyncThreads(const Expr& ir_node, bool after_node = true);
-  void SetBuffer(Expr& block,
+  void SetBuffer(Expr& block,  // NOLINT
                  const std::string& memory_type,
                  bool fixed = false);
   Expr Reorder(const std::vector<Expr>& loops);
@@ -114,7 +114,7 @@ class ScheduleImpl {
   Expr Rfactor(const Expr& rf_loop, int rf_axis);
   Expr AddUnitLoop(const Expr& block) const;
   void Annotate(const Expr& block, const std::string& key, const attr_t& value);
-  void Unannotate(Expr& block, const std::string& key);
+  void Unannotate(Expr& block, const std::string& key);  // NOLINT
   void FlattenLoops(const std::vector<Expr>& loops,
                     const bool force_flat = false);
   void CopyTransformAndLoopInfo(const Expr& block, const Expr& block_target);
......
@@ -247,9 +247,9 @@ class IRSchedule {
    * \param memory_type The memory type we want to set. Should be "local",
    * "shared" or "global".
    */
-  void SetBuffer(Expr& block,
+  void SetBuffer(Expr& block,  // NOLINT
                  const std::string& memory_type,
-                 bool fixed = false);
+                 bool fixed = false);  // NOLINT
 
   /**
    * \brief Reorder the loops in the order of vector.
@@ -391,7 +391,7 @@ class IRSchedule {
    * \param block The block to be unannotated
    * \param key The attribute key
    */
-  void Unannotate(Expr& block, const std::string& key);
+  void Unannotate(Expr& block, const std::string& key);  // NOLINT
 
   /*!
    * \brief flatten the loops in one dim.
@@ -620,7 +620,7 @@ class LeafBlockRemovalPlan : public ir::IRMutator<> {
 class ComputeInlineChecker : public ir::IRMutator<> {
  public:
-  ComputeInlineChecker(IRSchedule& schedule, Expr& block)
+  ComputeInlineChecker(IRSchedule& schedule, Expr& block)  // NOLINT
       : ir_schedule_(schedule), block_(block) {}
   bool Check();
......
@@ -503,7 +503,7 @@ Expr MakeCacheBlock(const std::vector<IterRange>& buffer_ranges,
   return block;
 }
 
-void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write) {
+void FindInsertionPoint(const Expr& root, CacheBlockInfo* info, bool is_write) {
   Expr find_tensor =
       is_write ? Expr(info->write_tensor) : Expr(info->read_tensor);
   auto find_produce_read =
@@ -651,7 +651,7 @@ Expr ConstructOtherStmtChain(const std::vector<Expr>& stmts,
 Expr ConstructNewLoopChain(const std::vector<Expr>& chain,
                            const std::vector<Expr>& ordered_loops,
                            const std::set<Expr, CompExpr>& loop_set,
-                           std::vector<Expr>& if_nodes) {
+                           std::vector<Expr>& if_nodes) {  // NOLINT
   std::vector<std::set<std::string>> condition_vars;
   // In each IfThenElse node, find the vars its condition depends on.
   for (auto& if_expr : if_nodes) {
@@ -923,7 +923,7 @@ void CheckComputeAtValidation(const Expr& block,
   CHECK(find_block_in_loop.empty()) << "loop should not be block's ancestor!";
 }
 
-void InsertBlock(Expr& for_loop, const Expr& insertion, int index) {
+void InsertBlock(Expr& for_loop, const Expr& insertion, int index) {  // NOLINT
   CHECK(for_loop.As<ir::For>());
   CHECK(for_loop.As<ir::For>()->body.As<Block>());
   ir::Block* dst_block = for_loop.As<ir::For>()->body.As<Block>();
......
@@ -326,7 +326,7 @@ Expr MakeCacheBlock(const std::vector<IterRange>& buffer_ranges,
  * @param info The information of cache block.
  * @param is_write Are we inserting a write cache tensor or a read cache tensor.
  */
-void FindInsertionPoint(Expr& root, CacheBlockInfo* info, bool is_write);
+void FindInsertionPoint(const Expr& root, CacheBlockInfo* info, bool is_write);
 
 /**
  * \brief Given a vector of For loops, return a set of them.
@@ -359,7 +359,7 @@ std::vector<Expr> GetLoopsInRange(const Expr& top, const Expr& bottom);
 Expr ConstructNewLoopChain(const std::vector<Expr>& chain,
                            const std::vector<Expr>& ordered_loops,
                            const std::set<Expr, CompExpr>& loop_set,
-                           std::vector<Expr>& if_nodes);
+                           std::vector<Expr>& if_nodes);  // NOLINT
 
 /*!
  * \brief Find producers of block in root.
@@ -395,7 +395,9 @@ void CheckComputeAtValidation(const Expr& block,
  * - `index = -1` means inserted into the tail
  * - otherwise, it should be a index between [0, stmts size)
  */
-void InsertBlock(Expr& for_loop, const Expr& insertion, int index = 0);
+void InsertBlock(Expr& for_loop,  // NOLINT
+                 const Expr& insertion,
+                 int index = 0);  // NOLINT
 
 /*!
  * \brief Make a union of two range. The detailed function is :
......
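Note the wrapped InsertBlock declaration just above: NOLINT comments are line-scoped, so once clang-format splits a signature across several lines, a suppression has to sit on the physical line cpplint complains about rather than once at the end of the declaration. A sketch of the same pattern with a hypothetical function, not taken from the patch:

struct Expr {};

// Single-line form: one trailing comment covers the whole declaration.
//   void ReplaceBody(Expr &dst, const Expr &src, int index = 0);  // NOLINT

// Wrapped form: the suppression rides on the flagged line itself.
void ReplaceBody(Expr &dst,  // NOLINT
                 const Expr &src,
                 int index = 0);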
@@ -49,7 +49,7 @@ void CheckNoIslCallRemains(Expr* expr) {
   }
 }
 
-void BindBuffer(StageMap& stages) {
+void BindBuffer(StageMap& stages) {  // NOLINT
   absl::flat_hash_map<std::string, ir::_Tensor_*> tensor_map;
   for (auto& stage : stages) {
     tensor_map[stage.second->tensor()->name] = stage.second->tensor();
@@ -71,11 +71,11 @@ void BindBuffer(StageMap& stages) {
   }
 }
 
-Expr LowerGroup(
-    const poly::ScheduleGroup& group,
-    const std::map<std::string, Expr>& tuple_to_expr,
-    std::map<std::string, ir::Tensor>* global_tensor_map,
-    std::unordered_map<std::string, std::vector<Expr>>& resized_buffer_cache,
-    StageMap stage_map,
-    ir::CudaAxisInfo* cuda_axis_info) {
+Expr LowerGroup(const poly::ScheduleGroup& group,
+                const std::map<std::string, Expr>& tuple_to_expr,
+                std::map<std::string, ir::Tensor>* global_tensor_map,
+                std::unordered_map<std::string, std::vector<Expr>>&
+                    resized_buffer_cache,  // NOLINT
+                StageMap stage_map,
+                ir::CudaAxisInfo* cuda_axis_info) {
   BindBuffer(stage_map);
......
@@ -69,7 +69,7 @@ void CheckNoIslCallRemains(const Expr* expr);
 Expr LowerGroup(const poly::ScheduleGroup& group,
                 const std::map<std::string, Expr>& tuple_to_expr,
                 std::map<std::string, Tensor>* global_tensor_map,
-                std::unordered_set<std::string>& resized_buffer,
+                std::unordered_set<std::string>& resized_buffer,  // NOLINT
                 StageMap stage_map,
                 ir::CudaAxisInfo* cuda_axis_info = nullptr);
......
@@ -55,7 +55,8 @@ void PartialSimplify(
 //! Simplify the expression but Load.
 struct SimplifyButStoreLoadMutator : public ir::IRMutator<ir::Expr*> {
   common::cas_intervals_t& var_intervals;
-  explicit SimplifyButStoreLoadMutator(common::cas_intervals_t& var_intervals)
+  explicit SimplifyButStoreLoadMutator(
+      common::cas_intervals_t& var_intervals)  // NOLINT
       : var_intervals(var_intervals) {}
 
   void operator()(Expr* x) { ir::IRMutator<ir::Expr*>::Visit(x, x); }
......
@@ -177,7 +177,7 @@ bool IsBetween(const common::GraphNode* x,
   return false;
 }
 
-std::vector<Group> TopoSortGroups(std::vector<Group>& groups) {
+std::vector<Group> TopoSortGroups(std::vector<Group>& groups) {  // NOLINT
   // collect indegree.
   absl::flat_hash_map<Group*, int> group_indegree;
   std::vector<Group*> start_groups;
......
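For background, the style rule behind runtime/references prefers that an argument a function will mutate be passed by pointer, making the write visible at each call site; the NOLINT route taken here is the lighter option when changing a signature would ripple through many callers. A hypothetical before/after, not taken from the patch:

#include <vector>

// Flagged form: callers cannot tell that 'groups' will be reordered.
void SortGroupsInPlace(std::vector<int> &groups);  // NOLINT(runtime/references)

// Style-preferred alternative: mutation is explicit at the call site,
// e.g. SortGroupsByPointer(&my_groups);
void SortGroupsByPointer(std::vector<int> *groups);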
@@ -40,7 +40,7 @@
 namespace cinn {
 namespace poly {
 
-void RemoveDuplicate(std::vector<std::vector<Expr>> &indices) {
+void RemoveDuplicate(std::vector<std::vector<Expr>> &indices) {  // NOLINT
   std::set<std::string> temp;
   for (int i = 0; i < indices.size(); i++) {
     std::string index_str = "";
@@ -309,7 +309,7 @@ int Minus(const Expr &a, const Expr &b) {
 }
 
 // Return the range = max - min among all indices[i][axis](i = 0,1,2,...)
-int GetRange(std::vector<std::vector<Expr>> &indices, int axis) {
+int GetRange(std::vector<std::vector<Expr>> &indices, int axis) {  // NOLINT
   Expr max_expr = indices[0][axis];
   Expr min_expr = indices[0][axis];
   for (auto i = 1; i < indices.size(); i++) {
@@ -1420,7 +1420,7 @@ struct CacheReplaceMutator : public ir::IRMutator<> {
 };
 }  // namespace
 
-void CacheReadWriteReplace(std::vector<ir::Tensor> &readers,
+void CacheReadWriteReplace(const std::vector<ir::Tensor> &readers,
                            ir::Tensor cache_tensor,
                            std::string origin_tensor_name) {
   for (auto k : readers) {
......
@@ -270,7 +270,7 @@ class Stage : public Object {
    * @param readers the readers of the \p tensor
    */
   ir::Tensor CacheRead(const std::string& memory_type,
-                       std::vector<ir::Tensor>& readers,
+                       std::vector<ir::Tensor>& readers,  // NOLINT
                        poly::StageMap stages);
 
   /**
@@ -303,7 +303,8 @@ class Stage : public Object {
   void ShowISL() const;
 
-  void AddForLoopInTransform(std::vector<std::vector<Expr>>& indices);
+  void AddForLoopInTransform(
+      std::vector<std::vector<Expr>>& indices);  // NOLINT
   /**
    * Create a cache for write to the original tensor.
    * @param tensor the tensor to create the cache for.
@@ -312,7 +313,7 @@ class Stage : public Object {
    */
   ir::Tensor CacheWrite(const std::string& memory_type,
                         poly::StageMap stages,
-                        ir::Tensor& key_tensor);
+                        ir::Tensor& key_tensor);  // NOLINT
 
   /**
    * Generate the `syncthreads()` code to sync all threads on CUDA backends.
......
@@ -236,12 +236,12 @@ inline auto __binary_op_fn_dispatch(T1 x, T2 y, F fn, std::false_type) {
 template <typename T1, typename T2, typename F>
 inline void __binary_op_visitor_dispatch(
-    CINNValue &v, T1 lhs, T2 rhs, F fn, std::true_type) {
+    CINNValue &v, T1 lhs, T2 rhs, F fn, std::true_type) {  // NOLINT
   v = CINNValue();
 }
 
 template <typename T1, typename T2, typename F>
 inline void __binary_op_visitor_dispatch(
-    CINNValue &v, T1 lhs, T2 rhs, F fn, std::false_type) {
+    CINNValue &v, T1 lhs, T2 rhs, F fn, std::false_type) {  // NOLINT
   v.Set(fn(lhs, rhs));
 }
......
@@ -500,25 +500,25 @@ __host__ __device__ inline float16 operator-(const float16& a) {
 #endif
 }
 
-__host__ __device__ inline float16& operator+=(float16& a,
+__host__ __device__ inline float16& operator+=(float16& a,  // NOLINT
                                                const float16& b) {  // NOLINT
   a = a + b;
   return a;
 }
 
-__host__ __device__ inline float16& operator-=(float16& a,
+__host__ __device__ inline float16& operator-=(float16& a,  // NOLINT
                                                const float16& b) {  // NOLINT
   a = a - b;
   return a;
 }
 
-__host__ __device__ inline float16& operator*=(float16& a,
+__host__ __device__ inline float16& operator*=(float16& a,  // NOLINT
                                                const float16& b) {  // NOLINT
   a = a * b;
   return a;
 }
 
-__host__ __device__ inline float16& operator/=(float16& a,
+__host__ __device__ inline float16& operator/=(float16& a,  // NOLINT
                                                const float16& b) {  // NOLINT
   a = a / b;
   return a;
......
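The float16 compound-assignment operators are a place where const-qualifying is impossible by construction: operator+= and friends must take and return a mutable reference to the left operand, so the per-line suppression is the only available fix. A self-contained sketch with a hypothetical f16 stand-in, not taken from the patch:

struct f16 {
  float v;
};

inline f16 operator+(const f16 &a, const f16 &b) { return f16{a.v + b.v}; }

// The language requires compound assignment to mutate 'a' and return it by
// reference, so the parameter cannot be const.
inline f16 &operator+=(f16 &a, const f16 &b) {  // NOLINT
  a = a + b;
  return a;
}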