From 3a387df62ebe84719e9837f1bebb05701b013c3a Mon Sep 17 00:00:00 2001
From: Wilber
Date: Thu, 8 Dec 2022 10:39:50 +0800
Subject: [PATCH] [Inference] inference add cinn interface (#48741)

---
 .../framework/paddle2cinn/build_cinn_pass.cc  | 28 ++++++++++++++---
 paddle/fluid/inference/analysis/argument.h    |  3 ++
 .../inference/analysis/ir_pass_manager.cc     |  2 ++
 paddle/fluid/inference/api/analysis_config.cc | 31 ++++++++++++++++++-
 .../fluid/inference/api/analysis_predictor.cc | 30 +++++++++++-------
 .../inference/api/paddle_analysis_config.h    | 16 ++++++++++
 .../inference/api/paddle_pass_builder.cc      |  7 +++++
 .../fluid/inference/api/paddle_pass_builder.h |  3 ++
 8 files changed, 104 insertions(+), 16 deletions(-)
 mode change 100755 => 100644 paddle/fluid/inference/api/analysis_config.cc
 mode change 100755 => 100644 paddle/fluid/inference/api/analysis_predictor.cc

diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc
index e97a56a743..f7306bfc9a 100644
--- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc
+++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc
@@ -484,7 +484,8 @@ void AnalyseClusterVariables(
     const std::unordered_set<std::string>& deny_var_set,
     GraphNodeSet* cluster_inputs,
     GraphNodeSet* cluster_outputs,
-    GraphNodeSet* cluster_internals) {
+    GraphNodeSet* cluster_internals,
+    bool is_inference_stage) {
   // collecting all input and output of op
   for (auto* op_node : cluster) {
     const auto& op_name = op_node->Name();
@@ -523,6 +524,18 @@ void AnalyseClusterVariables(
   for (auto* var_node : *cluster_internals) {
     cluster_outputs->erase(var_node);
   }
+
+  if (is_inference_stage) {
+    // If part of the output of the op is not used by other operators, change
+    // it to internal, such as transpose2 op's XShape out.
+    auto outs = *cluster_outputs;
+    for (auto* node : outs) {
+      if (node->outputs.empty()) {
+        cluster_outputs->erase(node);
+        cluster_internals->insert(node);
+      }
+    }
+  }
 }

 void AddLinkToCinnOp(const GraphNodeSet& cluster_inputs,
@@ -611,7 +624,7 @@ void ReplaceSubGraphWithCinnOpNode(
 // Here we use SubgraphDetector to detect the subgraphs in which
 // all op nodes are supported by CINN, using OpMapperRegistry
 // to check whether an op node is supported by CINN.
-void SearchAllSubgraphs(Graph* graph) {
+void SearchAllSubgraphs(Graph* graph, bool is_inference_stage) {
   auto allow_ops = StringSplit(FLAGS_allow_cinn_ops, kDelim);
   auto deny_ops = StringSplit(FLAGS_deny_cinn_ops, kDelim);
   OpTransInfo trans_info;
@@ -671,7 +684,8 @@ void SearchAllSubgraphs(Graph* graph) {
       deny_var_set,
       &cluster_inputs,
       &cluster_outputs,
-      &cluster_internals);
+      &cluster_internals,
+      is_inference_stage);

   VLOG(4) << "Cluster Ops: " << cluster_debug_info(cluster_set);
   VLOG(4) << "Cluster input vars: " << cluster_debug_info(cluster_inputs);
@@ -698,7 +712,13 @@ void SearchAllSubgraphs(Graph* graph) {
   }
 }
 }  // namespace

-void BuildCinnPass::ApplyImpl(Graph* graph) const { SearchAllSubgraphs(graph); }
+void BuildCinnPass::ApplyImpl(Graph* graph) const {
+  bool is_inference_stage{false};
+  if (Has("is_inference_stage")) {
+    is_inference_stage = Get<bool>("is_inference_stage");
+  }
+  SearchAllSubgraphs(graph, is_inference_stage);
+}

 }  // namespace paddle2cinn
 }  // namespace framework
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 8750a9afb4..a72c1fe762 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -368,6 +368,9 @@ struct Argument {
   DECL_ARGUMENT_FIELD(enable_gpu_half, EnableGPUHalf, bool);
   DECL_ARGUMENT_FIELD(mixed_precision_mode, MixedPrecisionMode, int);

+  // cinn compiler related
+  DECL_ARGUMENT_FIELD(use_cinn_compiler, UseCinnCompiler, bool);
+
  private:
   std::unordered_set<std::string> valid_fields_;
 };
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index cbcc48a7f6..25b371cb2f 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -235,6 +235,8 @@ void IRPassManager::CreatePasses(Argument *argument,
           new framework::ProgramDesc *(&argument->main_program()));
     } else if (pass_name == "memory_optimize_pass") {
       pass->Set("root_predictor_id", new int(argument->root_predictor_id()));
+    } else if (pass_name == "build_cinn_pass") {
+      pass->Set("is_inference_stage", new bool(argument->use_cinn_compiler()));
     }
     if (pass_name == "lite_subgraph_pass") {
       bool lite_enable_int8 =
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
old mode 100755
new mode 100644
index c5e648dffc..17afc4f840
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -477,6 +477,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // profile related.
   CP_MEMBER(with_profile_);

+  // cinn compiler related.
+  CP_MEMBER(use_cinn_compiler_);
+
   // glog related.
   CP_MEMBER(with_glog_info_);

@@ -542,7 +545,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
 #undef CP_MEMBER

   Update();
-  if (use_tensorrt_) {
+  if (use_tensorrt_ || use_cinn_compiler_) {
     // Update() will reset all the passes, when some tensorRT pass is deleted in
     // other.pass_builder(), it will set again, so we just remove the
     // deleted_pass.
@@ -872,6 +875,14 @@ void AnalysisConfig::Update() {
     }
   }

+  // TODO(wilber): An ugly method to update pass, need to be fixed.
+  if (use_cinn_compiler_) {
+    pass_builder()->ClearPasses();
+    for (const auto &pass : kCINNCompilerPasses) {
+      pass_builder()->AppendPass(pass);
+    }
+  }
+
   if (use_dlnne_) {
     pass_builder()->ClearPasses();
     for (const auto &pass : kDlnneSubgraphPasses) {
@@ -1316,6 +1327,9 @@ std::string AnalysisConfig::Summary() {
     os.InsertRow({"use_lite", use_lite_ ? "true" : "false"});
   }

+  // cinn compiler
+  os.InsertRow({"use_cinn_compiler", use_cinn_compiler_ ? "true" : "false"});
+
   // ir info
   os.InsertRow({"ir_optim", enable_ir_optim_ ? "true" : "false"});
   os.InsertRow({"ir_debug", ir_debug_ ? "true" : "false"});
@@ -1429,4 +1443,19 @@ void AnalysisConfig::Exp_DisableMixedInferOps(
   mixed_black_list_ = black_list;
 }

+void AnalysisConfig::Exp_EnableCINNCompiler() {
+#ifdef PADDLE_WITH_CINN
+  use_cinn_compiler_ = true;
+  Update();
+#else
+  PADDLE_THROW(platform::errors::Unavailable(
+      "You tried to use CINN compiler, but Paddle was not compiled "
+      "with CINN."));
+#endif
+}
+
+bool AnalysisConfig::cinn_compiler_enabled() const {
+  return use_cinn_compiler_;
+}
+
 }  // namespace paddle
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
old mode 100755
new mode 100644
index bf89db83dd..13dba59492
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1217,6 +1217,10 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetMKLDNNEnabledOpTypes(config_.mkldnn_enabled_op_types_);
   }

+  if (config_.use_cinn_compiler_) {
+    argument_.SetUseCinnCompiler(config_.use_cinn_compiler_);
+  }
+
 #ifdef PADDLE_WITH_MKLDNN
   if (config_.mkldnn_quantizer_enabled()) {
     LOG(INFO) << "Quantization is enabled";
@@ -1239,21 +1243,25 @@ void AnalysisPredictor::PrepareArgument() {
 #endif

   auto *pass_builder = config_.pass_builder();
+  // TODO(inference): Need to reconstruct the pass_builder, pass should be
+  // processed in a single
   if (model_precision_ != phi::DataType::FLOAT32) {
     LOG(INFO) << "Model is mixed precision type with " << model_precision_
               << ", we will use a new PassStrategy. Note that only the GPU "
                  "backend is supported for now.";
-    pass_builder->ClearPasses();
-    const auto &deleted_passes = pass_builder->GetAllDeletedPasses();
-    if (config_.tensorrt_engine_enabled()) {
-      for (const auto &pass : kTrtLowerPrecisionPasses) {
-        if (deleted_passes.count(pass)) continue;
-        pass_builder->AppendPass(pass);
-      }
-    } else if (config_.use_gpu()) {
-      for (const auto &pass : kGpuLowerPrecisionPasses) {
-        if (deleted_passes.count(pass)) continue;
-        pass_builder->AppendPass(pass);
+    if (!config_.use_cinn_compiler_) {
+      pass_builder->ClearPasses();
+      const auto &deleted_passes = pass_builder->GetAllDeletedPasses();
+      if (config_.tensorrt_engine_enabled()) {
+        for (const auto &pass : kTrtLowerPrecisionPasses) {
+          if (deleted_passes.count(pass)) continue;
+          pass_builder->AppendPass(pass);
+        }
+      } else if (config_.use_gpu()) {
+        for (const auto &pass : kGpuLowerPrecisionPasses) {
+          if (deleted_passes.count(pass)) continue;
+          pass_builder->AppendPass(pass);
+        }
       }
     }
   }
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 5521caee9f..5bf5d3de7b 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -1016,6 +1016,19 @@ struct PD_INFER_DECL AnalysisConfig {

   void SetSkipLoadParams(bool value) { skip_load_params_ = value; }

+  ///
+  /// \brief Enable the CINN compiler optimization.
+  ///
+  void Exp_EnableCINNCompiler();
+
+  ///
+  /// \brief A boolean state telling whether the CINN compiler optimization is
+  /// turned on.
+  ///
+  /// \return bool Whether the CINN compiler optimization is turned on.
+  ///
+  bool cinn_compiler_enabled() const;
+
  protected:
   // Update the config.
   void Update();
@@ -1143,6 +1156,9 @@ struct PD_INFER_DECL AnalysisConfig {
   Precision lite_precision_mode_;
   bool lite_zero_copy_;

+  // CINN compiler related.
+  bool use_cinn_compiler_{false};
+
   // XPU related.
   bool use_xpu_{false};
   int xpu_device_id_{0};
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc
index 4ac9123112..4e397fbd04 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.cc
+++ b/paddle/fluid/inference/api/paddle_pass_builder.cc
@@ -204,6 +204,13 @@ const std::vector<std::string> kTrtLowerPrecisionPasses{
     "tensorrt_subgraph_pass",
 };

+const std::vector<std::string> kCINNCompilerPasses{
+    "gpu_cpu_map_matmul_v2_to_mul_pass",
+    "gpu_cpu_map_matmul_v2_to_matmul_pass",
+    "gpu_cpu_map_matmul_to_mul_pass",
+    "build_cinn_pass",
+};
+
 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
   passes_.assign({
       //   "identity_scale_op_clean_pass",             //
diff --git a/paddle/fluid/inference/api/paddle_pass_builder.h b/paddle/fluid/inference/api/paddle_pass_builder.h
index 0990a61da3..8dea84400e 100644
--- a/paddle/fluid/inference/api/paddle_pass_builder.h
+++ b/paddle/fluid/inference/api/paddle_pass_builder.h
@@ -349,6 +349,9 @@ PD_INFER_DECL extern const std::vector<std::string> kDlnneSubgraphPasses;
 /// \brief List of lite subgraph passes.
 PD_INFER_DECL extern const std::vector<std::string> kLiteSubgraphPasses;

+/// \brief List of cinn compiler passes.
+PD_INFER_DECL extern const std::vector<std::string> kCINNCompilerPasses;
+
 /// \brief TODO(inference): Most of the existing pass fusion operators do not
 /// support fp16/bf16 precision, temporarily use low precision pass to prevent
 /// running errors. After fusion operator supports low precision, delete this.
--
GitLab
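
A minimal usage sketch of the new interface, assuming a Paddle build configured with -DWITH_CINN=ON. The file name, header include, model paths, and GPU pool size below are illustrative placeholders; Exp_EnableCINNCompiler(), cinn_compiler_enabled(), and paddle_infer::CreatePredictor() are the entry points this patch adds or relies on.

// usage_sketch.cc -- illustrative only; paths and settings are hypothetical.
#include <iostream>
#include <memory>

#include "paddle_inference_api.h"  // shipped with the Paddle inference library

int main() {
  // paddle_infer::Config is an alias of paddle::AnalysisConfig.
  paddle_infer::Config config("./model.pdmodel", "./model.pdiparams");
  config.EnableUseGpu(/*memory_pool_init_size_mb=*/100, /*device_id=*/0);

  // New in this patch: swaps the pass list for kCINNCompilerPasses
  // (the matmul-mapping passes followed by build_cinn_pass). Throws if
  // Paddle was not compiled with CINN.
  config.Exp_EnableCINNCompiler();
  std::cout << "CINN enabled: " << std::boolalpha
            << config.cinn_compiler_enabled() << std::endl;

  // build_cinn_pass now runs with is_inference_stage=true, so consumer-less
  // outputs (e.g. transpose2's XShape) are folded into cluster internals.
  std::shared_ptr<paddle_infer::Predictor> predictor =
      paddle_infer::CreatePredictor(config);
  // ... feed inputs via predictor->GetInputHandle(...) and call
  // predictor->Run() as with any other predictor.
  return 0;
}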