From d70104e5358cfd788730aea89ea667a5c6f10b29 Mon Sep 17 00:00:00 2001
From: TeFeng Chen <ctfeng66@163.com>
Date: Wed, 20 Apr 2022 09:50:36 +0800
Subject: [PATCH] enable auto-tune when using cinn (#41795)

* optimize preparation overhead before executing cinn compiled program

* update code comments

* fix flag annotation

* enable auto-tune when using CINN

* update cinn commit tag

* skip test

* add missing header file
---
 cmake/external/cinn.cmake                        |  3 +--
 .../fluid/framework/paddle2cinn/cinn_compiler.cc | 16 +++++++++++++++-
 .../fluid/framework/paddle2cinn/cinn_compiler.h  |  5 +++++
 .../operators/cinn/cinn_launch_context_test.cc   |  1 +
 .../fluid/operators/cinn/cinn_launch_op_test.cc  |  9 +++++++++
 5 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/cmake/external/cinn.cmake b/cmake/external/cinn.cmake
index cd4e0157f2a..1ca029b3add 100644
--- a/cmake/external/cinn.cmake
+++ b/cmake/external/cinn.cmake
@@ -26,7 +26,7 @@ add_definitions(-w)
 ######################################
 include(ExternalProject)
 set(CINN_PREFIX_DIR ${THIRD_PARTY_PATH}/CINN)
-set(CINN_GIT_TAG 1fd85187b6c18da4dd51f22619d093ef08d61b01)
+set(CINN_GIT_TAG 08d7680dd91dfaa65787969050eb8f1143654f10)
 set(CINN_OPTIONAL_ARGS -DPY_VERSION=${PY_VERSION}
                        -DWITH_CUDA=${WITH_GPU}
                        -DWITH_CUDNN=${WITH_GPU}
@@ -85,4 +85,3 @@ add_library(cinn SHARED IMPORTED GLOBAL)
 set_target_properties(cinn PROPERTIES IMPORTED_LOCATION "${CINN_LIB_LOCATION}/${CINN_LIB_NAME}")
 include_directories(${CINN_INCLUDE_DIR})
 add_dependencies(cinn external_cinn)
-
diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
index 83a5b6f8213..67393c288df 100644
--- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
+++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc
@@ -21,6 +21,8 @@
 #include <string>
 #include <unordered_map>
 
+#include "cinn/auto_schedule/auto_tuner.h"
+#include "cinn/auto_schedule/tuning.h"
 #include "cinn/common/target.h"
 #include "cinn/common/type.h"
 #include "cinn/frontend/decomposer/use_decomposer.h"
@@ -48,6 +50,7 @@
 #include "paddle/phi/core/utils/rw_lock.h"
 
 DECLARE_bool(enable_pe_launch_cinn);
+DECLARE_bool(enable_cinn_auto_tune);
 namespace paddle {
 namespace framework {
 namespace paddle2cinn {
@@ -58,6 +61,7 @@ using inference::analysis::Dot;
 using ::cinn::common::Target;
 using ::cinn::common::Float;
 using ::cinn::hlir::framework::GraphCompiler;
+using ::cinn::auto_schedule::AutoTuner;
 using ::cinn::hlir::framework::BuildScope;
 using ::cinn::frontend::ProgramPass;
 using ::cinn::hlir::framework::ApplyPass;
@@ -277,10 +281,20 @@ std::unique_ptr<CinnCompiledObject> CinnCompiler::CompileGraph(
   if (!FLAGS_enable_pe_launch_cinn) {
     options.with_buffer_handle_instruction_inserted = true;
   }
+  std::unique_ptr<AutoTuner> auto_tuner;
+  if (FLAGS_enable_cinn_auto_tune) {
+    VLOG(4) << "Compile with auto-tune";
+    auto_tuner = std::make_unique<AutoTuner>(target, cinn_graph.get());
+    auto_tuner->Initialize(AutoTuner::Config(), graph_compiler.get());
+    ::cinn::auto_schedule::TuningOptions tuning_options;
+    tuning_options.num_measure_trials = 0;
+    auto tuning_result = auto_tuner->Tune(tuning_options);
+    options.Apply(tuning_result);
+  }
   auto compiled_res =
       graph_compiler->Build(options, std::move(fetch_ids), stream);
   auto compiled_obj = std::make_unique<CinnCompiledObject>();
-  *compiled_obj = {std::move(graph_compiler),
+  *compiled_obj = {std::move(graph_compiler), std::move(auto_tuner),
                    std::move(compiled_res.runtime_program), scope,
                    symbol.var_model_to_program_map()};
   compiled_obj->cached_index = compiled_num;
diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h
index cf17e68156b..7e5df6faf08 100644
--- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h
+++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h
@@ -37,6 +37,10 @@ class GraphCompiler;
 class Program;
 class Scope;
 }  // namespace hlir::framework
+
+namespace auto_schedule {
+class AutoTuner;
+}  // namespace auto_schedule
 }  // namespace cinn
 
 namespace paddle {
@@ -49,6 +53,7 @@ namespace paddle2cinn {
 
 struct CinnCompiledObject {
   std::unique_ptr<::cinn::hlir::framework::GraphCompiler> compiler;
+  std::unique_ptr<::cinn::auto_schedule::AutoTuner> auto_tuner;
   std::unique_ptr<::cinn::hlir::framework::Program> runtime_program;
   std::shared_ptr<::cinn::hlir::framework::Scope> scope;
   std::unordered_map<std::string, std::string> paddle2cinn_varmap;
diff --git a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
index 15ea9a6926a..ecbfbf2f92e 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <memory>
 #include <set>
 #include <utility>
+#include "cinn/auto_schedule/auto_tuner.h"
 #include "cinn/common/target.h"
 #include "cinn/common/type.h"
 #include "cinn/hlir/framework/graph_compiler.h"
diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
index 4f922945eae..b0bd043f432 100644
--- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
+++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc
@@ -33,6 +33,7 @@ USE_OP(cinn_instruction_run);
 USE_OP_ITSELF(elementwise_add);
 DECLARE_double(eager_delete_tensor_gb);
 DECLARE_bool(enable_pe_launch_cinn);
+DECLARE_bool(enable_cinn_auto_tune);
 
 PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
 #ifdef PADDLE_WITH_CUDA
@@ -107,6 +108,14 @@ TEST_F(TestCinnLaunchOp, TestRunInstructionByCinnProgram) {
 #endif
 }
 
+TEST_F(TestCinnLaunchOp, TestRunWithAutoTuneEnabled) {
+  FLAGS_enable_cinn_auto_tune = true;
+  // NOTE(review): flag is never reset here; confirm later tests tolerate it.
+  // Currently only checked on CPU; a GPU test will follow once CINN is ready.
+  RunAndCheck(platform::CPUPlace());
+  RunAndCheck(platform::CPUPlace());
+}
+
 namespace details {
 // Testing helper function used on CinnLaunchOpKernel in the following:
 // firstly build test data, then check both expected and illegal situations
-- 
GitLab