diff --git a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc index 4e0359144c2838c2deb1a7ce0bc8be2dfa759fc0..651ebf4c4379940319fd2affd3f264672e5a7f1d 100644 --- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc +++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc @@ -622,7 +622,8 @@ void BuildOpFuncList(const platform::Place& place, // NOTE(Ruibiao): We do not encourage directly using scope in OP kernel. // But some OPs do have such behavior (e.g., cinn_launch OP). Here // special treatment for them. - if (op_with_kernel->Type() == "cinn_launch") { + if (op_with_kernel->Type() == "cinn_launch" || + op_with_kernel->Type() == "cinn_instruction_run") { VLOG(6) << "OP(" << op_with_kernel->Type() << ") use scope in kernel, " "so pass a real scope to " diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index 4642a684663b64296e373b59b0f241659a24cd73..5ad12071bd3c2d22468cce2155ba97ccec444293 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -108,7 +108,8 @@ InterpreterCore::InterpreterCore(const platform::Place& place, const std::set<std::string>& skip_gc_vars, framework::Scope* scope, bool used_for_jit, - bool used_for_control_flow_op) + bool used_for_control_flow_op, + bool used_for_cinn) : place_(place), block_(block), execution_config_(place, block.OpSize()), @@ -121,9 +122,9 @@ InterpreterCore::InterpreterCore(const platform::Place& place, execution_config_.used_for_jit = used_for_jit; execution_config_.used_for_control_flow_op = used_for_control_flow_op; - execution_config_.create_local_scope = !used_for_jit && - FLAGS_new_executor_use_local_scope && - !used_for_control_flow_op; + execution_config_.create_local_scope = + !used_for_jit && FLAGS_new_executor_use_local_scope && + 
!used_for_control_flow_op && !used_for_cinn; execution_config_.skip_gc_vars = skip_gc_vars; execution_config_.Log(/*log_level=*/8); @@ -425,8 +426,9 @@ void InterpreterCore::BuildAndCacheInstructionCtx(Instruction* instr_node) { } // set runtime_ctx and infershape_ctx_ - if (instr_node->OpBase()->Type() == "cinn_launch") { // OP use scope in - // kernel + if (instr_node->OpBase()->Type() == "cinn_launch" || + instr_node->OpBase()->Type() == "cinn_instruction_run") { // OP use scope + // in kernel Scope* local_scope = HasLocalScope() ? var_scope_.GetMutableLocalScope() : var_scope_.GetMutableScope(); instr_node->ResetContextWithScope(ins_map, outs_map, *local_scope); diff --git a/paddle/fluid/framework/new_executor/interpretercore.h b/paddle/fluid/framework/new_executor/interpretercore.h index 80db521d60d04acbef55c16ca0bae20c5d31fd6e..a09942387a95c6474d9a3b56c4c8a5f6d50a0a54 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.h +++ b/paddle/fluid/framework/new_executor/interpretercore.h @@ -46,7 +46,8 @@ class InterpreterCore { const std::set<std::string>& skip_gc_vars, Scope* scope, bool used_for_jit = false, - bool used_for_control_flow_op = false); + bool used_for_control_flow_op = false, + bool used_for_cinn = false); ~InterpreterCore(); diff --git a/paddle/fluid/operators/cinn/CMakeLists.txt b/paddle/fluid/operators/cinn/CMakeLists.txt index e4063436c03363269f8593428c87bef0c374008c..b0b084c6928b0d5b4951c9e5ab985dc7b442f347 100644 --- a/paddle/fluid/operators/cinn/CMakeLists.txt +++ b/paddle/fluid/operators/cinn/CMakeLists.txt @@ -15,6 +15,7 @@ cc_library( build_strategy device_context parallel_executor + standalone_executor transform_type cinn) diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc index 6469ac3ab212c0f8ff84058141b6bce6133ba3ec..0abb39573cb38ad17f957e1d3fef3015a99b9d7f 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc +++ 
b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc @@ -88,16 +88,15 @@ class TestCinnInstructionRunOp : public ::testing::Test { cinn_launch_op->Run(scope, place); } - void RunAndCheck(const platform::Place& place) { + void RunAndCheck(const platform::Place& place, framework::Scope* scope) { // Run ops and check the computation results - framework::Scope scope; - InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope); - scope.Var(test_op_out_name)->GetMutable<LoDTensor>(); - scope.Var(add_op_out_name)->GetMutable<LoDTensor>(); - elementwise_add_op->Run(scope, place); - cinn_launch_op->Run(scope, place); - CompareOpResult<float>(scope.GetVar(test_op_out_name), - scope.GetVar(add_op_out_name)); + InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, scope); + scope->Var(test_op_out_name)->GetMutable<LoDTensor>(); + scope->Var(add_op_out_name)->GetMutable<LoDTensor>(); + elementwise_add_op->Run(*scope, place); + cinn_launch_op->Run(*scope, place); + CompareOpResult<float>(scope->GetVar(test_op_out_name), + scope->GetVar(add_op_out_name)); } void TearDown() override { CinnCompiler::GetInstance()->Clear(); } @@ -106,17 +105,21 @@ TEST_F(TestCinnInstructionRunOp, CPU) { platform::CPUPlace place; Compile(place); - RunAndCheck(place); + framework::Scope scope1; + RunAndCheck(place, &scope1); // the second run on the same place is to check the cache logic - RunAndCheck(place); + framework::Scope scope2; + RunAndCheck(place, &scope2); } #ifdef PADDLE_WITH_CUDA TEST_F(TestCinnInstructionRunOp, GPU) { platform::CUDAPlace place; Compile(place); - RunAndCheck(place); - RunAndCheck(place); + framework::Scope scope1; + RunAndCheck(place, &scope1); + framework::Scope scope2; + RunAndCheck(place, &scope2); } #endif diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index 3b9d7d00edad4db9ca3dd98b8af7ac08d087b399..982fedfe23d8c47d66bb3415e13324eebb6ec674 100644 --- 
a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -88,8 +88,9 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph, } // Convert the CINN runtime program to a Paddle graph - runtime_graph_ = std::make_unique<framework::ir::Graph>( - BuildCompiledProgram(graph, compiled_obj)); + runtime_program_desc_ = BuildCompiledProgram(graph, compiled_obj); + runtime_graph_ = + std::make_unique<framework::ir::Graph>(*runtime_program_desc_.get()); auto& outer_varinfo = graph.Get<Name2VarInfoMap>(kMemOptVarInfoFromMainGraph); runtime_graph_->SetNotOwned<Name2VarInfoMap>(kMemOptVarInfoFromMainGraph, &outer_varinfo); @@ -100,6 +101,7 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph, // that means it can be erased after graph execution if (!outer_varinfo.count(var_name)) { skip_eager_vars_.emplace_back(var_name); + skip_gc_vars_.insert(var_name); } }; std::for_each( @@ -313,12 +315,14 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) { }); } -framework::ProgramDesc CinnLaunchContext::BuildCompiledProgram( +std::unique_ptr<framework::ProgramDesc> CinnLaunchContext::BuildCompiledProgram( const framework::ir::Graph& graph, const CinnCompiledObject& compiled_obj) { CinnRuntimeProgram* runtime_program = compiled_obj.runtime_program.get(); // Step 0: Create an empty program_desc, there will be only one block - framework::ProgramDesc program_desc; - auto* block = program_desc.MutableBlock(0); + // framework::ProgramDesc program_desc; + std::unique_ptr<framework::ProgramDesc> program_desc( + new framework::ProgramDesc()); + auto* block = program_desc->MutableBlock(0); const std::vector<std::unique_ptr<CinnInstruction>>& instructions = runtime_program->GetRunInstructions(); @@ -445,6 +449,46 @@ ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place, return parallel_executor_.get(); } +framework::InterpreterCore* CinnLaunchContext::InitializeInterpreterCore( + const platform::Place& place, framework::Scope* scope) { + if (!interpreter_core_ || scope != cached_scope_) { + VLOG(1) << 
"interpreter_core_ is null or scope != cached_scope_: " "interpreter_core_: " << interpreter_core_.get() << "; scope: " << scope << "; cached_scope_: " << cached_scope_; + for (auto&& var_name : internal_var_names_) { + auto* var = scope->FindVar(var_name); + if (var != nullptr) { + continue; + } + framework::InitializeVariable(scope->Var(var_name), + framework::proto::VarType::LOD_TENSOR); + } + if (!interpreter_core_) { + interpreter_core_ = std::make_unique<framework::InterpreterCore>( + place, + runtime_program_desc_->Block(0), + skip_gc_vars_, + scope, + /*used_for_jit*/ false, + /*used_for_control_flow_op*/ false, + /*used_for_cinn*/ true); + } else { + interpreter_core_->reset_scope(scope); + } + UpdateCapturedEnv(*scope, place); + } + for (auto&& var_name : initialized_beforehand_vars_) { + auto* var = scope->GetVar(var_name); + auto* buffer = GetCinnBufferOfVar(var_name); + auto dim = framework::DDim(buffer->dims, buffer->dimensions); + var->GetMutable<LoDTensor>()->Resize(dim); + var->GetMutable<LoDTensor>()->mutable_data( + place, framework::paddle2cinn::TransToPaddleDataType(buffer->type)); + } + return interpreter_core_.get(); +} + cinn_buffer_t* CinnLaunchContext::GetCinnBufferOfVar( const std::string& var_name) { auto it = paddle2cinn_varmap_.find(var_name); diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h index e343d1dd9c34842f28500e6122f26942e2554c62..d6ce95de0859d0e3d2e63e0dbaef8ae7bbac5036 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.h +++ b/paddle/fluid/operators/cinn/cinn_launch_context.h @@ -22,6 +22,7 @@ #include <vector> #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/new_executor/interpretercore.h" #include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/platform/place.h" #include "paddle/phi/core/ddim.h" @@ -74,6 +75,9 @@ class CinnLaunchContext { framework::ParallelExecutor* InitializePE(const platform::Place& place, framework::Scope* scope); + 
framework::InterpreterCore* InitializeInterpreterCore( + const platform::Place& place, framework::Scope* scope); + // explicitly update several environment variables captured // by callback of execution arguments void UpdateCapturedEnv(const framework::Scope& scope, @@ -132,7 +136,7 @@ // Construct a Paddle ProgramDesc with the CINN runtime // instructions included in the compiled CINN Program - framework::ProgramDesc BuildCompiledProgram( + std::unique_ptr<framework::ProgramDesc> BuildCompiledProgram( const framework::ir::Graph& graph, const CinnCompiledObject& compiled_obj); @@ -155,6 +159,10 @@ // the variable scope compiled from cinn const std::shared_ptr<CinnScope> cinn_scope_; + std::unique_ptr<framework::ProgramDesc> runtime_program_desc_; + std::unique_ptr<framework::InterpreterCore> interpreter_core_; + std::set<std::string> skip_gc_vars_; + // the ir::Graph object converted from the program compiled by CINN std::unique_ptr<framework::ir::Graph> runtime_graph_; // a ParallelExecutor to execute the runtime graph diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h index e27ef6079174bec73746b293c6dcd73c0835ba87..dc740e215d69d1b663bf560058028524a2dc1180 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn/cinn_launch_op.h @@ -31,6 +31,7 @@ #include "paddle/fluid/platform/profiler.h" DECLARE_bool(enable_pe_launch_cinn); +DECLARE_bool(enable_interpretercore_launch_cinn); namespace paddle { namespace operators { @@ -135,12 +136,21 @@ class CinnLaunchOpKernel : public framework::OpKernel<T> { // Step 4. Execute the compiled CINN instructions by a PE or // by the CINN compiled program in sequential order if (FLAGS_enable_pe_launch_cinn) { - platform::RecordEvent record_event_4( - "Step 4. 
Execute the runtime graph by PE."); - VLOG(4) << "Execute the runtime graph by PE"; - framework::Scope& exec_scope = scope.NewScope(); - auto* pe = launch_context->InitializePE(place, &exec_scope); - pe->RunWithoutFetch(launch_context->GetSkipEagerVars()); + if (FLAGS_enable_interpretercore_launch_cinn) { + platform::RecordEvent record_event_4( + "Step 4. Execute the runtime program by InterpreterCore."); + VLOG(4) << "Execute the runtime program by InterpreterCore"; + auto* interpreter_core = launch_context->InitializeInterpreterCore( + place, const_cast<framework::Scope*>(&scope)); + interpreter_core->Run({}); + } else { + platform::RecordEvent record_event_4( + "Step 4. Execute the runtime graph by PE."); + VLOG(4) << "Execute the runtime graph by PE"; + framework::Scope& exec_scope = scope.NewScope(); + auto* pe = launch_context->InitializePE(place, &exec_scope); + pe->RunWithoutFetch(launch_context->GetSkipEagerVars()); + } } else { platform::RecordEvent record_event_4( "Step 4. Execute the compiled executable program."); diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc index dad26dc637400ca80b727e35b41b03ff7a3faf05..e10f4a85a9a6affe04c288768aa43559095eb8c6 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc @@ -36,6 +36,7 @@ USE_OP(cinn_instruction_run); USE_OP_ITSELF(elementwise_add); DECLARE_double(eager_delete_tensor_gb); DECLARE_bool(enable_pe_launch_cinn); +DECLARE_bool(enable_interpretercore_launch_cinn); DECLARE_bool(enable_cinn_auto_tune); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); @@ -74,31 +75,34 @@ class TestCinnLaunchOp : public ::testing::Test { {{}}); } - void RunAndCheck(const platform::Place& place) { + void RunAndCheck(const platform::Place& place, framework::Scope* scope) { // Run ops and check the computation results - framework::Scope scope; - InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, &scope); - 
scope.Var(test_op_out_name)->GetMutable<LoDTensor>(); - scope.Var(add_op_out_name)->GetMutable<LoDTensor>(); - elementwise_add_op->Run(scope, place); - cinn_launch_op->Run(scope, place); - CompareOpResult<float>(scope.GetVar(test_op_out_name), - scope.GetVar(add_op_out_name)); + InitVariablesWithRandomValue<float>({"x", "y"}, {10, 20}, place, scope); + scope->Var(test_op_out_name)->GetMutable<LoDTensor>(); + scope->Var(add_op_out_name)->GetMutable<LoDTensor>(); + elementwise_add_op->Run(*scope, place); + cinn_launch_op->Run(*scope, place); + CompareOpResult<float>(scope->GetVar(test_op_out_name), + scope->GetVar(add_op_out_name)); } void TearDown() override { CinnCompiler::GetInstance()->Clear(); } }; TEST_F(TestCinnLaunchOp, TestRunCPUInstructionByPE) { - RunAndCheck(platform::CPUPlace()); + framework::Scope scope1; + RunAndCheck(platform::CPUPlace(), &scope1); // the second run on the same place is to check the cache logic - RunAndCheck(platform::CPUPlace()); + framework::Scope scope2; + RunAndCheck(platform::CPUPlace(), &scope2); } #ifdef PADDLE_WITH_CUDA TEST_F(TestCinnLaunchOp, TestRunGPUInstructionByPE) { - RunAndCheck(platform::CUDAPlace()); - RunAndCheck(platform::CUDAPlace()); + framework::Scope scope1; + RunAndCheck(platform::CUDAPlace(), &scope1); + framework::Scope scope2; + RunAndCheck(platform::CUDAPlace(), &scope2); } #endif @@ -106,9 +110,11 @@ TEST_F(TestCinnLaunchOp, TestRunCPUInstructionByCinnProgram) { // set FLAGS_enable_pe_launch_cinn=false to switch to use // default scheduler of CINN to execute the compiled program FLAGS_enable_pe_launch_cinn = false; - - RunAndCheck(platform::CPUPlace()); - RunAndCheck(platform::CPUPlace()); + FLAGS_enable_interpretercore_launch_cinn = false; + framework::Scope scope1; + RunAndCheck(platform::CPUPlace(), &scope1); + framework::Scope scope2; + RunAndCheck(platform::CPUPlace(), &scope2); } #ifdef PADDLE_WITH_CUDA @@ -116,8 +122,11 @@ TEST_F(TestCinnLaunchOp, TestRunGPUInstructionByCinnProgram) { // set FLAGS_enable_pe_launch_cinn=false to switch to use // default scheduler 
of CINN to execute the compiled program FLAGS_enable_pe_launch_cinn = false; - RunAndCheck(platform::CUDAPlace()); - RunAndCheck(platform::CUDAPlace()); + FLAGS_enable_interpretercore_launch_cinn = false; + framework::Scope scope1; + RunAndCheck(platform::CUDAPlace(), &scope1); + framework::Scope scope2; + RunAndCheck(platform::CUDAPlace(), &scope2); } #endif @@ -125,8 +134,10 @@ TEST_F(TestCinnLaunchOp, TestRunWithAutoTuneEnabled) { FLAGS_enable_cinn_auto_tune = true; // currently only check on cpu, will add a test for gpu after CINN ready - RunAndCheck(platform::CPUPlace()); - RunAndCheck(platform::CPUPlace()); + framework::Scope scope1; + RunAndCheck(platform::CPUPlace(), &scope1); + framework::Scope scope2; + RunAndCheck(platform::CPUPlace(), &scope2); } namespace details { diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc index 29c9c63e7fd22ac90f134c876148a2d845071b7c..6850d91b8dd4519453096363634fdc6ca92a979d 100644 --- a/paddle/phi/core/flags.cc +++ b/paddle/phi/core/flags.cc @@ -978,6 +978,20 @@ PADDLE_DEFINE_EXPORTED_bool(enable_pe_launch_cinn, "It controls whether to execute cinn compiled " "program with ParallelExecutor"); +/* + * CINN related FLAG + * Name: FLAGS_enable_interpretercore_launch_cinn + * Since Version: 2.4 + * Value Range: bool, default=true + * Example: FLAGS_enable_interpretercore_launch_cinn=true would execute the CINN + * compiled instructions of a paddle graph with InterpreterCore, otherwise with + * the CINN compiled runtime program in sequential order. + */ +PADDLE_DEFINE_EXPORTED_bool(enable_interpretercore_launch_cinn, + true, + "It controls whether to execute cinn compiled " + "program with InterpreterCore"); + /* * CINN related FLAG * Name: FLAGS_enable_cinn_auto_tune