Unverified commit d6ee0a13, authored by kangguangli, committed by GitHub

[StandaloneExe] Remove flag about Executor (#52671)

* add strategy force_sequential_run

* remove flag

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix
Parent 3c0b1795
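
Note (not part of the commit): a minimal usage sketch of the renamed option, assembled from the test and docstring changes below. The BuildStrategy / CompiledProgram calls come straight from the diff; the tiny add program is illustrative only.

    # usage sketch, assuming the paddle.static APIs shown in the diff below
    import paddle
    import paddle.static as static

    paddle.enable_static()

    main, startup = static.Program(), static.Program()
    with static.program_guard(main, startup):
        x = paddle.full([2, 2], 1.0)
        y = paddle.full([2, 2], 2.0)
        z = paddle.add(x, y)

    build_strategy = static.BuildStrategy()
    build_strategy.sequential_run = True  # renamed from force_sequential_run in this commit

    compiled = static.CompiledProgram(main, build_strategy=build_strategy)
    exe = static.Executor(paddle.CPUPlace())
    exe.run(startup)
    (out,) = exe.run(compiled, fetch_list=[z])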
@@ -135,7 +135,7 @@ struct BuildStrategy {
   bool fuse_adamw_{false};
   // Fused feed forward
   bool fused_feedforward_{false};
-  bool force_sequential_run_{false};
+  bool sequential_run_{false};
   // mkldnn_enabled_op_types specify the operator type list to
   // use MKLDNN acceleration. It is null in default, means
@@ -270,8 +270,7 @@ inline std::ostream &operator<<(std::ostream &os,
   os << "fuse_gemm_epilogue_: " << strategy.fuse_gemm_epilogue_ << std::endl;
   os << "fused_attention_: " << strategy.fused_attention_ << std::endl;
   os << "fused_feedforward_: " << strategy.fused_feedforward_ << std::endl;
-  os << "force_sequential_run_: " << strategy.force_sequential_run_
-     << std::endl;
+  os << "sequential_run_: " << strategy.sequential_run_ << std::endl;
   os << "mkldnn_enabled_op_types_: ";
   for (auto str : strategy.mkldnn_enabled_op_types_) {
     os << str << ", ";
......
@@ -50,9 +50,6 @@ PADDLE_DEFINE_EXPORTED_bool(new_executor_use_local_scope,
                             true,
                             "Use local_scope in new executor(especially used "
                             "in UT), can turn off for better performance");
-PADDLE_DEFINE_EXPORTED_bool(control_flow_use_new_executor,
-                            true,
-                            "Use new executor in control flow op");
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(benchmark);
......
@@ -34,7 +34,6 @@
 #include "paddle/fluid/platform/device_event.h"
 DECLARE_bool(new_executor_use_local_scope);
-DECLARE_bool(control_flow_use_new_executor);
 namespace paddle {
 namespace framework {
......
@@ -95,48 +95,28 @@ class ConditionalBlockOp : public ConditionalOp {
     auto &skip_vars =
         Attr<std::vector<std::string>>(ConditionalOp::kSkipEagerDeletionVars);
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][ConditionalBlock] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        VLOG(10) << "[interpreterCore cache]" << core_.get();
-        VLOG_IF(10, core_)
-            << platform::is_same_place(core_->GetPlace(), dev_place);
-
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new InterpreterCore(
-            dev_place, *block, &cur_scope, execution_config));
-        VLOG(10) << "[interpreterCore cache]"
-                 << "new created:" << core_;
-      } else {
-        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-        core_->reset_scope(&cur_scope);
-      }
-      core_->Run({}, false);
-    } else {
-      if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-        auto &pdesc = *block->Program();
-        exec_.reset(new Executor(dev_place));
-        if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-        ctx_ = exec_->Prepare(pdesc, block->ID(), skip_vars, false);
-#ifdef PADDLE_WITH_MKLDNN
-        platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-        platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-      }
-      exec_->RunPreparedContext(ctx_.get(),
-                                &cur_scope,
-                                /* create_local_scope */ false,
-                                /* create_vars */ true,
-                                /* keep_kids */ true);
-    }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][ConditionalBlock] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      VLOG(10) << "[interpreterCore cache]" << core_.get();
+      VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                    dev_place);
+
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new InterpreterCore(
+          dev_place, *block, &cur_scope, execution_config));
+      VLOG(10) << "[interpreterCore] created:" << core_;
+    } else {
+      BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+      core_->reset_scope(&cur_scope);
+    }
+    core_->Run({}, false);
   }
 }
@@ -208,47 +188,27 @@ class ConditionalBlockGradOp : public ConditionalOp {
     VLOG(3) << "Conditional Grad block.idx = " << block->ID()
             << ", scope = " << &cur_scope;
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        VLOG(10) << "[interpreterCore cache]" << core_.get();
-        VLOG_IF(10, core_)
-            << platform::is_same_place(core_->GetPlace(), dev_place);
-
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(inside_grads.begin(), inside_grads.end());
-
-        core_.reset(new InterpreterCore(
-            dev_place, *block, &cur_scope, execution_config));
-        VLOG(10) << "[interpreterCore cache]"
-                 << "new created:" << core_;
-      } else {
-        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-        core_->reset_scope(&cur_scope);
-      }
-      core_->Run({}, false);
-    } else {
-      if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-        auto &pdesc = *block->Program();
-        exec_.reset(new Executor(dev_place));
-        if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-        ctx_ = exec_->Prepare(pdesc, block->ID(), inside_grads, false);
-#ifdef PADDLE_WITH_MKLDNN
-        platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-        platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-      }
-      exec_->RunPreparedContext(ctx_.get(),
-                                &cur_scope,
-                                /* create_local_scope */ false,
-                                /* create_vars */ true,
-                                /* keep_kids */ true);
-    }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      VLOG(10) << "[interpreterCore cache]" << core_.get();
+      VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                    dev_place);
+
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(inside_grads.begin(), inside_grads.end());
+
+      core_.reset(new InterpreterCore(
+          dev_place, *block, &cur_scope, execution_config));
+      VLOG(10) << "[interpreterCore] created:" << core_;
+    } else {
+      BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+      core_->reset_scope(&cur_scope);
+    }
+    core_->Run({}, false);
     AssignLocalGradientToParentScope(
         dev_place, cur_scope, scope, inside_grads, outside_grads, inputs);
@@ -398,7 +358,8 @@ struct FilterNoGradInput<framework::OpDesc> {
                                  std::vector<std::string> *vec) {
     auto f = [desc](const std::string &name) -> std::string {
       if (name == framework::kEmptyVarName) {
-        // don't drop empty var name, you can use Input(name, true) to drop it.
+        // don't drop empty var name, you can use Input(name, true) to drop
+        // it.
         return framework::kEmptyVarName;
       }
       auto var_desc =
......
@@ -120,7 +120,6 @@ class WhileOp : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
-    auto *program = block->Program();
     bool is_test = Attr<bool>("is_test");
     std::set<std::string> no_copy_var_names;
@@ -199,26 +198,18 @@ class WhileOp : public framework::OperatorBase {
       }
     }
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      framework::Scope placeholder;  // Don't care if it's valid, just for
+                                     // initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
     if (!is_test) {
@@ -244,22 +235,17 @@ class WhileOp : public framework::OperatorBase {
           }
         }
       }
-        if (FLAGS_control_flow_use_new_executor) {
-          BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-          core_->reset_scope(&current_scope);
-          core_->Run({}, false);
-
-          // restore inputs place
-          for (const auto &n : input_var_original_places) {
-            const std::string &in_name = n.first;
-            const phi::Place &original_place = n.second;
-            // input vars exist in `scope` not `current_scope`
-            TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
-          }
-        } else {
-          executor_->RunPreparedContext(
-              ctx_.get(), &current_scope, false, true, true);
-        }
+        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+        core_->reset_scope(&current_scope);
+        core_->Run({}, false);
+
+        // restore inputs place
+        for (const auto &n : input_var_original_places) {
+          const std::string &in_name = n.first;
+          const phi::Place &original_place = n.second;
+          // input vars exist in `scope` not `current_scope`
+          TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
+        }
        for (auto &var_rename : rename_vars) {
@@ -273,12 +259,8 @@ class WhileOp : public framework::OperatorBase {
     } else {
       auto &current_scope = scope.NewScope();
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-        core_->reset_scope(&current_scope);
-      } else {
-        executor_->CreateVariables(*program, &current_scope, block->ID());
-      }
+      BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+      core_->reset_scope(&current_scope);
       while (cond_data) {
         for (auto &name : current_scope.LocalVarNames()) {
@@ -295,12 +277,7 @@ class WhileOp : public framework::OperatorBase {
           }
         }
-        if (FLAGS_control_flow_use_new_executor) {
-          core_->Run({}, false);
-        } else {
-          executor_->RunPreparedContext(
-              ctx_.get(), &current_scope, false, false, false);
-        }
+        core_->Run({}, false);
         cond_data = GetCondData(
             scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
@@ -367,7 +344,6 @@ class WhileGradOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(dev_place);
     auto *block = Attr<framework::BlockDesc *>(kStepBlock);
-    auto *program = block->Program();
     auto *parent_block = block->ParentBlock();
     auto &skip_vars = Attr<std::vector<std::string>>(kSkipEagerDeletionVars);
@@ -391,28 +367,20 @@ class WhileGradOp : public framework::OperatorBase {
                           outside_og_names.size(),
                           inside_og_names.size()));
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][WhileGradOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][WhileGradOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
+      framework::Scope placeholder;  // Don't care if it's valid, just for
+                                     // initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
     for (auto cur_scope_iter = step_scopes->rbegin();
@@ -504,14 +472,9 @@ class WhileGradOp : public framework::OperatorBase {
         }
       }
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
-        core_->reset_scope(*cur_scope_iter);
-        core_->Run({}, false);
-      } else {
-        executor_->RunPreparedContext(
-            ctx_.get(), *cur_scope_iter, false, true, true);
-      }
+      BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
+      core_->reset_scope(*cur_scope_iter);
+      core_->Run({}, false);
       // The Outputs(kXGRAD) contains the names of the gradient of parameters
       // and inputs.
......
@@ -760,17 +760,17 @@ void BindParallelExecutor(pybind11::module &m) {  // NOLINT
                         build_strategy.fused_feedforward = True
                     )DOC")
       .def_property(
-          "force_sequential_run",
-          [](const BuildStrategy &self) { return self.force_sequential_run_; },
+          "sequential_run",
+          [](const BuildStrategy &self) { return self.sequential_run_; },
           [](BuildStrategy &self, bool b) {
             PADDLE_ENFORCE_NE(self.IsFinalized(),
                               true,
                               platform::errors::PreconditionNotMet(
                                   "BuildStrategy has been finlaized, cannot be "
                                   "configured again."));
-            self.force_sequential_run_ = b;
+            self.sequential_run_ = b;
           },
-          R"DOC((bool, optional): force_sequential_run is used to let the `StandaloneExecutor` run ops by the
+          R"DOC((bool, optional): sequential_run is used to let the `StandaloneExecutor` run ops by the
           order of `ProgramDesc`. Default is False.

           Examples:
@@ -782,7 +782,7 @@ void BindParallelExecutor(pybind11::module &m) {  // NOLINT
                     paddle.enable_static()

                     build_strategy = static.BuildStrategy()
-                    build_strategy.fused_feedforward = True
+                    build_strategy.sequential_run = True
                 )DOC")
       .def_property(
           "fuse_bn_act_ops",
......
@@ -493,26 +493,6 @@ def _to_name_str(var):
         return _to_str(var)


-def _is_dy2st_enable_standalone_executor():
-    return framework._dy2st_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-
-
-def _is_cuda_graph_enable_standalone_executor():
-    return framework._cuda_graph_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-
-
 def _prepare_fleet_executor():
     from ..distributed.fleet.proto import fleet_executor_desc_pb2
@@ -1619,10 +1599,7 @@ class Executor:
                 else program._graph
             )
             build_strategy = compiled_program._build_strategy
-            if (
-                build_strategy is not None
-                and build_strategy.force_sequential_run
-            ):
+            if build_strategy is not None and build_strategy.sequential_run:
                 schedule_flag = [
                     'FLAGS_new_executor_serial_run',
                     'FLAGS_new_executor_sequential_run',
......
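
Note (not part of the commit): per the Executor change above, build_strategy.sequential_run selects the new executor's serial/sequential scheduling flags (the remainder of that branch is collapsed in this view). The same behaviour can also be driven through the exported flag directly, which is what the new test_str_flag case below does.

    # sketch of toggling the underlying flag directly
    import os
    import paddle

    # environment form, as used by test_str_flag in the updated test below
    os.environ['FLAGS_new_executor_sequential_run'] = 'true'

    # runtime form (assumption: the flag is exported and visible to paddle.set_flags)
    paddle.set_flags({'FLAGS_new_executor_sequential_run': True})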
@@ -117,12 +117,6 @@ _already_patch_varbase = False
 _current_cuda_graph_mode = None
 _global_flags_ = core.globals()

-_dy2st_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR', 1
-)
-_cuda_graph_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_CUDA_GRAPH_USE_STANDALONE_EXECUTOR', 0
-)

 # special_op_attrs, extra_op_attrs are prepared for printing warnings
 # when turning on FLAGS_print_extra_attrs
......
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import os
 import unittest

 import numpy as np
@@ -31,13 +32,14 @@ class TestStandaloneExecutor(unittest.TestCase):

         return main_program, startup_program, [c]

-    def run_program(self, force_sequential_run=False):
+    def run_program(self, sequential_run=False):
         seed = 100
         paddle.seed(seed)
         np.random.seed(seed)
         main, startup, outs = self.build_program()

         build_strategy = paddle.static.BuildStrategy()
-        build_strategy.force_sequential_run = force_sequential_run
+        build_strategy.sequential_run = sequential_run
+        print(build_strategy)
         compiled_program = paddle.static.CompiledProgram(
             main, build_strategy=build_strategy
         )
@@ -60,6 +62,12 @@ class TestStandaloneExecutor(unittest.TestCase):
         ret2 = self.run_program(False)
         np.testing.assert_array_equal(ret1, ret2)

+    def test_str_flag(self):
+        paddle.enable_static()
+        os.environ['FLAGS_new_executor_sequential_run'] = 'true'
+        ret1 = self.run_program(True)
+        assert os.environ['FLAGS_new_executor_sequential_run'] == "true"
+

 if __name__ == "__main__":
     unittest.main()