Unverified commit d6ee0a13, authored by kangguangli, committed by GitHub

[StandaloneExe] Remove flag about Executor (#52671)

* add strategy force_sequential_run

* remove flag

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix
Parent 3c0b1795
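
Note (not part of the commit): a minimal usage sketch of the renamed option, assembled from the test and docstring changes below. The BuildStrategy / CompiledProgram calls come straight from the diff; the tiny add program is illustrative only.

    # usage sketch, assuming the paddle.static APIs shown in the diff below
    import paddle
    import paddle.static as static

    paddle.enable_static()

    main, startup = static.Program(), static.Program()
    with static.program_guard(main, startup):
        x = paddle.full([2, 2], 1.0)
        y = paddle.full([2, 2], 2.0)
        z = paddle.add(x, y)

    build_strategy = static.BuildStrategy()
    build_strategy.sequential_run = True  # renamed from force_sequential_run in this commit

    compiled = static.CompiledProgram(main, build_strategy=build_strategy)
    exe = static.Executor(paddle.CPUPlace())
    exe.run(startup)
    (out,) = exe.run(compiled, fetch_list=[z])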
@@ -135,7 +135,7 @@ struct BuildStrategy {
   bool fuse_adamw_{false};
   // Fused feed forward
   bool fused_feedforward_{false};
-  bool force_sequential_run_{false};
+  bool sequential_run_{false};
   // mkldnn_enabled_op_types specify the operator type list to
   // use MKLDNN acceleration. It is null in default, means
@@ -270,8 +270,7 @@ inline std::ostream &operator<<(std::ostream &os,
   os << "fuse_gemm_epilogue_: " << strategy.fuse_gemm_epilogue_ << std::endl;
   os << "fused_attention_: " << strategy.fused_attention_ << std::endl;
   os << "fused_feedforward_: " << strategy.fused_feedforward_ << std::endl;
-  os << "force_sequential_run_: " << strategy.force_sequential_run_
-     << std::endl;
+  os << "sequential_run_: " << strategy.sequential_run_ << std::endl;
   os << "mkldnn_enabled_op_types_: ";
   for (auto str : strategy.mkldnn_enabled_op_types_) {
     os << str << ", ";
......
@@ -50,9 +50,6 @@ PADDLE_DEFINE_EXPORTED_bool(new_executor_use_local_scope,
                             true,
                             "Use local_scope in new executor(especially used "
                             "in UT), can turn off for better performance");
-PADDLE_DEFINE_EXPORTED_bool(control_flow_use_new_executor,
-                            true,
-                            "Use new executor in control flow op");
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(benchmark);
......
@@ -34,7 +34,6 @@
 #include "paddle/fluid/platform/device_event.h"
 DECLARE_bool(new_executor_use_local_scope);
-DECLARE_bool(control_flow_use_new_executor);
 namespace paddle {
 namespace framework {
......
@@ -95,48 +95,28 @@ class ConditionalBlockOp : public ConditionalOp {
     auto &skip_vars =
         Attr<std::vector<std::string>>(ConditionalOp::kSkipEagerDeletionVars);
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][ConditionalBlock] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        VLOG(10) << "[interpreterCore cache]" << core_.get();
-        VLOG_IF(10, core_)
-            << platform::is_same_place(core_->GetPlace(), dev_place);
-
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new InterpreterCore(
-            dev_place, *block, &cur_scope, execution_config));
-        VLOG(10) << "[interpreterCore cache]"
-                 << "new created:" << core_;
-      } else {
-        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-        core_->reset_scope(&cur_scope);
-      }
-      core_->Run({}, false);
-    } else {
-      if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-        auto &pdesc = *block->Program();
-        exec_.reset(new Executor(dev_place));
-        if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-        ctx_ = exec_->Prepare(pdesc, block->ID(), skip_vars, false);
-#ifdef PADDLE_WITH_MKLDNN
-        platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-        platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-      }
-      exec_->RunPreparedContext(ctx_.get(),
-                                &cur_scope,
-                                /* create_local_scope */ false,
-                                /* create_vars */ true,
-                                /* keep_kids */ true);
-    }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][ConditionalBlock] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      VLOG(10) << "[interpreterCore cache]" << core_.get();
+      VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                    dev_place);
+
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new InterpreterCore(
+          dev_place, *block, &cur_scope, execution_config));
+      VLOG(10) << "[interpreterCore] created:" << core_;
+    } else {
+      BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+      core_->reset_scope(&cur_scope);
+    }
+    core_->Run({}, false);
   }
 }
@@ -208,47 +188,27 @@ class ConditionalBlockGradOp : public ConditionalOp {
     VLOG(3) << "Conditional Grad block.idx = " << block->ID()
             << ", scope = " << &cur_scope;
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        VLOG(10) << "[interpreterCore cache]" << core_.get();
-        VLOG_IF(10, core_)
-            << platform::is_same_place(core_->GetPlace(), dev_place);
-
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(inside_grads.begin(), inside_grads.end());
-
-        core_.reset(new InterpreterCore(
-            dev_place, *block, &cur_scope, execution_config));
-        VLOG(10) << "[interpreterCore cache]"
-                 << "new created:" << core_;
-      } else {
-        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-        core_->reset_scope(&cur_scope);
-      }
-      core_->Run({}, false);
-    } else {
-      if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-        auto &pdesc = *block->Program();
-        exec_.reset(new Executor(dev_place));
-        if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-        ctx_ = exec_->Prepare(pdesc, block->ID(), inside_grads, false);
-#ifdef PADDLE_WITH_MKLDNN
-        platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-        platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-      }
-      exec_->RunPreparedContext(ctx_.get(),
-                                &cur_scope,
-                                /* create_local_scope */ false,
-                                /* create_vars */ true,
-                                /* keep_kids */ true);
-    }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      VLOG(10) << "[interpreterCore cache]" << core_.get();
+      VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                    dev_place);
+
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(inside_grads.begin(), inside_grads.end());
+
+      core_.reset(new InterpreterCore(
+          dev_place, *block, &cur_scope, execution_config));
+      VLOG(10) << "[interpreterCore] created:" << core_;
+    } else {
+      BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+      core_->reset_scope(&cur_scope);
+    }
+    core_->Run({}, false);
     AssignLocalGradientToParentScope(
         dev_place, cur_scope, scope, inside_grads, outside_grads, inputs);
@@ -398,7 +358,8 @@ struct FilterNoGradInput<framework::OpDesc> {
                                  std::vector<std::string> *vec) {
     auto f = [desc](const std::string &name) -> std::string {
       if (name == framework::kEmptyVarName) {
-        // don't drop empty var name, you can use Input(name, true) to drop it.
+        // don't drop empty var name, you can use Input(name, true) to drop
+        // it.
         return framework::kEmptyVarName;
       }
       auto var_desc =
......
@@ -120,7 +120,6 @@ class WhileOp : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
-    auto *program = block->Program();
     bool is_test = Attr<bool>("is_test");
     std::set<std::string> no_copy_var_names;
@@ -199,26 +198,18 @@ class WhileOp : public framework::OperatorBase {
       }
     }
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      framework::Scope placeholder;  // Don't care if it's valid, just for
+                                     // initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
     if (!is_test) {
@@ -244,22 +235,17 @@ class WhileOp : public framework::OperatorBase {
           }
         }
       }
-        if (FLAGS_control_flow_use_new_executor) {
-          BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-          core_->reset_scope(&current_scope);
-          core_->Run({}, false);
-
-          // restore inputs place
-          for (const auto &n : input_var_original_places) {
-            const std::string &in_name = n.first;
-            const phi::Place &original_place = n.second;
-            // input vars exist in `scope` not `current_scope`
-            TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
-          }
-        } else {
-          executor_->RunPreparedContext(
-              ctx_.get(), &current_scope, false, true, true);
-        }
+        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+        core_->reset_scope(&current_scope);
+        core_->Run({}, false);
+
+        // restore inputs place
+        for (const auto &n : input_var_original_places) {
+          const std::string &in_name = n.first;
+          const phi::Place &original_place = n.second;
+          // input vars exist in `scope` not `current_scope`
+          TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
+        }
        for (auto &var_rename : rename_vars) {
@@ -273,12 +259,8 @@ class WhileOp : public framework::OperatorBase {
     } else {
       auto &current_scope = scope.NewScope();
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-        core_->reset_scope(&current_scope);
-      } else {
-        executor_->CreateVariables(*program, &current_scope, block->ID());
-      }
+      BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+      core_->reset_scope(&current_scope);
       while (cond_data) {
         for (auto &name : current_scope.LocalVarNames()) {
@@ -295,12 +277,7 @@ class WhileOp : public framework::OperatorBase {
           }
         }
-        if (FLAGS_control_flow_use_new_executor) {
-          core_->Run({}, false);
-        } else {
-          executor_->RunPreparedContext(
-              ctx_.get(), &current_scope, false, false, false);
-        }
+        core_->Run({}, false);
         cond_data = GetCondData(
             scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
@@ -367,7 +344,6 @@ class WhileGradOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(dev_place);
     auto *block = Attr<framework::BlockDesc *>(kStepBlock);
-    auto *program = block->Program();
     auto *parent_block = block->ParentBlock();
     auto &skip_vars = Attr<std::vector<std::string>>(kSkipEagerDeletionVars);
@@ -391,28 +367,20 @@ class WhileGradOp : public framework::OperatorBase {
                           outside_og_names.size(),
                           inside_og_names.size()));
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][WhileGradOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][WhileGradOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
+      framework::Scope placeholder;  // Don't care if it's valid, just for
+                                     // initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
     for (auto cur_scope_iter = step_scopes->rbegin();
@@ -504,14 +472,9 @@ class WhileGradOp : public framework::OperatorBase {
         }
       }
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
-        core_->reset_scope(*cur_scope_iter);
-        core_->Run({}, false);
-      } else {
-        executor_->RunPreparedContext(
-            ctx_.get(), *cur_scope_iter, false, true, true);
-      }
+      BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
+      core_->reset_scope(*cur_scope_iter);
+      core_->Run({}, false);
       // The Outputs(kXGRAD) contains the names of the gradient of parameters
       // and inputs.
......
@@ -760,17 +760,17 @@ void BindParallelExecutor(pybind11::module &m) {  // NOLINT
                         build_strategy.fused_feedforward = True
                     )DOC")
       .def_property(
-          "force_sequential_run",
-          [](const BuildStrategy &self) { return self.force_sequential_run_; },
+          "sequential_run",
+          [](const BuildStrategy &self) { return self.sequential_run_; },
           [](BuildStrategy &self, bool b) {
             PADDLE_ENFORCE_NE(self.IsFinalized(),
                               true,
                               platform::errors::PreconditionNotMet(
                                   "BuildStrategy has been finlaized, cannot be "
                                   "configured again."));
-            self.force_sequential_run_ = b;
+            self.sequential_run_ = b;
           },
-          R"DOC((bool, optional): force_sequential_run is used to let the `StandaloneExecutor` run ops by the
+          R"DOC((bool, optional): sequential_run is used to let the `StandaloneExecutor` run ops by the
           order of `ProgramDesc`. Default is False.

           Examples:
@@ -782,7 +782,7 @@ void BindParallelExecutor(pybind11::module &m) {  // NOLINT
                     paddle.enable_static()

                     build_strategy = static.BuildStrategy()
-                    build_strategy.fused_feedforward = True
+                    build_strategy.sequential_run = True
                 )DOC")
       .def_property(
           "fuse_bn_act_ops",
......
@@ -493,26 +493,6 @@ def _to_name_str(var):
         return _to_str(var)


-def _is_dy2st_enable_standalone_executor():
-    return framework._dy2st_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-
-
-def _is_cuda_graph_enable_standalone_executor():
-    return framework._cuda_graph_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-
-
 def _prepare_fleet_executor():
     from ..distributed.fleet.proto import fleet_executor_desc_pb2
@@ -1619,10 +1599,7 @@ class Executor:
                 else program._graph
             )
             build_strategy = compiled_program._build_strategy
-            if (
-                build_strategy is not None
-                and build_strategy.force_sequential_run
-            ):
+            if build_strategy is not None and build_strategy.sequential_run:
                 schedule_flag = [
                     'FLAGS_new_executor_serial_run',
                     'FLAGS_new_executor_sequential_run',
......
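
Note (not part of the commit): per the Executor change above, build_strategy.sequential_run selects the new executor's serial/sequential scheduling flags (the remainder of that branch is collapsed in this view). The same behaviour can also be driven through the exported flag directly, which is what the new test_str_flag case below does.

    # sketch of toggling the underlying flag directly
    import os
    import paddle

    # environment form, as used by test_str_flag in the updated test below
    os.environ['FLAGS_new_executor_sequential_run'] = 'true'

    # runtime form (assumption: the flag is exported and visible to paddle.set_flags)
    paddle.set_flags({'FLAGS_new_executor_sequential_run': True})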
@@ -117,12 +117,6 @@ _already_patch_varbase = False
 _current_cuda_graph_mode = None
 _global_flags_ = core.globals()

-_dy2st_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR', 1
-)
-_cuda_graph_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_CUDA_GRAPH_USE_STANDALONE_EXECUTOR', 0
-)

 # special_op_attrs, extra_op_attrs are prepared for printing warnings
 # when turning on FLAGS_print_extra_attrs
......
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import os
 import unittest

 import numpy as np
@@ -31,13 +32,14 @@ class TestStandaloneExecutor(unittest.TestCase):

         return main_program, startup_program, [c]

-    def run_program(self, force_sequential_run=False):
+    def run_program(self, sequential_run=False):
         seed = 100
         paddle.seed(seed)
         np.random.seed(seed)
         main, startup, outs = self.build_program()

         build_strategy = paddle.static.BuildStrategy()
-        build_strategy.force_sequential_run = force_sequential_run
+        build_strategy.sequential_run = sequential_run
+        print(build_strategy)
         compiled_program = paddle.static.CompiledProgram(
             main, build_strategy=build_strategy
         )
@@ -60,6 +62,12 @@ class TestStandaloneExecutor(unittest.TestCase):
         ret2 = self.run_program(False)
         np.testing.assert_array_equal(ret1, ret2)

+    def test_str_flag(self):
+        paddle.enable_static()
+        os.environ['FLAGS_new_executor_sequential_run'] = 'true'
+        ret1 = self.run_program(True)
+        assert os.environ['FLAGS_new_executor_sequential_run'] == "true"
+

 if __name__ == "__main__":
     unittest.main()