Unverified commit d6ee0a13, authored by kangguangli, committed by GitHub

[StandaloneExe] Remove flag about Executor (#52671)

* add strategy force_sequential_run

* remove flag

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix
Parent 3c0b1795
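The user-visible change in this commit is a rename: `BuildStrategy.force_sequential_run` becomes `BuildStrategy.sequential_run`, and the `FLAGS_control_flow_use_new_executor` gflag is deleted so control-flow ops always run on the new executor (`InterpreterCore`). A minimal usage sketch of the renamed option, modeled on the updated unit test at the bottom of this diff:

```python
import paddle
from paddle import static

paddle.enable_static()

build_strategy = static.BuildStrategy()
# Renamed in this commit (was `force_sequential_run`): when True, the
# StandaloneExecutor runs ops in the order they appear in the ProgramDesc.
build_strategy.sequential_run = True

compiled_program = static.CompiledProgram(
    static.default_main_program(), build_strategy=build_strategy
)
```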
@@ -135,7 +135,7 @@ struct BuildStrategy {
   bool fuse_adamw_{false};
   // Fused feed forward
   bool fused_feedforward_{false};
-  bool force_sequential_run_{false};
+  bool sequential_run_{false};
   // mkldnn_enabled_op_types specify the operator type list to
   // use MKLDNN acceleration. It is null in default, means
@@ -270,8 +270,7 @@ inline std::ostream &operator<<(std::ostream &os,
   os << "fuse_gemm_epilogue_: " << strategy.fuse_gemm_epilogue_ << std::endl;
   os << "fused_attention_: " << strategy.fused_attention_ << std::endl;
   os << "fused_feedforward_: " << strategy.fused_feedforward_ << std::endl;
-  os << "force_sequential_run_: " << strategy.force_sequential_run_
-     << std::endl;
+  os << "sequential_run_: " << strategy.sequential_run_ << std::endl;
   os << "mkldnn_enabled_op_types_: ";
   for (auto str : strategy.mkldnn_enabled_op_types_) {
     os << str << ", ";
......
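For reference, this `operator<<` is what backs the Python-side string form of `BuildStrategy` (the updated unit test below calls `print(build_strategy)`), so after this commit the dump shows `sequential_run_` instead of `force_sequential_run_`. A quick check, assuming the pybind `__str__` binding routes through this operator:

```python
import paddle

bs = paddle.static.BuildStrategy()
bs.sequential_run = True
# The printed dump should now contain "sequential_run_: 1".
print(bs)
```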
@@ -50,9 +50,6 @@ PADDLE_DEFINE_EXPORTED_bool(new_executor_use_local_scope,
                             true,
                             "Use local_scope in new executor(especially used "
                             "in UT), can turn off for better performance");
-PADDLE_DEFINE_EXPORTED_bool(control_flow_use_new_executor,
-                            true,
-                            "Use new executor in control flow op");
 DECLARE_bool(check_nan_inf);
 DECLARE_bool(benchmark);
......
@@ -34,7 +34,6 @@
 #include "paddle/fluid/platform/device_event.h"
 DECLARE_bool(new_executor_use_local_scope);
-DECLARE_bool(control_flow_use_new_executor);
 namespace paddle {
 namespace framework {
......
@@ -95,48 +95,28 @@ class ConditionalBlockOp : public ConditionalOp {
     auto &skip_vars =
         Attr<std::vector<std::string>>(ConditionalOp::kSkipEagerDeletionVars);
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][ConditionalBlock] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        VLOG(10) << "[interpreterCore cache]" << core_.get();
-        VLOG_IF(10, core_)
-            << platform::is_same_place(core_->GetPlace(), dev_place);
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-        core_.reset(new InterpreterCore(
-            dev_place, *block, &cur_scope, execution_config));
-        VLOG(10) << "[interpreterCore cache]"
-                 << "new created:" << core_;
-      } else {
-        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-        core_->reset_scope(&cur_scope);
-      }
-      core_->Run({}, false);
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][ConditionalBlock] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      VLOG(10) << "[interpreterCore cache]" << core_.get();
+      VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                    dev_place);
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+      core_.reset(new InterpreterCore(
+          dev_place, *block, &cur_scope, execution_config));
+      VLOG(10) << "[interpreterCore] created:" << core_;
     } else {
-      if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-        auto &pdesc = *block->Program();
-        exec_.reset(new Executor(dev_place));
-        if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-        ctx_ = exec_->Prepare(pdesc, block->ID(), skip_vars, false);
-#ifdef PADDLE_WITH_MKLDNN
-        platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-        platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-      }
-      exec_->RunPreparedContext(ctx_.get(),
-                                &cur_scope,
-                                /* create_local_scope */ false,
-                                /* create_vars */ true,
-                                /* keep_kids */ true);
+      BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+      core_->reset_scope(&cur_scope);
     }
+    core_->Run({}, false);
   }
 }
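With the old-executor branch gone, what remains is a place-keyed lazy cache: the op rebuilds its `InterpreterCore` only when none is cached or the cached core was built for a different device; otherwise it reuses the core and merely rebinds it to the current scope. The grad op below follows the same shape. A rough Python-level sketch of the pattern (names hypothetical, not Paddle API):

```python
def get_or_rebuild_core(cache, place, block, scope, build_core):
    """Hypothetical sketch of the place-keyed caching pattern above."""
    if cache.core is None or cache.core.place != place:
        # No usable core for this device: build one against the current
        # scope (mirrors core_.reset(new InterpreterCore(...))).
        cache.core = build_core(place, block, scope)
    else:
        # Reuse the cached core, but rebind it to the current scope
        # (mirrors BuildScopeForControlFlowOp + core_->reset_scope).
        cache.core.rebind_scope(scope)
    return cache.core
```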
@@ -208,47 +188,27 @@ class ConditionalBlockGradOp : public ConditionalOp {
     VLOG(3) << "Conditional Grad block.idx = " << block->ID()
             << ", scope = " << &cur_scope;
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        VLOG(10) << "[interpreterCore cache]" << core_.get();
-        VLOG_IF(10, core_)
-            << platform::is_same_place(core_->GetPlace(), dev_place);
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(inside_grads.begin(), inside_grads.end());
-        core_.reset(new InterpreterCore(
-            dev_place, *block, &cur_scope, execution_config));
-        VLOG(10) << "[interpreterCore cache]"
-                 << "new created:" << core_;
-      } else {
-        BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
-        core_->reset_scope(&cur_scope);
-      }
-      core_->Run({}, false);
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][ConditionalGradBlock] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      VLOG(10) << "[interpreterCore cache]" << core_.get();
+      VLOG_IF(10, core_) << platform::is_same_place(core_->GetPlace(),
+                                                    dev_place);
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(inside_grads.begin(), inside_grads.end());
+      core_.reset(new InterpreterCore(
+          dev_place, *block, &cur_scope, execution_config));
+      VLOG(10) << "[interpreterCore] created:" << core_;
     } else {
-      if (!exec_ || !platform::is_same_place(exec_->GetPlace(), dev_place)) {
-        auto &pdesc = *block->Program();
-        exec_.reset(new Executor(dev_place));
-        if (FLAGS_use_mkldnn) exec_->EnableMKLDNN(pdesc);
-        ctx_ = exec_->Prepare(pdesc, block->ID(), inside_grads, false);
-#ifdef PADDLE_WITH_MKLDNN
-        platform::AttachPointerHashToMKLDNNKey(exec_.get(), dev_place);
-        platform::RegisterModelLayout(ctx_->ops_, dev_place);
-#endif
-      }
-      exec_->RunPreparedContext(ctx_.get(),
-                                &cur_scope,
-                                /* create_local_scope */ false,
-                                /* create_vars */ true,
-                                /* keep_kids */ true);
+      BuildScopeForControlFlowOp(*core_, *block, &cur_scope);
+      core_->reset_scope(&cur_scope);
     }
+    core_->Run({}, false);
     AssignLocalGradientToParentScope(
         dev_place, cur_scope, scope, inside_grads, outside_grads, inputs);
@@ -398,7 +358,8 @@ struct FilterNoGradInput<framework::OpDesc> {
                          std::vector<std::string> *vec) {
     auto f = [desc](const std::string &name) -> std::string {
       if (name == framework::kEmptyVarName) {
-        // don't drop empty var name, you can use Input(name, true) to drop it.
+        // don't drop empty var name, you can use Input(name, true) to drop
+        // it.
         return framework::kEmptyVarName;
       }
       auto var_desc =
......
@@ -120,7 +120,6 @@ class WhileOp : public framework::OperatorBase {
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
-    auto *program = block->Program();
     bool is_test = Attr<bool>("is_test");
     std::set<std::string> no_copy_var_names;
@@ -199,26 +198,18 @@ class WhileOp : public framework::OperatorBase {
       }
     }
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1) << "[ControlFlow][WhileOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      framework::Scope placeholder;  // Don't care if it's valid, just for
+                                     // initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
     }
     if (!is_test) {
@@ -244,22 +235,17 @@ class WhileOp : public framework::OperatorBase {
           }
         }
       }
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-        core_->reset_scope(&current_scope);
-        core_->Run({}, false);
-        // restore inputs place
-        for (const auto &n : input_var_original_places) {
-          const std::string &in_name = n.first;
-          const phi::Place &original_place = n.second;
-          // input vars exist in `scope` not `current_scope`
-          TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
-        }
-      } else {
-        executor_->RunPreparedContext(
-            ctx_.get(), &current_scope, false, true, true);
+      BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+      core_->reset_scope(&current_scope);
+      core_->Run({}, false);
+      // restore inputs place
+      for (const auto &n : input_var_original_places) {
+        const std::string &in_name = n.first;
+        const phi::Place &original_place = n.second;
+        // input vars exist in `scope` not `current_scope`
+        TransferVariablePlace(&scope, in_name, original_place, dev_ctx);
       }
       for (auto &var_rename : rename_vars) {
@@ -273,12 +259,8 @@ class WhileOp : public framework::OperatorBase {
     } else {
       auto &current_scope = scope.NewScope();
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, &current_scope);
-        core_->reset_scope(&current_scope);
-      } else {
-        executor_->CreateVariables(*program, &current_scope, block->ID());
-      }
+      BuildScopeForControlFlowOp(*core_, *block, &current_scope);
+      core_->reset_scope(&current_scope);
       while (cond_data) {
         for (auto &name : current_scope.LocalVarNames()) {
@@ -295,12 +277,7 @@ class WhileOp : public framework::OperatorBase {
           }
         }
-        if (FLAGS_control_flow_use_new_executor) {
-          core_->Run({}, false);
-        } else {
-          executor_->RunPreparedContext(
-              ctx_.get(), &current_scope, false, false, false);
-        }
+        core_->Run({}, false);
         cond_data = GetCondData(
             scope.FindVar(Input(kCondition))->Get<phi::DenseTensor>());
@@ -367,7 +344,6 @@ class WhileGradOp : public framework::OperatorBase {
     auto &dev_ctx = *pool.Get(dev_place);
     auto *block = Attr<framework::BlockDesc *>(kStepBlock);
-    auto *program = block->Program();
     auto *parent_block = block->ParentBlock();
     auto &skip_vars = Attr<std::vector<std::string>>(kSkipEagerDeletionVars);
@@ -391,28 +367,20 @@ class WhileGradOp : public framework::OperatorBase {
                           outside_og_names.size(),
                           inside_og_names.size()));
-    if (FLAGS_control_flow_use_new_executor) {
-      LOG_FIRST_N(INFO, 1)
-          << "[ControlFlow][WhileGradOp] New Executor is Running.";
-      if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
-        std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
-        framework::Scope placeholder;  // Don't care if it's valid, just for
-                                       // initialize InterpreterCore
-        framework::interpreter::ExecutionConfig execution_config;
-        execution_config.create_local_scope = false;
-        execution_config.used_for_control_flow_op = true;
-        execution_config.skip_gc_vars =
-            std::set<std::string>(skip_vars.begin(), skip_vars.end());
-        core_.reset(new framework::InterpreterCore(
-            dev_place, *block, &placeholder, execution_config));
-      }
-    } else {
-      if (!executor_ ||
-          !platform::is_same_place(executor_->GetPlace(), dev_place)) {
-        executor_.reset(new framework::Executor(dev_place));
-        ctx_ = executor_->Prepare(*program, block->ID(), skip_vars);
-      }
+    LOG_FIRST_N(INFO, 1)
+        << "[ControlFlow][WhileGradOp] New Executor is Running.";
+    if (!core_ || !platform::is_same_place(core_->GetPlace(), dev_place)) {
+      std::set<std::string> skip_gc_vars(skip_vars.begin(), skip_vars.end());
+      framework::Scope placeholder;  // Don't care if it's valid, just for
+                                     // initialize InterpreterCore
+      framework::interpreter::ExecutionConfig execution_config;
+      execution_config.create_local_scope = false;
+      execution_config.used_for_control_flow_op = true;
+      execution_config.skip_gc_vars =
+          std::set<std::string>(skip_vars.begin(), skip_vars.end());
+      core_.reset(new framework::InterpreterCore(
+          dev_place, *block, &placeholder, execution_config));
    }
     for (auto cur_scope_iter = step_scopes->rbegin();
@@ -504,14 +472,9 @@ class WhileGradOp : public framework::OperatorBase {
         }
       }
-      if (FLAGS_control_flow_use_new_executor) {
-        BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
-        core_->reset_scope(*cur_scope_iter);
-        core_->Run({}, false);
-      } else {
-        executor_->RunPreparedContext(
-            ctx_.get(), *cur_scope_iter, false, true, true);
-      }
+      BuildScopeForControlFlowOp(*core_, *block, *cur_scope_iter);
+      core_->reset_scope(*cur_scope_iter);
+      core_->Run({}, false);
       // The Outputs(kXGRAD) contains the names of the gradient of parameters
       // and inputs.
......
@@ -760,17 +760,17 @@ void BindParallelExecutor(pybind11::module &m) {  // NOLINT
                         build_strategy.fused_feedforward = True
                     )DOC")
       .def_property(
-          "force_sequential_run",
-          [](const BuildStrategy &self) { return self.force_sequential_run_; },
+          "sequential_run",
+          [](const BuildStrategy &self) { return self.sequential_run_; },
          [](BuildStrategy &self, bool b) {
             PADDLE_ENFORCE_NE(self.IsFinalized(),
                               true,
                               platform::errors::PreconditionNotMet(
                                   "BuildStrategy has been finalized, cannot be "
                                   "configured again."));
-            self.force_sequential_run_ = b;
+            self.sequential_run_ = b;
           },
-          R"DOC((bool, optional): force_sequential_run is used to let the `StandaloneExecutor` run ops by the
+          R"DOC((bool, optional): sequential_run is used to let the `StandaloneExecutor` run ops by the
                 order of `ProgramDesc`. Default is False.
                 Examples:
@@ -782,7 +782,7 @@ void BindParallelExecutor(pybind11::module &m) {  // NOLINT
                         paddle.enable_static()
                         build_strategy = static.BuildStrategy()
-                        build_strategy.fused_feedforward = True
+                        build_strategy.sequential_run = True
                     )DOC")
       .def_property(
           "fuse_bn_act_ops",
......
@@ -493,26 +493,6 @@ def _to_name_str(var):
         return _to_str(var)
-def _is_dy2st_enable_standalone_executor():
-    return framework._dy2st_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
-def _is_cuda_graph_enable_standalone_executor():
-    return framework._cuda_graph_enable_standalone_executor_ in [
-        1,
-        '1',
-        True,
-        'True',
-        'true',
-    ]
 def _prepare_fleet_executor():
     from ..distributed.fleet.proto import fleet_executor_desc_pb2
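Both deleted helpers reduced to the same truthiness test against an environment-derived value, differing only in which module attribute they read; with the standalone executor now unconditional, no caller remains. For illustration only, a consolidated form of what they did (hypothetical helper, not part of the codebase):

```python
def _env_flag_enabled(value):
    # Both removed helpers accepted exactly these spellings as "on".
    return value in (1, '1', True, 'True', 'true')
```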
@@ -1619,10 +1599,7 @@ class Executor:
                 else program._graph
             )
             build_strategy = compiled_program._build_strategy
-            if (
-                build_strategy is not None
-                and build_strategy.force_sequential_run
-            ):
+            if build_strategy is not None and build_strategy.sequential_run:
                 schedule_flag = [
                     'FLAGS_new_executor_serial_run',
                     'FLAGS_new_executor_sequential_run',
......
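The `schedule_flag` handling is truncated here by the diff viewer; judging from the flag names it collects, the intent is to force the StandaloneExecutor's serial and sequential scheduling on when `sequential_run` is set. A hedged sketch of toggling the same behavior directly (assuming these exported gflags are writable from Python, as the new `test_str_flag` test implies for the env-var route):

```python
import os

# Route 1: environment variable, read when the flag library initializes
# (this is what the new unit test exercises).
os.environ['FLAGS_new_executor_sequential_run'] = 'true'

import paddle

# Route 2: flip the exported global at runtime.
paddle.set_flags({'FLAGS_new_executor_sequential_run': True})
```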
@@ -117,12 +117,6 @@ _already_patch_varbase = False
 _current_cuda_graph_mode = None
 _global_flags_ = core.globals()
-_dy2st_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR', 1
-)
-_cuda_graph_enable_standalone_executor_ = os.environ.get(
-    'FLAGS_CUDA_GRAPH_USE_STANDALONE_EXECUTOR', 0
-)
 # special_op_attrs, extra_op_attrs are prepared for printing warnings
 # when turning on FLAGS_print_extra_attrs
......
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import unittest
 import numpy as np
@@ -31,13 +32,14 @@ class TestStandaloneExecutor(unittest.TestCase):
         return main_program, startup_program, [c]
-    def run_program(self, force_sequential_run=False):
+    def run_program(self, sequential_run=False):
         seed = 100
         paddle.seed(seed)
         np.random.seed(seed)
         main, startup, outs = self.build_program()
         build_strategy = paddle.static.BuildStrategy()
-        build_strategy.force_sequential_run = force_sequential_run
+        build_strategy.sequential_run = sequential_run
+        print(build_strategy)
         compiled_program = paddle.static.CompiledProgram(
             main, build_strategy=build_strategy
         )
@@ -60,6 +62,12 @@ class TestStandaloneExecutor(unittest.TestCase):
         ret2 = self.run_program(False)
         np.testing.assert_array_equal(ret1, ret2)
+    def test_str_flag(self):
+        paddle.enable_static()
+        os.environ['FLAGS_new_executor_sequential_run'] = 'true'
+        ret1 = self.run_program(True)
+        assert os.environ['FLAGS_new_executor_sequential_run'] == "true"
 if __name__ == "__main__":
     unittest.main()