未验证 提交 fb376285 作者: Ruibiao Chen 提交者: GitHub

Enable standalone executor for single-GPU training (#45913)

* Enable standalone executor for single-GPU training

* Disable CompiledProgram._graph

* Fix CI errors
上级 d44ada53
...@@ -1555,23 +1555,32 @@ class Executor(object): ...@@ -1555,23 +1555,32 @@ class Executor(object):
place, core.CustomPlace): place, core.CustomPlace):
return False return False
use_standalone_executor_for_compiled_program = os.environ.get( use_standalone_executor_for_distribution = os.environ.get(
'FLAGS_CONVERT_GRAPH_TO_PROGRAM', 'FLAGS_CONVERT_GRAPH_TO_PROGRAM',
None) in [1, '1', True, 'True', 'true'] None) in [1, '1', True, 'True', 'true']
# Only support fleet when 'FLAGS_CONVERT_GRAPH_TO_PROGRAM' is set to true
from paddle.distributed.fleet import fleet
if fleet._role_maker is not None and not use_standalone_executor_for_compiled_program:
warnings.warn("Standalone executor is not used for fleet",
UserWarning)
return False
compiled = isinstance(program, compiled = isinstance(program,
compiler.CompiledProgram) or isinstance( compiler.CompiledProgram) or isinstance(
program._graph, compiler.CompiledProgram) program._graph, compiler.CompiledProgram)
if compiled: if compiled:
compiled_program = program if isinstance( compiled_program = program if isinstance(
program, compiler.CompiledProgram) else program._graph program, compiler.CompiledProgram) else program._graph
# delete this code after supporting compiled_program._graph
if compiled_program._program is None:
warnings.warn("Standalone executor is not used for Graph",
UserWarning)
return use_standalone_executor_for_distribution
# delete this code after supporting distribution
if compiled_program._build_strategy is not None and (
compiled_program._build_strategy.is_distribution
or compiled_program._build_strategy.num_trainers > 1):
warnings.warn(
"Standalone executor is not used for distribution",
UserWarning)
return use_standalone_executor_for_distribution
# Unsupported case 1: data parallel # Unsupported case 1: data parallel
if compiled_program._is_data_parallel and len( if compiled_program._is_data_parallel and len(
compiled_program._get_places( compiled_program._get_places(
...@@ -1611,10 +1620,14 @@ class Executor(object): ...@@ -1611,10 +1620,14 @@ class Executor(object):
UserWarning) UserWarning)
return False return False
return use_standalone_executor_for_compiled_program # delete this code after supporting fleet
else: from paddle.distributed.fleet import fleet
assert isinstance(program, Program) if fleet._role_maker is not None:
return True warnings.warn("Standalone executor is not used for fleet",
UserWarning)
return use_standalone_executor_for_distribution
return True
# NOTE: This is an experimental feature. If `export FLAGS_USE_STANDALONE_EXECUTOR=1 `, # NOTE: This is an experimental feature. If `export FLAGS_USE_STANDALONE_EXECUTOR=1 `,
# use StandaloneExecutor to run the program. # use StandaloneExecutor to run the program.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册