diff --git a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py index b6dea66f7bfcb7bf108a709bd5ad40087c44a121..b3a925070b320a785429570743f2e99de51fe3b2 100644 --- a/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py +++ b/python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py @@ -473,9 +473,7 @@ class OptimizationTuner: parent_env = copy.copy(os.environ.copy()) # env flags need for profile - new_env = { - "FLAGS_USE_STANDALONE_EXECUTOR": "False", - } + new_env = {} new_env.update(parent_env) # TODO if any rank hang or fail, kill all processes diff --git a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py index f943e58748d07042d2e69a3c9856f846bb007769..5d519bcc94e06b544284d899c9253063018215e0 100644 --- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py +++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py @@ -34,7 +34,6 @@ from paddle.distributed.auto_parallel.utils import ( ring_id_to_process_group, ) from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid.executor import _is_enable_standalone_executor from paddle.static import default_main_program from paddle.utils import unique_name @@ -97,8 +96,7 @@ class DataParallelOptimizationPass(PassBase): self.global_rank = int(self.get_attr("global_rank")) self.use_sharding = self.get_attr("use_sharding") self.coalesce_prefix = 'coalesce_grad' - if _is_enable_standalone_executor(): - self.gradient_sync_stream = "gradient_sync_stream" + self.gradient_sync_stream = "gradient_sync_stream" with paddle.static.program_guard(main_program, startup_program): self._analyze_program() @@ -316,8 +314,7 @@ class DataParallelOptimizationPass(PassBase): def _calc_wait_comms(self): - if _is_enable_standalone_executor(): - return + return block = default_main_program().global_block() @@ -602,7 +599,7 @@ class DataParallelOptimizationPass(PassBase): # multiple stream executor(standalone exe). This function just for standalone exe. Refactor here # in future when only one executor stay. 
- if not _is_enable_standalone_executor() or len(grad_groups) == 0: + if len(grad_groups) == 0: return block = default_main_program().global_block() diff --git a/python/paddle/distributed/passes/auto_parallel_grad_clip.py b/python/paddle/distributed/passes/auto_parallel_grad_clip.py index 525420422af681258b5a632e5c14f3b435cc0468..20e602c9a20446c7c683f9a3efd894186aa91f3d 100644 --- a/python/paddle/distributed/passes/auto_parallel_grad_clip.py +++ b/python/paddle/distributed/passes/auto_parallel_grad_clip.py @@ -18,7 +18,6 @@ import numpy as np import paddle from paddle.distributed.fleet.meta_optimizers.common import OP_ROLE_KEY, OpRole -from paddle.fluid.executor import _is_enable_standalone_executor from ..auto_parallel.dist_attribute import OperatorDistAttr, TensorDistAttr from ..auto_parallel.operators.common import ( @@ -460,10 +459,7 @@ class ClipGradByGloblNormPass(PassBase): ) self.clip_helper._init_dist_attr(allreduce_op) - if ( - _is_enable_standalone_executor() - and insert_leaf_fill_constant_node - ): + if insert_leaf_fill_constant_node: # NOTE add naive deps for global norm sync in graph exe j = idx - 1 diff --git a/python/paddle/distributed/passes/auto_parallel_sharding.py b/python/paddle/distributed/passes/auto_parallel_sharding.py index d9c82746ae9fbb3717767f2b9f570e26480793f8..acdbcf18d33b8229e1c5e2e7681a2cc630b81e4b 100644 --- a/python/paddle/distributed/passes/auto_parallel_sharding.py +++ b/python/paddle/distributed/passes/auto_parallel_sharding.py @@ -35,7 +35,6 @@ from paddle.distributed.auto_parallel.utils import ( set_var_dist_attr, ) from paddle.distributed.fleet.meta_optimizers.sharding.utils import get_var_size -from paddle.fluid.executor import _is_enable_standalone_executor from paddle.framework import core from paddle.static import default_main_program, default_startup_program from paddle.utils import unique_name @@ -1168,7 +1167,7 @@ class ShardingPass(PassBase): P.S. this overlap pass is ONLY adapted for standalone executor (graph based) and stream awared allocator. """ - if not _is_enable_standalone_executor() or (not self.enable_overlap): + if not self.enable_overlap: return self.grad_comm_group_stream_pairs = [] diff --git a/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py b/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py index 224629af72794347f99caaec6185f250e8d873b1..c164b6e8ddbc478216e725a602d1c39f4764d758 100644 --- a/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py +++ b/python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py @@ -21,7 +21,6 @@ from paddle.distributed.auto_parallel.utils import ( OpRole, insert_dependencies_for_vars, ) -from paddle.fluid.executor import _is_enable_standalone_executor from .auto_parallel_sharding import ShardingPass, _supported_optimizer_type from .pass_base import PassBase, register_pass @@ -70,9 +69,7 @@ class AutoParalSupplementDepPass(PassBase): def _apply_single_impl(self, main_program, startup_program, context): # TODO general this pass for all case. 
- if not _is_enable_standalone_executor or not _sharding_pass_applied( - context - ): + if not _sharding_pass_applied(context): return self._dist_context = self.get_attr("dist_context", None) diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index d205a122db654b67c43a2af5e22198fd8b5954a4..2b8b4fcd380e05c806d5bd73527e20dda825415c 100755 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -493,14 +493,6 @@ def _to_name_str(var): return _to_str(var) -def _is_enable_standalone_executor(): - return ( - framework._enable_standalone_executor_ is None - or framework._enable_standalone_executor_ - in [1, '1', True, 'True', 'true'] - ) - - def _is_dy2st_enable_standalone_executor(): return framework._dy2st_enable_standalone_executor_ in [ 1, @@ -1004,8 +996,6 @@ class Executor: "__auto_checkpoint_executor__" ) - # NOTE: Whether to use experimental executor `StandaloneExecutor`. - self._enable_interpreter_core = _is_enable_standalone_executor() self._executor_cache = _ExecutorCache() self._fleet_executor = None @@ -1605,9 +1595,7 @@ class Executor: return True - if self._enable_interpreter_core and _can_use_interpreter_core( - program, self.place - ): + if _can_use_interpreter_core(program, self.place): if feed is None: feed = {} @@ -1685,132 +1673,12 @@ class Executor: acp._auto_checkpoint(self, program) - # For backward compatibility, run directly. - if not compiled: - - return self._run_program( - program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - scope=scope, - return_numpy=return_numpy, - use_program_cache=use_program_cache, - ) - program._compile(scope, self.place) assert ( program._is_inference ), f"Program must have _is_inference = True, but get {program._is_inference}" return self._run_inference(program._executor, feed) - def _run_program( - self, - program, - feed, - fetch_list, - feed_var_name, - fetch_var_name, - scope, - return_numpy, - use_program_cache, - ): - from paddle.optimizer.lr import LRScheduler - - if feed is None: - feed = {} - elif isinstance(feed, (list, tuple)): - assert len(feed) == 1, "Not compiled with data parallel" - feed = feed[0] - - if not isinstance(feed, dict): - raise TypeError( - "feed requires dict as its Parameter. But you passed in %s" - % (type(feed)) - ) - - assert program is not None, "The program should not be Empty" - if not isinstance(program, Program): - raise TypeError( - "Executor requires Program as its Parameter. But you passed in %s" - % (type(program)) - ) - - if not isinstance(fetch_var_name, str): - raise TypeError( - "The name of fetch variable requires string as its Parameter. But you passed in %s" - % (type(fetch_var_name)) - ) - - if use_program_cache: - cache_key = _get_strong_program_cache_key(program, feed, fetch_list) - cached_program = self._get_program_cache(cache_key) - cached_ctx = self._get_ctx_cache(cache_key) - cached_scope = self._get_scope_cache(cache_key) - if cached_program is None: - cached_program = _add_feed_fetch_ops( - program=program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - ) - self._add_program_cache(cache_key, cached_program) - fetch_list_str = list(map(_to_name_str, fetch_list)) - cached_ctx = self._default_executor.prepare( - cached_program.desc, 0, fetch_list_str, False - ) - # currently, we cache program, vars, sub_scope here - # we suppose that in a life cycle of training, a user - # will not create many programs. 
So, here the basic - # rule of caching is to cache all unseen (program, var, scope) - # when a user use use_program_cache. - cached_scope = scope.new_scope() - self._default_executor.create_variables( - cached_program.desc, cached_scope, 0 - ) - self._add_ctx_cache(cache_key, cached_ctx) - self._add_scope_cache(cache_key, cached_scope) - program = cached_program - ctx = cached_ctx - scope = cached_scope - else: - program = _add_feed_fetch_ops( - program=program, - feed=feed, - fetch_list=fetch_list, - feed_var_name=feed_var_name, - fetch_var_name=fetch_var_name, - ) - - self._feed_data(program, feed, feed_var_name, scope) - if hasattr(program, 'lr_schedulerr'): - assert isinstance( - program.lr_scheduler, LRScheduler - ), "must be LRScheduler" - lr_scheduler = program.lr_scheduler - lr_value = lr_scheduler() - lr_var = program.global_block().vars[lr_scheduler._var_name] - data = np.array([lr_value]).astype(convert_dtype(lr_var.dtype)) - tensor = core.get_variable_tensor(scope, lr_scheduler._var_name) - tensor.set(data, self.place) - - if not use_program_cache: - self._default_executor.run( - program.desc, scope, 0, True, True, [fetch_var_name] - ) - else: - self._default_executor.run_prepared_ctx( - ctx, scope, False, False, False - ) - arr = scope.find_var(fetch_var_name).get_fetch_list() - tensors = arr._move_to_list() - if return_numpy: - return as_numpy(tensors) - else: - return tensors - def _run_inference(self, exe, feed): return exe.run(feed) diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 97fcb9ad1ad7349112cbb903e0c4bb5aa7e61fa9..98d0fbf0620b7d96f7c9f4c5270d9d5bd31061c5 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -116,9 +116,7 @@ _already_patch_eager_tensor = False _already_patch_varbase = False _current_cuda_graph_mode = None _global_flags_ = core.globals() -_enable_standalone_executor_ = os.environ.get( - 'FLAGS_USE_STANDALONE_EXECUTOR', None -) + _dy2st_enable_standalone_executor_ = os.environ.get( 'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR', 1 ) @@ -270,17 +268,6 @@ ipu_index_attr_name = 'ipu_index' ipu_stage_attr_name = 'ipu_stage' -@signature_safe_contextmanager -def _enable_standalone_executor(enable=True): - global _enable_standalone_executor_ - original_ = _enable_standalone_executor_ - _enable_standalone_executor_ = enable - try: - yield - finally: - _enable_standalone_executor_ = original_ - - @signature_safe_contextmanager def ipu_shard_guard(index=-1, stage=-1): """ diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 1eb9e24eef8fbc07b39df1d6faa93690f27a11fd..d7f856fde675e8c15125ed5c25fe6c3003a63ded 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -1150,25 +1150,6 @@ if(WITH_GLOO) PROPERTIES TIMEOUT 120) endif() -if($ENV{USE_STANDALONE_EXECUTOR}) - # these test will fail in some server due to PR#42149, temporarily set it use old executor. 
- set_tests_properties(test_apply_pass_to_program - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_buffer_shared_memory_reuse_pass - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties( - test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_optimizer - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_star_gan_with_gradient_penalty - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_switch_autotune - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties(test_imperative_mnist_sorted_gradient - PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0) -endif() - set(TEST_CINN_OPS test_softmax_op test_expand_v2_op diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index 96ed9ecd69e71f8fc1d512ada0a949841168fa24..d8205fdf4528d327760e528ccc0fd229cd730b5d 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -201,9 +201,7 @@ if(WITH_GPU AND TENSORRT_FOUND) set_tests_properties(test_trt_conv3d_op PROPERTIES TIMEOUT 60) set_tests_properties(test_trt_conv3d_transpose_op PROPERTIES TIMEOUT 60) set_tests_properties(test_trt_nearest_interp_v2_op PROPERTIES TIMEOUT 30) - set_tests_properties( - test_trt_multiclass_nms3_op PROPERTIES TIMEOUT 60 ENVIRONMENT - FLAGS_USE_STANDALONE_EXECUTOR=0) + set_tests_properties(test_trt_multiclass_nms3_op PROPERTIES TIMEOUT 60) if(WITH_MKLDNN AND TENSORRT_FOUND diff --git a/python/paddle/fluid/tests/unittests/test_eager_run_program.py b/python/paddle/fluid/tests/unittests/test_eager_run_program.py index fe038bc71044689b0268490c7b1707a90ee1e058..0dd6b320362eb64b8e323ddf58a34a45dfcea508 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_run_program.py +++ b/python/paddle/fluid/tests/unittests/test_eager_run_program.py @@ -20,10 +20,6 @@ import paddle from paddle import _legacy_C_ops from paddle.fluid import core from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.executor import ( - _is_dy2st_enable_standalone_executor, - _is_enable_standalone_executor, -) from paddle.fluid.framework import Variable @@ -140,10 +136,7 @@ class TestRunProgram(unittest.TestCase): [out.name + '@GRAD'], ] - use_interpretorcore = ( - _is_enable_standalone_executor() - and _is_dy2st_enable_standalone_executor() - ) + use_interpretorcore = True attrs.extend(('use_interpretorcore', use_interpretorcore)) if use_interpretorcore: attrs.extend( diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py index 1561f25359d6337f6f4df08bd96d084ef1349824..c54ab122a95b54a9db97bd0a853f299ebde81a6c 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -21,10 +21,6 @@ import paddle from paddle import _legacy_C_ops, fluid from paddle.fluid import core, framework from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.executor import ( - _is_dy2st_enable_standalone_executor, - _is_enable_standalone_executor, -) from paddle.fluid.framework import global_var paddle.enable_static() @@ -240,10 +236,7 @@ class 
RunProgramOpTest(unittest.TestCase): self.program_desc, self.fwd_op_num, len(outputs['Out']) ) - use_interpretorcore = ( - _is_enable_standalone_executor() - and _is_dy2st_enable_standalone_executor() - ) + use_interpretorcore = True self.attrs.extend(('use_interpretorcore', use_interpretorcore)) if use_interpretorcore: self.attrs.extend( @@ -292,10 +285,7 @@ class RunProgramOpTest(unittest.TestCase): self.program_desc, self.fwd_op_num, len(outputs['Out']) ) - use_interpretorcore = ( - _is_enable_standalone_executor() - and _is_dy2st_enable_standalone_executor() - ) + use_interpretorcore = True self.attrs.extend(('use_interpretorcore', use_interpretorcore)) if use_interpretorcore: self.attrs.extend( diff --git a/python/paddle/jit/translated_layer.py b/python/paddle/jit/translated_layer.py index edc0a0cc158e92d531898f173ddeadff0977b85f..4eba9a31dbc2b9f4f20d9b9441850b0219c68b35 100644 --- a/python/paddle/jit/translated_layer.py +++ b/python/paddle/jit/translated_layer.py @@ -21,10 +21,6 @@ import paddle from paddle import _legacy_C_ops from paddle.fluid import backward, core, framework, unique_name from paddle.fluid.dygraph.base import switch_to_static_graph -from paddle.fluid.executor import ( - _is_dy2st_enable_standalone_executor, - _is_enable_standalone_executor, -) from paddle.fluid.framework import OpProtoHolder, _non_static_mode from paddle.jit.dy2static.partial_program import ( LazyInitialized, @@ -976,10 +972,7 @@ def _run_dygraph(instance, input, program_holder): ) ) - use_interpretorcore = ( - _is_enable_standalone_executor() - and _is_dy2st_enable_standalone_executor() - ) + use_interpretorcore = True attrs.extend(('use_interpretorcore', use_interpretorcore)) if use_interpretorcore: attrs.extend( diff --git a/test/custom_op/CMakeLists.txt b/test/custom_op/CMakeLists.txt index d7f32625db4abb0649329a21684da86f761ecaf9..fbdc8f9cc653e4e4e9b3f38bf1ec448fadf13e92 100644 --- a/test/custom_op/CMakeLists.txt +++ b/test/custom_op/CMakeLists.txt @@ -11,15 +11,6 @@ if(WITH_TESTING) set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180) set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180) set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180) - if($ENV{USE_STANDALONE_EXECUTOR}) - # these test will fail in some server due to PR#42149, temporarily set it use old executor. - set_tests_properties( - test_custom_relu_op_setup PROPERTIES ENVIRONMENT - FLAGS_USE_STANDALONE_EXECUTOR=0) - set_tests_properties( - test_custom_relu_model PROPERTIES ENVIRONMENT - FLAGS_USE_STANDALONE_EXECUTOR=0) - endif() endif() if(WITH_GPU AND WITH_DISTRIBUTE) diff --git a/test/standalone_executor/test_standalone_controlflow.py b/test/standalone_executor/test_standalone_controlflow.py index 069240823d8fd5a0da514c1a375149e5f0ce99e5..fcdf7f81587087a6101390fbc0b48593bf2f3c32 100644 --- a/test/standalone_executor/test_standalone_controlflow.py +++ b/test/standalone_executor/test_standalone_controlflow.py @@ -17,7 +17,7 @@ import unittest import numpy as np import paddle -from paddle.fluid import core, framework +from paddle.fluid import core from paddle.fluid.framework import Program, program_guard paddle.enable_static() @@ -25,7 +25,7 @@ paddle.enable_static() # test the compatibility of new executor: run old # and new executor twice and check the result. 
-# please override the _get_feeds() and build_prgram() +# please override the _get_feeds() and build_prgram(), run_dygraph_once() class TestCompatibility(unittest.TestCase): def setUp(self): self.place = ( @@ -78,26 +78,53 @@ class TestCompatibility(unittest.TestCase): ret.append(exe.run(main_program, feed=feed, fetch_list=fetch_vars)) return ret - def run_raw_executor(self, feed): - with framework._enable_standalone_executor(False): - out = self._run(feed) + def run_dygraph_once(self, feed): + x = paddle.tensor.fill_constant(shape=[1], dtype='float32', value=0.1) + y = paddle.tensor.fill_constant(shape=[1], dtype='float32', value=0.23) + if x < y: + out = [ + paddle.tensor.fill_constant( + shape=[1, 2], dtype='int32', value=1 + ).numpy(), + paddle.tensor.fill_constant( + shape=[2, 3], dtype='bool', value=True + ).numpy(), + ] + else: + out = [ + paddle.tensor.fill_constant( + shape=[3, 4], dtype='float32', value=3 + ).numpy(), + paddle.tensor.fill_constant( + shape=[4, 5], dtype='int64', value=2 + ).numpy(), + ] return out + def run_dygraph(self, feed): + ret = [] + for _ in range(self.iter_run): + ret.append(self.run_dygraph_once(feed)) + return ret + def run_new_executor(self, feed): - with framework._enable_standalone_executor(True): - out = self._run(feed) + out = self._run(feed) return out def test_with_feed(self): feed = self._get_feed() + paddle.enable_static() res = self.run_new_executor(feed) - gt = self.run_raw_executor(feed) + paddle.disable_static() + + gt = self.run_dygraph(feed) + for x, y in zip(gt, res): if isinstance(x, list): for tx, ty in zip(x, y): np.testing.assert_array_equal(tx, ty) elif isinstance(x, np.ndarray): - np.testing.assert_array_equal(tx, ty) + np.testing.assert_array_equal(x, y) else: raise Exception("Not Implement!") @@ -129,6 +156,12 @@ class TestWhile(TestCompatibility): exe = paddle.static.Executor(paddle.CPUPlace()) return main_program, startup_program, i + def run_dygraph_once(self, feed): + i = 1 + while i < 10: + i = i + 1 + return [i] + if __name__ == "__main__": unittest.main() diff --git a/test/standalone_executor/test_standalone_executor.py b/test/standalone_executor/test_standalone_executor.py index c208044722f52a5c09a49dae269b9d8294df6a11..08b150c84c342352f8aa9661f603453a0af6b143 100644 --- a/test/standalone_executor/test_standalone_executor.py +++ b/test/standalone_executor/test_standalone_executor.py @@ -23,7 +23,7 @@ import unittest import numpy as np import paddle -from paddle.fluid import core, framework +from paddle.fluid import core from paddle.fluid.core import StandaloneExecutor from paddle.profiler import profiler @@ -143,16 +143,15 @@ class ExecutorStatisticsTestCase(unittest.TestCase): scope = paddle.static.Scope() with paddle.static.scope_guard(scope): - with framework._enable_standalone_executor(enable): - exe = paddle.static.Executor(self.place) - helper_profiler = profiler.Profiler( - targets=[profiler.ProfilerTarget.CPU], scheduler=(1, 2) - ) - helper_profiler.start() - for i in range(self.iter_n): - exe.run(main_program, fetch_list=fetch_list) - helper_profiler.step() - helper_profiler.stop() + exe = paddle.static.Executor(self.place) + helper_profiler = profiler.Profiler( + targets=[profiler.ProfilerTarget.CPU], scheduler=(1, 2) + ) + helper_profiler.start() + for i in range(self.iter_n): + exe.run(main_program, fetch_list=fetch_list) + helper_profiler.step() + helper_profiler.stop() self.assertTrue(os.path.exists(self.perf_path)) with open(self.perf_path, 'r') as load_f: @@ -183,15 +182,14 @@ class 
MultiStreamModelTestCase(unittest.TestCase): paddle.seed(2020) main_program, startup_program, fetch_list = build_program() - with framework._enable_standalone_executor(use_new_executor): - scope = core.Scope() - exe = paddle.static.Executor(self.place) - outs = [] - for i in range(self.iter_n): - outs.append( - exe.run(main_program, scope=scope, fetch_list=fetch_list) - ) - print(outs) + scope = core.Scope() + exe = paddle.static.Executor(self.place) + outs = [] + for i in range(self.iter_n): + outs.append( + exe.run(main_program, scope=scope, fetch_list=fetch_list) + ) + print(outs) return outs @@ -249,30 +247,46 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): return outs - def run_raw_executor(self, feed, use_compiled=False): - with framework._enable_standalone_executor(False): - # run construct program 1 - out1 = self._run( - feed, use_str=False, is_double=False, use_compiled=use_compiled - ) - # run construct program 2 with same executor - out2 = self._run( - feed, use_str=True, is_double=True, use_compiled=use_compiled - ) + def run_dygraph(self, feed): + def run_once(is_double): + paddle.seed(2020) + a = feed['a'] + a = paddle.to_tensor(a, dtype='float32') + b = paddle.ones([2, 2]) * 2 + t = paddle.nn.Linear(2, 2)(a) + c = t + b + if is_double: + c = c + c + return c.numpy() - return [out1, out2] + out1 = [] + for i in range(self.iter_run): + out1.append(run_once(False)) + out2 = [] + for i in range(self.iter_run): + out2.append(run_once(True)) + return [out1, out2] def run_new_executor(self, feed, use_compiled=False): - with framework._enable_standalone_executor(): - out = self.run_raw_executor(feed, use_compiled=use_compiled) - return out + # run construct program 1 + out1 = self._run( + feed, use_str=False, is_double=False, use_compiled=use_compiled + ) + # run construct program 2 with same executor + out2 = self._run( + feed, use_str=True, is_double=True, use_compiled=use_compiled + ) + + return [out1, out2] def test_with_feed(self): data = np.ones([2, 2], dtype="float32") feed = {"a": data, 'fake_input': data} - res = self.run_new_executor(feed) - gt = self.run_raw_executor(feed) + with paddle.fluid.framework._static_guard(): + res = self.run_new_executor(feed) + with paddle.fluid.dygraph.guard(): + gt = self.run_dygraph(feed) for x, y in zip(gt, res): np.testing.assert_array_equal(x, y) @@ -280,8 +294,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): feed = [{'a': np.ones([2, 2], dtype="float32")}] with self.assertRaises(TypeError): - with framework._enable_standalone_executor(): - self._run(feed[0], add_wrong_fetch=True) + self._run(feed[0], add_wrong_fetch=True) def test_empty_program(self): program = paddle.static.Program() @@ -291,8 +304,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase): for i in range(10): print(i, flush=1) - with framework._enable_standalone_executor(): - out = exe.run(program, feed=None) + out = exe.run(program, feed=None) class TestException(unittest.TestCase): @@ -328,8 +340,7 @@ class TestException(unittest.TestCase): return out def run_new_executor(self, feed): - with framework._enable_standalone_executor(): - out = self._run(feed) + out = self._run(feed) return out def test_exception(self): @@ -399,13 +410,11 @@ class TestInplaceApiWithDataTransform(unittest.TestCase): with paddle.fluid.device_guard("cpu"): x = paddle.increment(x) exe = paddle.static.Executor(paddle.CUDAPlace(0)) - with framework._enable_standalone_executor(): - - for i in range(10): - (a,) = exe.run( - paddle.static.default_main_program(), 
fetch_list=[x] - ) - self.assertEqual(a[0], 1) + for i in range(10): + (a,) = exe.run( + paddle.static.default_main_program(), fetch_list=[x] + ) + self.assertEqual(a[0], 1) if __name__ == "__main__": diff --git a/test/standalone_executor/test_standalone_multiply_write.py b/test/standalone_executor/test_standalone_multiply_write.py index d876606c57a51461919a3ac4c80a35a1ebb7c91b..96e35a336951c8da16bf72557c059ca6cda29443 100644 --- a/test/standalone_executor/test_standalone_multiply_write.py +++ b/test/standalone_executor/test_standalone_multiply_write.py @@ -39,6 +39,14 @@ class TestMultiplyWrite(TestCompatibility): paddle.assign(inp2, out) return main_program, startup_program, out + def run_dygraph_once(self, feed): + out = paddle.full((1,), 1) + inp1 = paddle.full((1,), 2) + inp2 = paddle.full((1,), 3) + paddle.assign(inp1, out) + paddle.assign(inp2, out) + return [out.numpy()] + def setUp(self): self.place = paddle.CPUPlace() self.iter_run = 5
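
Note on the test refactor above: with FLAGS_USE_STANDALONE_EXECUTOR and the legacy-executor code path removed, the standalone-executor tests no longer compare "new executor vs. old executor"; they compare the static-graph result against the equivalent dygraph computation (see run_dygraph_once in test_standalone_controlflow.py and test_standalone_multiply_write.py). The following is a minimal, self-contained sketch of that pattern, mirroring the multiply-write case. It is not part of the patch; the helper names (run_static_once, run_dygraph_once) and the CPU place are illustrative assumptions only.

    # Hedged sketch: static-graph output (now always run by the standalone
    # executor) checked against the dygraph result for the same computation.
    import numpy as np
    import paddle

    def run_static_once():
        # Build and run a small static program that assigns twice into `out`.
        paddle.enable_static()
        main_program = paddle.static.Program()
        startup_program = paddle.static.Program()
        with paddle.static.program_guard(main_program, startup_program):
            out = paddle.full((1,), 1)
            inp1 = paddle.full((1,), 2)
            inp2 = paddle.full((1,), 3)
            paddle.assign(inp1, out)
            paddle.assign(inp2, out)  # the later assign should take effect
        exe = paddle.static.Executor(paddle.CPUPlace())
        exe.run(startup_program)
        (res,) = exe.run(main_program, fetch_list=[out])
        paddle.disable_static()
        return res

    def run_dygraph_once():
        # Same computation in eager mode, used as the reference result.
        out = paddle.full((1,), 1)
        inp1 = paddle.full((1,), 2)
        inp2 = paddle.full((1,), 3)
        paddle.assign(inp1, out)
        paddle.assign(inp2, out)
        return out.numpy()

    if __name__ == "__main__":
        np.testing.assert_array_equal(run_static_once(), run_dygraph_once())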