From 7ee3eba9386828543fd0ef7fee43c71d5fa35d24 Mon Sep 17 00:00:00 2001
From: kangguangli
Date: Fri, 10 Mar 2023 15:35:49 +0800
Subject: [PATCH] remove with_data_parallel and return_merged (#51374)

---
 python/paddle/fluid/executor.py | 101 ++++----------------------
 1 file changed, 13 insertions(+), 88 deletions(-)

diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py
index c10fd9c9029..93ff85e1076 100755
--- a/python/paddle/fluid/executor.py
+++ b/python/paddle/fluid/executor.py
@@ -975,22 +975,8 @@ class Executor:
             # Or, compiled the program and run. See `CompiledProgram`
             # for more details.
-            # NOTE: If you use CPU to run the program or Paddle is
-            # CPU version, you need to specify the CPU_NUM, otherwise,
-            # PaddlePaddle will use all the number of the logic core as
-            # the CPU_NUM, in that case, the batch size of the input
-            # should be greater than CPU_NUM, if not, the process will be
-            # failed by an exception.
-
-            # Set place explicitly.
-            # if not use_cuda:
-            # os.environ['CPU_NUM'] = str(2)
-
-            # If you don't set place and PaddlePaddle is CPU version
-            os.environ['CPU_NUM'] = str(2)
-
             compiled_prog = paddle.static.CompiledProgram(
-                train_program).with_data_parallel(loss_name=loss.name)
+                train_program)
             loss_data, = exe.run(compiled_prog, feed={"X": x}, fetch_list=[loss.name])
             """
@@ -1310,14 +1296,7 @@ class Executor:
         self._default_executor.close()

     def _run_parallel(
-        self,
-        program,
-        scope,
-        feed,
-        fetch_list,
-        fetch_var_name,
-        return_numpy,
-        return_merged,
+        self, program, scope, feed, fetch_list, fetch_var_name, return_numpy
     ):
         from paddle.optimizer.lr import LRScheduler
@@ -1388,7 +1367,7 @@ class Executor:
         )
         fetch_var_names = list(map(_to_name_str, fetch_list))
-        tensors = exe.run(fetch_var_names, return_merged)._move_to_list()
+        tensors = exe.run(fetch_var_names, True)._move_to_list()
         return as_numpy(tensors) if return_numpy else tensors

     def run(
@@ -1401,7 +1380,6 @@ class Executor:
         scope=None,
         return_numpy=True,
         use_program_cache=False,
-        return_merged=True,
         use_prune=False,
     ):
         """
@@ -1442,17 +1420,6 @@ class Executor:
                 the input program is :code:`paddle.static.Program`, and the parameters(program,
                 feed Tensor name and fetch_list Tensor) of this interface remains unchanged during
                 running. The default is False.
-            return_merged(bool): This parameter indicates whether fetched Tensors (the Tensors
-                specified in the fetch list) should be merged according to the execution device dimension.
-                If :code:`return_merged` is False, the type of the return value is a two-dimensional list
-                of :code:`Tensor` / :code:`LoDTensorArray` ( :code:`return_numpy` is False) or a two-dimensional
-                list of :code:`numpy.ndarray` ( :code:`return_numpy` is True). If :code:`return_merged` is True,
-                the type of the return value is an one-dimensional list of :code:`Tensor` / :code:`LoDTensorArray`
-                ( :code:`return_numpy` is False) or an one-dimensional list of :code:`numpy.ndarray`
-                ( :code:`return_numpy` is True). Please see Examples 2 for more details. If the lengths of fetched
-                results are variant, please set :code:`return_merged` as False, which denotes that the fetched
-                results will not be merged. The default is True, but it is just for the compatibility, and may
-                use False as default value in the future version.
             use_prune(bool): This parameter indicates whether the input :code:`Program` will be pruned.
                 If the parameter is True, the program will be pruned accroding to the given feed and fetch_list,
                 which means the operators and variables in program that generate :code:`feed` and are not
@@ -1465,20 +1432,6 @@ class Executor:
             List: The fetched result list.

-            NOTES:
-                1. If it is multi-card running and the feed parameter is dict type, the input data
-                   will be evenly sent to different cards. For example, using two GPUs to run the model,
-                   the input sample number is 3, that is, [0, 1, 2], the sample number on GPU0 is 1,
-                   that is, [0], and the sample number on GPU1 is 2, that is, [1, 2].
-                   If the number of samples is less than the number of devices, the program will
-                   throw an exception, so when running the model, you should make sure that the
-                   number of samples of the last batch of the data set should be greater than the
-                   number of CPU cores or GPU cards, if it is less than, it is recommended that
-                   the batch be discarded.
-                2. If the number of CPU cores or GPU cards available is greater than 1, the fetch
-                   results are spliced together in dimension 0 for the same Tensor values
-                   (Tensors in fetch_list) on different devices.
-
         Examples:
             .. code-block:: python
               :name: code-example-1
@@ -1531,43 +1484,21 @@ class Executor:
                exe.run(paddle.static.default_startup_program())
                build_strategy = paddle.static.BuildStrategy()
                binary = paddle.static.CompiledProgram(
-                   paddle.static.default_main_program()).with_data_parallel(
-                   loss_name=loss.name, build_strategy=build_strategy)
+                   paddle.static.default_main_program(), build_strategy=build_strategy)
                batch_size = 6
                x = np.random.random(size=(batch_size, 1)).astype('float32')
-               # Set return_merged as False to fetch unmerged results:
-               unmerged_prediction, = exe.run(binary,
-                                              feed={'X': x},
-                                              fetch_list=[prediction.name],
-                                              return_merged=False)
-               # If the user uses two GPU cards to run this python code, the printed result will be
-               # (2, 3, class_dim). The first dimension value of the printed result is the number of used
-               # GPU cards, and the second dimension value is the quotient of batch_size and the
-               # number of used GPU cards.
-               print("The unmerged prediction shape: {}".format(
-                   np.array(unmerged_prediction).shape))
-               print(unmerged_prediction)
-
-               # Set return_merged as True to fetch merged results:
-               merged_prediction, = exe.run(binary,
-                                            feed={'X': x},
-                                            fetch_list=[prediction.name],
-                                            return_merged=True)
+               prediction, = exe.run(binary,
+                                     feed={'X': x},
+                                     fetch_list=[prediction.name])
                # If the user uses two GPU cards to run this python code, the printed result will be
                # (6, class_dim). The first dimension value of the printed result is the batch_size.
-               print("The merged prediction shape: {}".format(
-                   np.array(merged_prediction).shape))
-               print(merged_prediction)
+               print("The prediction shape: {}".format(
+                   np.array(prediction).shape))
+               print(prediction)
                # Out:
-               # The unmerged prediction shape: (2, 3, 2)
-               # [array([[-0.37620035, -0.19752218],
-               #        [-0.3561043 , -0.18697084],
-               #        [-0.24129935, -0.12669306]], dtype=float32), array([[-0.24489994, -0.12858354],
-               #        [-0.49041364, -0.25748932],
-               #        [-0.44331917, -0.23276259]], dtype=float32)]
-               # The merged prediction shape: (6, 2)
+               # The prediction shape: (6, 2)
                # [[-0.37789783 -0.19921964]
                #  [-0.3577645  -0.18863106]
                #  [-0.24274671 -0.12814042]
@@ -1600,7 +1531,6 @@ class Executor:
                 return_numpy=return_numpy,
                 use_program_cache=use_program_cache,
                 use_prune=use_prune,
-                return_merged=return_merged,
             )
             core.update_autotune_status()
             return res
@@ -1615,7 +1545,6 @@ class Executor:
         scope,
         return_numpy,
         use_program_cache,
-        return_merged,
         use_prune,
     ):
         if self._closed:
@@ -1806,10 +1735,8 @@ class Executor:
                     return False
             return True

-        if (
-            return_merged
-            and self._enable_interpreter_core
-            and _can_use_interpreter_core(program, self.place)
+        if self._enable_interpreter_core and _can_use_interpreter_core(
+            program, self.place
         ):
             if feed is None:
@@ -1907,7 +1834,6 @@ class Executor:
                     fetch_list=fetch_list,
                     fetch_var_name=fetch_var_name,
                     return_numpy=return_numpy,
-                    return_merged=return_merged,
                 )
             return self._run_program(
@@ -1932,7 +1858,6 @@ class Executor:
             fetch_list=fetch_list,
             fetch_var_name=fetch_var_name,
             return_numpy=return_numpy,
-            return_merged=return_merged,
         )

     def _run_program(
--
GitLab