Unverified commit 7ee3eba9, authored by kangguangli, committed by GitHub

remove with_data_parallel and return_merged (#51374)

Parent ac495981
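For users updating code across this change, the sketch below contrasts the old and new call patterns. It is a minimal, hypothetical example (the toy network, `train_program`, `loss`, and the data shapes are placeholders, not part of this commit): `CompiledProgram` is now built without `.with_data_parallel(...)`, and `Executor.run` no longer accepts `return_merged`.

    import numpy as np
    import paddle

    paddle.enable_static()

    # Build a trivial static-graph program (illustrative only).
    train_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(train_program, startup_program):
        x = paddle.static.data(name="X", shape=[None, 1], dtype="float32")
        hidden = paddle.static.nn.fc(x, size=2)
        loss = paddle.mean(hidden)

    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(startup_program)

    # Before this commit (removed API):
    #   compiled_prog = paddle.static.CompiledProgram(
    #       train_program).with_data_parallel(loss_name=loss.name)
    #   loss_data, = exe.run(compiled_prog, feed={"X": x_data},
    #                        fetch_list=[loss.name], return_merged=True)

    # After this commit:
    compiled_prog = paddle.static.CompiledProgram(train_program)
    x_data = np.random.random(size=(2, 1)).astype("float32")
    loss_data, = exe.run(compiled_prog, feed={"X": x_data}, fetch_list=[loss.name])
    print(loss_data.shape)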
@@ -975,22 +975,8 @@ class Executor:
      # Or, compiled the program and run. See `CompiledProgram`
      # for more details.
-     # NOTE: If you use CPU to run the program or Paddle is
-     # CPU version, you need to specify the CPU_NUM, otherwise,
-     # PaddlePaddle will use all the number of the logic core as
-     # the CPU_NUM, in that case, the batch size of the input
-     # should be greater than CPU_NUM, if not, the process will be
-     # failed by an exception.
-     # Set place explicitly.
-     # if not use_cuda:
-     #     os.environ['CPU_NUM'] = str(2)
-     # If you don't set place and PaddlePaddle is CPU version
-     os.environ['CPU_NUM'] = str(2)
      compiled_prog = paddle.static.CompiledProgram(
-         train_program).with_data_parallel(loss_name=loss.name)
+         train_program)
      loss_data, = exe.run(compiled_prog, feed={"X": x}, fetch_list=[loss.name])
      """
@@ -1310,14 +1296,7 @@ class Executor:
          self._default_executor.close()

      def _run_parallel(
-         self,
-         program,
-         scope,
-         feed,
-         fetch_list,
-         fetch_var_name,
-         return_numpy,
-         return_merged,
+         self, program, scope, feed, fetch_list, fetch_var_name, return_numpy
      ):
          from paddle.optimizer.lr import LRScheduler

@@ -1388,7 +1367,7 @@ class Executor:
          )
          fetch_var_names = list(map(_to_name_str, fetch_list))
-         tensors = exe.run(fetch_var_names, return_merged)._move_to_list()
+         tensors = exe.run(fetch_var_names, True)._move_to_list()
          return as_numpy(tensors) if return_numpy else tensors

      def run(
@@ -1401,7 +1380,6 @@ class Executor:
          scope=None,
          return_numpy=True,
          use_program_cache=False,
-         return_merged=True,
          use_prune=False,
      ):
          """
@@ -1442,17 +1420,6 @@ class Executor:
              the input program is :code:`paddle.static.Program`, and the parameters(program, feed Tensor name
              and fetch_list Tensor) of this interface remains unchanged during running.
              The default is False.
-             return_merged(bool): This parameter indicates whether fetched Tensors (the Tensors
-                 specified in the fetch list) should be merged according to the execution device dimension.
-                 If :code:`return_merged` is False, the type of the return value is a two-dimensional list
-                 of :code:`Tensor` / :code:`LoDTensorArray` ( :code:`return_numpy` is False) or a two-dimensional
-                 list of :code:`numpy.ndarray` ( :code:`return_numpy` is True). If :code:`return_merged` is True,
-                 the type of the return value is an one-dimensional list of :code:`Tensor` / :code:`LoDTensorArray`
-                 ( :code:`return_numpy` is False) or an one-dimensional list of :code:`numpy.ndarray`
-                 ( :code:`return_numpy` is True). Please see Examples 2 for more details. If the lengths of fetched
-                 results are variant, please set :code:`return_merged` as False, which denotes that the fetched
-                 results will not be merged. The default is True, but it is just for the compatibility, and may
-                 use False as default value in the future version.
              use_prune(bool): This parameter indicates whether the input :code:`Program` will be pruned.
                  If the parameter is True, the program will be pruned accroding to the given feed and fetch_list,
                  which means the operators and variables in program that generate :code:`feed` and are not
@@ -1465,20 +1432,6 @@ class Executor:
              List: The fetched result list.
-         NOTES:
-             1. If it is multi-card running and the feed parameter is dict type, the input data
-                will be evenly sent to different cards. For example, using two GPUs to run the model,
-                the input sample number is 3, that is, [0, 1, 2], the sample number on GPU0 is 1,
-                that is, [0], and the sample number on GPU1 is 2, that is, [1, 2].
-                If the number of samples is less than the number of devices, the program will
-                throw an exception, so when running the model, you should make sure that the
-                number of samples of the last batch of the data set should be greater than the
-                number of CPU cores or GPU cards, if it is less than, it is recommended that
-                the batch be discarded.
-             2. If the number of CPU cores or GPU cards available is greater than 1, the fetch
-                results are spliced together in dimension 0 for the same Tensor values
-                (Tensors in fetch_list) on different devices.
          Examples:
              .. code-block:: python
                  :name: code-example-1
@@ -1531,43 +1484,21 @@ class Executor:
              exe.run(paddle.static.default_startup_program())
              build_strategy = paddle.static.BuildStrategy()
              binary = paddle.static.CompiledProgram(
-                 paddle.static.default_main_program()).with_data_parallel(
-                     loss_name=loss.name, build_strategy=build_strategy)
+                 paddle.static.default_main_program(), build_strategy=build_strategy)
              batch_size = 6
              x = np.random.random(size=(batch_size, 1)).astype('float32')
-             # Set return_merged as False to fetch unmerged results:
-             unmerged_prediction, = exe.run(binary,
-                                            feed={'X': x},
-                                            fetch_list=[prediction.name],
-                                            return_merged=False)
-             # If the user uses two GPU cards to run this python code, the printed result will be
-             # (2, 3, class_dim). The first dimension value of the printed result is the number of used
-             # GPU cards, and the second dimension value is the quotient of batch_size and the
-             # number of used GPU cards.
-             print("The unmerged prediction shape: {}".format(
-                 np.array(unmerged_prediction).shape))
-             print(unmerged_prediction)
-             # Set return_merged as True to fetch merged results:
-             merged_prediction, = exe.run(binary,
-                                          feed={'X': x},
-                                          fetch_list=[prediction.name],
-                                          return_merged=True)
+             prediction, = exe.run(binary,
+                                   feed={'X': x},
+                                   fetch_list=[prediction.name])
              # If the user uses two GPU cards to run this python code, the printed result will be
              # (6, class_dim). The first dimension value of the printed result is the batch_size.
-             print("The merged prediction shape: {}".format(
-                 np.array(merged_prediction).shape))
-             print(merged_prediction)
+             print("The prediction shape: {}".format(
+                 np.array(prediction).shape))
+             print(prediction)
              # Out:
-             # The unmerged prediction shape: (2, 3, 2)
-             # [array([[-0.37620035, -0.19752218],
-             #         [-0.3561043 , -0.18697084],
-             #         [-0.24129935, -0.12669306]], dtype=float32), array([[-0.24489994, -0.12858354],
-             #         [-0.49041364, -0.25748932],
-             #         [-0.44331917, -0.23276259]], dtype=float32)]
-             # The merged prediction shape: (6, 2)
+             # The prediction shape: (6, 2)
              # [[-0.37789783 -0.19921964]
              #  [-0.3577645 -0.18863106]
              #  [-0.24274671 -0.12814042]
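Note on migrating fetched results: with return_merged removed, fetched values always come back merged along dimension 0, as in the (6, class_dim) shape shown in the example above. Code that previously used return_merged=False to obtain per-device slices can split the merged result itself. The snippet below is an illustrative sketch, not part of this commit; the merged array and the device count of 2 are assumptions.

    import numpy as np

    # Hypothetical merged fetch result with shape (batch_size, class_dim),
    # e.g. the (6, 2) prediction from the docstring example above.
    merged = np.random.random(size=(6, 2)).astype("float32")

    # Recover per-device chunks manually if they are still needed.
    num_devices = 2
    per_device = np.array_split(merged, num_devices, axis=0)
    print([chunk.shape for chunk in per_device])  # [(3, 2), (3, 2)]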
@@ -1600,7 +1531,6 @@ class Executor:
                  return_numpy=return_numpy,
                  use_program_cache=use_program_cache,
                  use_prune=use_prune,
-                 return_merged=return_merged,
              )
              core.update_autotune_status()
              return res
@@ -1615,7 +1545,6 @@ class Executor:
          scope,
          return_numpy,
          use_program_cache,
-         return_merged,
          use_prune,
      ):
          if self._closed:
@@ -1806,10 +1735,8 @@ class Executor:
                  return False
              return True

-         if (
-             return_merged
-             and self._enable_interpreter_core
-             and _can_use_interpreter_core(program, self.place)
+         if self._enable_interpreter_core and _can_use_interpreter_core(
+             program, self.place
          ):
              if feed is None:
@@ -1907,7 +1834,6 @@ class Executor:
              fetch_list=fetch_list,
              fetch_var_name=fetch_var_name,
              return_numpy=return_numpy,
-             return_merged=return_merged,
          )
          return self._run_program(
@@ -1932,7 +1858,6 @@ class Executor:
              fetch_list=fetch_list,
              fetch_var_name=fetch_var_name,
              return_numpy=return_numpy,
-             return_merged=return_merged,
          )

      def _run_program(