Commit 7ee3eba9 (unverified signature)
Authored Mar 10, 2023 by kangguangli; committed by GitHub on Mar 10, 2023
remove with_data_parallel and return_merged (#51374)
Parent: ac495981
Showing 1 changed file with 13 additions and 88 deletions.

python/paddle/fluid/executor.py  (+13, -88)
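Taken together, the diff below removes two pieces of public API surface: the with_data_parallel() step on paddle.static.CompiledProgram, and the return_merged argument of Executor.run. A schematic before/after sketch (not a runnable program; the train_program, loss, exe, and x names are borrowed from the docstring examples edited below):

# Before this commit (old API):
compiled_prog = paddle.static.CompiledProgram(
    train_program).with_data_parallel(loss_name=loss.name)
loss_data, = exe.run(compiled_prog,
                     feed={"X": x},
                     fetch_list=[loss.name],
                     return_merged=True)

# After this commit: no with_data_parallel() step and no return_merged
# argument; fetched results are always merged along dimension 0.
compiled_prog = paddle.static.CompiledProgram(train_program)
loss_data, = exe.run(compiled_prog, feed={"X": x}, fetch_list=[loss.name])

The full diff follows.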
@@ -975,22 +975,8 @@ class Executor:
             # Or, compiled the program and run. See `CompiledProgram`
             # for more details.
-            # NOTE: If you use CPU to run the program or Paddle is
-            # CPU version, you need to specify the CPU_NUM, otherwise,
-            # PaddlePaddle will use all the number of the logic core as
-            # the CPU_NUM, in that case, the batch size of the input
-            # should be greater than CPU_NUM, if not, the process will be
-            # failed by an exception.
-            # Set place explicitly.
-            # if not use_cuda:
-            #     os.environ['CPU_NUM'] = str(2)
-            # If you don't set place and PaddlePaddle is CPU version
-            os.environ['CPU_NUM'] = str(2)
             compiled_prog = paddle.static.CompiledProgram(
-                train_program).with_data_parallel(loss_name=loss.name)
+                train_program)
             loss_data, = exe.run(compiled_prog, feed={"X": x}, fetch_list=[loss.name])
     """
@@ -1310,14 +1296,7 @@ class Executor:
         self._default_executor.close()
 
     def _run_parallel(
-        self,
-        program,
-        scope,
-        feed,
-        fetch_list,
-        fetch_var_name,
-        return_numpy,
-        return_merged,
+        self, program, scope, feed, fetch_list, fetch_var_name, return_numpy
     ):
         from paddle.optimizer.lr import LRScheduler
@@ -1388,7 +1367,7 @@ class Executor:
                 )
         fetch_var_names = list(map(_to_name_str, fetch_list))
-        tensors = exe.run(fetch_var_names, return_merged)._move_to_list()
+        tensors = exe.run(fetch_var_names, True)._move_to_list()
         return as_numpy(tensors) if return_numpy else tensors
 
     def run(
@@ -1401,7 +1380,6 @@ class Executor:
         scope=None,
         return_numpy=True,
         use_program_cache=False,
-        return_merged=True,
         use_prune=False,
     ):
         """
@@ -1442,17 +1420,6 @@ class Executor:
                 the input program is :code:`paddle.static.Program`, and the parameters(program, feed Tensor name
                 and fetch_list Tensor) of this interface remains unchanged during running.
                 The default is False.
-            return_merged(bool): This parameter indicates whether fetched Tensors (the Tensors
-                specified in the fetch list) should be merged according to the execution device dimension.
-                If :code:`return_merged` is False, the type of the return value is a two-dimensional list
-                of :code:`Tensor` / :code:`LoDTensorArray` ( :code:`return_numpy` is False) or a two-dimensional
-                list of :code:`numpy.ndarray` ( :code:`return_numpy` is True). If :code:`return_merged` is True,
-                the type of the return value is an one-dimensional list of :code:`Tensor` / :code:`LoDTensorArray`
-                ( :code:`return_numpy` is False) or an one-dimensional list of :code:`numpy.ndarray`
-                ( :code:`return_numpy` is True). Please see Examples 2 for more details. If the lengths of fetched
-                results are variant, please set :code:`return_merged` as False, which denotes that the fetched
-                results will not be merged. The default is True, but it is just for the compatibility, and may
-                use False as default value in the future version.
             use_prune(bool): This parameter indicates whether the input :code:`Program` will be pruned.
                 If the parameter is True, the program will be pruned accroding to the given feed and fetch_list,
                 which means the operators and variables in program that generate :code:`feed` and are not
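The deleted return_merged paragraph is easiest to see in plain numpy. A minimal illustration of the two return layouts, using the device count and shapes from the deleted Example 2 (this is ordinary numpy, not Paddle API code):

import numpy as np

num_devices, batch_size, class_dim = 2, 6, 2

# return_merged=False (removed): a two-dimensional result, one array per
# device, each holding batch_size // num_devices samples.
unmerged = [np.zeros((batch_size // num_devices, class_dim))
            for _ in range(num_devices)]
print(np.array(unmerged).shape)  # (2, 3, 2)

# return_merged=True (now the only behavior): per-device results are
# spliced together along dimension 0 into a single batch.
merged = np.concatenate(unmerged, axis=0)
print(merged.shape)  # (6, 2)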
@@ -1465,20 +1432,6 @@ class Executor:
             List: The fetched result list.
 
-        NOTES:
-            1. If it is multi-card running and the feed parameter is dict type, the input data
-               will be evenly sent to different cards. For example, using two GPUs to run the model,
-               the input sample number is 3, that is, [0, 1, 2], the sample number on GPU0 is 1,
-               that is, [0], and the sample number on GPU1 is 2, that is, [1, 2].
-               If the number of samples is less than the number of devices, the program will
-               throw an exception, so when running the model, you should make sure that the
-               number of samples of the last batch of the data set should be greater than the
-               number of CPU cores or GPU cards, if it is less than, it is recommended that
-               the batch be discarded.
-            2. If the number of CPU cores or GPU cards available is greater than 1, the fetch
-               results are spliced together in dimension 0 for the same Tensor values
-               (Tensors in fetch_list) on different devices.
 
         Examples:
             .. code-block:: python
                 :name: code-example-1
@@ -1531,43 +1484,21 @@ class Executor:
                 exe.run(paddle.static.default_startup_program())
                 build_strategy = paddle.static.BuildStrategy()
                 binary = paddle.static.CompiledProgram(
-                    paddle.static.default_main_program()).with_data_parallel(
-                        loss_name=loss.name, build_strategy=build_strategy)
+                    paddle.static.default_main_program(), build_strategy=build_strategy)
                 batch_size = 6
                 x = np.random.random(size=(batch_size, 1)).astype('float32')
 
-                # Set return_merged as False to fetch unmerged results:
-                unmerged_prediction, = exe.run(binary,
-                                               feed={'X': x},
-                                               fetch_list=[prediction.name],
-                                               return_merged=False)
-                # If the user uses two GPU cards to run this python code, the printed result will be
-                # (2, 3, class_dim). The first dimension value of the printed result is the number of used
-                # GPU cards, and the second dimension value is the quotient of batch_size and the
-                # number of used GPU cards.
-                print("The unmerged prediction shape: {}".format(
-                    np.array(unmerged_prediction).shape))
-                print(unmerged_prediction)
 
-                # Set return_merged as True to fetch merged results:
-                merged_prediction, = exe.run(binary,
-                                             feed={'X': x},
-                                             fetch_list=[prediction.name],
-                                             return_merged=True)
+                prediction, = exe.run(binary,
+                                      feed={'X': x},
+                                      fetch_list=[prediction.name])
 
                 # If the user uses two GPU cards to run this python code, the printed result will be
                 # (6, class_dim). The first dimension value of the printed result is the batch_size.
-                print("The merged prediction shape: {}".format(
-                    np.array(merged_prediction).shape))
-                print(merged_prediction)
+                print("The prediction shape: {}".format(
+                    np.array(prediction).shape))
+                print(prediction)
 
                 # Out:
-                # The unmerged prediction shape: (2, 3, 2)
-                # [array([[-0.37620035, -0.19752218],
-                #        [-0.3561043 , -0.18697084],
-                #        [-0.24129935, -0.12669306]], dtype=float32), array([[-0.24489994, -0.12858354],
-                #        [-0.49041364, -0.25748932],
-                #        [-0.44331917, -0.23276259]], dtype=float32)]
-                # The merged prediction shape: (6, 2)
+                # The prediction shape: (6, 2)
                 # [[-0.37789783 -0.19921964]
                 #  [-0.3577645  -0.18863106]
                 #  [-0.24274671 -0.12814042]
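Callers that depended on return_merged=False can usually reconstruct the per-device layout from the merged result. A hedged migration sketch (np.array_split is standard numpy; the even-split assumption mirrors the deleted NOTES and does not cover variable-length fetch results):

import numpy as np

merged = np.zeros((6, 2))  # stand-in for a merged fetch result
num_devices = 2            # hypothetical device count
per_device = np.array_split(merged, num_devices, axis=0)
print(np.array(per_device).shape)  # (2, 3, 2), the old unmerged layout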
@@ -1600,7 +1531,6 @@ class Executor:
                 return_numpy=return_numpy,
                 use_program_cache=use_program_cache,
                 use_prune=use_prune,
-                return_merged=return_merged,
             )
             core.update_autotune_status()
             return res
@@ -1615,7 +1545,6 @@ class Executor:
         scope,
         return_numpy,
         use_program_cache,
-        return_merged,
         use_prune,
     ):
         if self._closed:
@@ -1806,10 +1735,8 @@ class Executor:
                     return False
             return True
 
-        if (
-            return_merged
-            and self._enable_interpreter_core
-            and _can_use_interpreter_core(program, self.place)
-        ):
+        if self._enable_interpreter_core and _can_use_interpreter_core(
+            program, self.place
+        ):
             if feed is None:
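The control-flow effect of this hunk, restated schematically (pseudocode, not the actual executor source): return_merged=False used to force execution off the new interpreter core, whereas now availability of the core alone decides.

# Before: return_merged gated the new executor path.
use_new_executor = (return_merged
                    and self._enable_interpreter_core
                    and _can_use_interpreter_core(program, self.place))

# After: return_merged is gone; availability alone decides.
use_new_executor = (self._enable_interpreter_core
                    and _can_use_interpreter_core(program, self.place))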
@@ -1907,7 +1834,6 @@ class Executor:
                 fetch_list=fetch_list,
                 fetch_var_name=fetch_var_name,
                 return_numpy=return_numpy,
-                return_merged=return_merged,
             )
         return self._run_program(
@@ -1932,7 +1858,6 @@ class Executor:
                 fetch_list=fetch_list,
                 fetch_var_name=fetch_var_name,
                 return_numpy=return_numpy,
-                return_merged=return_merged,
             )
 
     def _run_program(