From 22861a9ecd465da94a31b861c28dbfac7012d168 Mon Sep 17 00:00:00 2001
From: gfwm0502 <guofeiworking@126.com>
Date: Tue, 21 Apr 2020 16:19:48 +0800
Subject: [PATCH] Modify Chinese documents of executor

test=develop
---
 doc/fluid/api_cn/executor_cn/Executor_cn.rst | 67 ++++++++++++++++++--
 doc/fluid/api_cn/fluid_cn/Executor_cn.rst    | 11 +++-
 2 files changed, 71 insertions(+), 7 deletions(-)

diff --git a/doc/fluid/api_cn/executor_cn/Executor_cn.rst b/doc/fluid/api_cn/executor_cn/Executor_cn.rst
index 77cf8d127..630658ae2 100644
--- a/doc/fluid/api_cn/executor_cn/Executor_cn.rst
+++ b/doc/fluid/api_cn/executor_cn/Executor_cn.rst
@@ -105,6 +105,7 @@ Executor支持单GPU、多GPU以及CPU运行。
   - **scope** (Scope) – 该参数表示执行当前program所使用的作用域，用户可以为不同的program指定不同的作用域。默认值：fluid.global_scope()。
   - **return_numpy** (bool) – 该参数表示是否将返回返回的计算结果（fetch list中指定的变量）转化为numpy；如果为False，则每个变量返回的类型为LoDTensor，否则返回变量的类型为numpy.ndarray。默认为：True。
   - **use_program_cache** (bool) – 该参数表示是否对输入的Program进行缓存。如果该参数为True，在以下情况时，模型运行速度可能会更快：输入的program为 ``fluid.Program`` ，并且模型运行过程中，调用该接口的参数（program、 feed变量名和fetch_list变量）名始终不变。默认为：False。
+  - **return_merged** (bool) - 该参数表示是否按照执行设备维度将返回的计算结果（fetch list中指定的变量）进行合并。如果 ``return_merged`` 设为False，返回值类型是一个Tensor/LoDTensorArray的二维列表（ ``return_numpy`` 设为Fasle时）或者一个numpy.ndarray的二维列表（ ``return_numpy`` 设为True时）。如果 ``return_merged`` 设为True，返回值类型是一个Tensor/LoDTensorArray的一维列表（ ``return_numpy`` 设为Fasle时）或者一个numpy.ndarray的一维列表（ ``return_numpy`` 设为True时）。更多细节请参考示例代码2。如果返回的计算结果是变长的，请设置 ``return_merged`` 为False，即不按照执行设备维度合并返回的计算结果。该参数的默认值为True，但这仅是为了兼容性考虑，在未来的版本中默认值可能会更改为False。
   - **use_prune** (bool) – 该参数表示是否对输入的Program进行剪枝。如果该参数为True，输入的Program会在run之前根据 ``feed`` 和 ``fetch_list`` 进行剪枝，剪枝的逻辑是将产生 ``feed`` 的 ``Variable`` 和 ``Operator`` 以及不产生 ``fetch_list`` 的 ``Variable`` 和 ``Operator`` 进行裁剪。默认为：False，表示不进行剪枝。请注意，如果将 ``Optimizer.minimize()`` 方法返回的 ``tuple`` 传入 ``fetch_list`` 中，则 ``use_prune`` 会被重写为True，并且会开启剪枝。
   
 返回：返回fetch_list中指定的变量值
@@ -116,7 +117,7 @@ Executor支持单GPU、多GPU以及CPU运行。
      2. 如果可用的CPU核数或GPU卡数大于1，则fetch出来的结果为不同设备上的相同变量值（fetch_list中的变量）在第0维拼接在一起。
 
 
-**示例代码**
+**示例代码 1**
 
 .. code-block:: python
 
@@ -132,14 +133,72 @@ Executor支持单GPU、多GPU以及CPU运行。
             loss = fluid.layers.mean(hidden)
             adam = fluid.optimizer.Adam()
             adam.minimize(loss)
-     
+            i = fluid.layers.zeros(shape=[1], dtype='int64')
+            array = fluid.layers.array_write(x=loss, i=i)
+
             #仅运行startup程序一次
             exe.run(fluid.default_startup_program())
 
             x = numpy.random.random(size=(10, 1)).astype('float32')
-            outs = exe.run(feed={'X': x},
-                           fetch_list=[loss.name])
+            loss_val, array_val = exe.run(feed={'X': x},
+                                          fetch_list=[loss.name, array.name])
+            print(array_val)
+            # [array([0.02153828], dtype=float32)]
+
+
+**示例代码 2**
+
+.. code-block:: python
 
+            import paddle.fluid as fluid
+            import numpy as np
+            # 创建Executor对象
+            place = fluid.CUDAPlace(0)
+            exe = fluid.Executor(place)
+            data = fluid.data(name='X', shape=[None, 1], dtype='float32')
+            class_dim = 2
+            prediction = fluid.layers.fc(input=data, size=class_dim)
+            loss = fluid.layers.mean(prediction)
+            adam = fluid.optimizer.Adam()
+            adam.minimize(loss)
+            # 运行且仅运行一次startup program
+            exe.run(fluid.default_startup_program())
+            build_strategy = fluid.BuildStrategy()
+            binary = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(
+                loss_name=loss.name, build_strategy=build_strategy)
+            batch_size = 6
+            x = np.random.random(size=(batch_size, 1)).astype('float32')
+            # 1) 设置 return_merged 参数为False以获取不合并的计算结果：
+            unmerged_prediction, = exe.run(binary, feed={'X': x},
+                fetch_list=[prediction.name],
+                return_merged=False)
+            # 如果用户使用两个GPU卡来运行此python代码示例，输出结果将为(2, 3, class_dim)。
+            # 输出结果中第一个维度值代表所使用的GPU卡数，而第二个维度值代表batch_size和所使用
+            # 的GPU卡数之商。
+            print("The unmerged prediction shape: {}".format(np.array(unmerged_prediction).shape))
+            print(unmerged_prediction)
+            # 2) 设置 return_merged 参数为True以获取合并的计算结果：
+            merged_prediction, = exe.run(binary, feed={'X': x},
+                fetch_list=[prediction.name],
+                return_merged=True)
+            # 如果用户使用两个GPU卡来运行此python代码示例，输出结果将为(6, class_dim)。输出结果
+            # 中第一个维度值代表batch_size值。
+            print("The merged prediction shape: {}".format(np.array(merged_prediction).shape))
+            print(merged_prediction)
+            # 输出:
+            # The unmerged prediction shape: (2, 3, 2)
+            # [array([[-0.37620035, -0.19752218],
+            #        [-0.3561043 , -0.18697084],
+            #        [-0.24129935, -0.12669306]], dtype=float32), array([[-0.24489994, -0.12858354],
+            #        [-0.49041364, -0.25748932],
+            #        [-0.44331917, -0.23276259]], dtype=float32)]
+            # The merged prediction shape: (6, 2)
+            # [[-0.37789783 -0.19921964]
+            #  [-0.3577645  -0.18863106]
+            #  [-0.24274671 -0.12814042]
+            #  [-0.24635398 -0.13003758]
+            #  [-0.49232286 -0.25939852]
+            #  [-0.44514108 -0.2345845 ]]
 
 .. py:method:: train_from_dataset(program=None, dataset=None, scope=None, thread=0, debug=False, fetch_list=None, fetch_info=None, print_period=100)
 
diff --git a/doc/fluid/api_cn/fluid_cn/Executor_cn.rst b/doc/fluid/api_cn/fluid_cn/Executor_cn.rst
index cd932f329..cecd05a16 100644
--- a/doc/fluid/api_cn/fluid_cn/Executor_cn.rst
+++ b/doc/fluid/api_cn/fluid_cn/Executor_cn.rst
@@ -106,7 +106,8 @@ Executor支持单GPU、多GPU以及CPU运行。
   - **scope** (Scope) – 该参数表示执行当前program所使用的作用域，用户可以为不同的program指定不同的作用域。默认值：fluid.global_scope()。
   - **return_numpy** (bool) – 该参数表示是否将返回的计算结果（fetch list中指定的变量）转化为numpy；如果为False，则每个变量返回的类型为LoDTensor，否则返回变量的类型为numpy.ndarray。默认为：True。
   - **use_program_cache** (bool) – 该参数表示是否对输入的Program进行缓存。如果该参数为True，在以下情况时，模型运行速度可能会更快：输入的program为 ``fluid.Program`` ，并且模型运行过程中，调用该接口的参数（program、 feed变量名和fetch_list变量）名始终不变。默认为：False。
-  - **return_merged** (bool) – 该参数表示是否按照执行设备维度将返回的计算结果（fetch list中指定的变量）进行合并。如果 ``return_merged`` 设为False，返回值类型是一个Tensor的二维列表（ ``return_numpy`` 设为Fasle时）或者一个numpy.ndarray的二维列表（ ``return_numpy`` 设为True时）。如果 ``return_merged`` 设为True，返回值类型是一个Tensor的一维列表（ ``return_numpy`` 设为Fasle时）或者一个numpy.ndarray的一维列表（ ``return_numpy`` 设为True时）。更多细节请参考示例代码2。如果返回的计算结果是变长的，请设置 ``return_merged`` 为False，即不按照执行设备维度合并返回的计算结果。该参数的默认值为True，但这仅是为了兼容性考虑，在未来的版本中默认值可能会更改为False。
+  - **return_merged** (bool) – 该参数表示是否按照执行设备维度将返回的计算结果（fetch list中指定的变量）进行合并。如果 ``return_merged`` 设为False，返回值类型是一个Tensor/LoDTensorArray的二维列表（ ``return_numpy`` 设为Fasle时）或者一个numpy.ndarray的二维列表（ ``return_numpy`` 设为True时）。如果 ``return_merged`` 设为True，返回值类型是一个Tensor/LoDTensorArray的一维列表（ ``return_numpy`` 设为Fasle时）或者一个numpy.ndarray的一维列表（ ``return_numpy`` 设为True时）。更多细节请参考示例代码2。如果返回的计算结果是变长的，请设置 ``return_merged`` 为False，即不按照执行设备维度合并返回的计算结果。该参数的默认值为True，但这仅是为了兼容性考虑，在未来的版本中默认值可能会更改为False。
+  - **use_prune** (bool) - 该参数表示是否对输入的Program进行剪枝。如果该参数为True，输入的Program会在run之前根据 ``feed`` 和 ``fetch_list`` 进行剪枝，剪枝的逻辑是将产生 ``feed`` 的 ``Variable`` 和 ``Operator`` 以及不产生 ``fetch_list`` 的 ``Variable`` 和 ``Operator`` 进行裁剪。默认为：False，表示不进行剪枝。请注意，如果将 ``Optimizer.minimize()`` 方法返回的 ``tuple`` 传入 ``fetch_list`` 中，则 ``use_prune`` 会被重写为True，并且会开启剪枝。
 
 返回：返回fetch_list中指定的变量值
 
@@ -133,13 +134,17 @@ Executor支持单GPU、多GPU以及CPU运行。
             loss = fluid.layers.mean(hidden)
             adam = fluid.optimizer.Adam()
             adam.minimize(loss)
+            i = fluid.layers.zeros(shape=[1], dtype='int64')
+            array = fluid.layers.array_write(x=loss, i=i)
      
             #仅运行startup程序一次
             exe.run(fluid.default_startup_program())
 
             x = numpy.random.random(size=(10, 1)).astype('float32')
-            outs = exe.run(feed={'X': x},
-                           fetch_list=[loss.name])
+            loss_val, array_val = exe.run(feed={'X': x},
+                                          fetch_list=[loss.name, array.name])
+            print(array_val)
+            # [array([0.02153828], dtype=float32)]
 
 
 **示例代码2**
-- 
GitLab