Unverified commit b14ecb86 authored by Chen Weihang, committed by GitHub

Polish api BuildStrategy/ExecutionStrategy doc & code example (#27662)

* polish BuildStrategy api doc & example

* polish ExecutionStrategy api doc & example

* polish details
Parent cc2fc938
@@ -1980,27 +1980,34 @@ All parameter, weight, gradient are variables in Paddle.
     ExecutionStrategy allows the user to more precisely control how to run
     the program in ParallelExecutor by setting the property.

+    Returns:
+        ExecutionStrategy: An ExecutionStrategy object.
+
     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
-            x = fluid.layers.data(name='x', shape=[13], dtype='float32')
-            y = fluid.layers.data(name='y', shape=[1], dtype='float32')
-            y_predict = fluid.layers.fc(input=x, size=1, act=None)
+            import paddle
+            import paddle.static as static
+            import paddle.nn.functional as F
+
+            paddle.enable_static()
+
+            x = static.data(name='x', shape=[None, 13], dtype='float32')
+            y = static.data(name='y', shape=[None, 1], dtype='float32')
+            y_predict = static.nn.fc(input=x, size=1, act=None)

-            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
-            avg_loss = fluid.layers.mean(cost)
+            cost = F.square_error_cost(input=y_predict, label=y)
+            avg_loss = paddle.mean(cost)

-            sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+            sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.001)
             sgd_optimizer.minimize(avg_loss)

-            exec_strategy = fluid.ExecutionStrategy()
+            exec_strategy = static.ExecutionStrategy()
             exec_strategy.num_threads = 4

-            train_exe = fluid.ParallelExecutor(use_cuda=False,
+            train_exe = static.ParallelExecutor(use_cuda=False,
                                                loss_name=avg_loss.name,
                                                exec_strategy=exec_strategy)
)DOC");
    exec_strategy.def(py::init())
@@ -2010,7 +2017,8 @@ All parameter, weight, gradient are variables in Paddle.
          [](ExecutionStrategy &self, size_t num_threads) {
            self.num_threads_ = num_threads;
          },
-          R"DOC(The type is INT, num_threads represents the size of thread pool that
+          R"DOC(
+            The type is INT, num_threads represents the size of the thread pool
            used to run the operators of the current program in ParallelExecutor.
            If :math:`num\_threads=1`, all the operators will execute one by one,
            but the order may differ between iterations.
@@ -2018,7 +2026,19 @@ All parameter, weight, gradient are variables in Paddle.
            device type and device count, for GPU, :math:`num\_threads=device\_count*4`, for CPU,
            :math:`num\_threads=CPU\_NUM*4`, the explanation of :math:`CPU\_NUM` is in ParallelExecutor.
            If it is not set, ParallelExecutor will get the cpu count by calling
-            `multiprocessing.cpu_count()`. Default 0.)DOC")
+            `multiprocessing.cpu_count()`. Default 0.
+
+            Examples:
+                .. code-block:: python
+
+                    import paddle
+                    import paddle.static as static
+
+                    paddle.enable_static()
+
+                    exec_strategy = static.ExecutionStrategy()
+                    exec_strategy.num_threads = 4
+            )DOC")
      .def_property(
          "use_cuda",
          [](const ExecutionStrategy &self) { return self.use_cuda_; },
@@ -2050,13 +2070,24 @@ All parameter, weight, gradient are variables in Paddle.
            many iterations to clean up the temp variables which
            are generated during execution. It may make the execution faster,
            because the temp variable's shape may be the same between two iterations.
-            Default 1.
-
-            NOTES:
-                1. If you fetch data when calling the 'run', the ParallelExecutor
-                will clean up the temp variables at the end of the current iteration.
-                2. In some NLP model, it may cause the GPU memory is insufficient,
-                in this case, you should reduce `num_iteration_per_drop_scope`.
+            Default 100.
+
+            .. note::
+                1. If you fetch data when calling the 'run', the ParallelExecutor
+                   will clean up the temp variables at the end of the current iteration.
+                2. In some NLP models, it may cause the GPU memory to be insufficient;
+                   in that case, you should reduce `num_iteration_per_drop_scope`.
+
+            Examples:
+                .. code-block:: python
+
+                    import paddle
+                    import paddle.static as static
+
+                    paddle.enable_static()
+
+                    exec_strategy = static.ExecutionStrategy()
+                    exec_strategy.num_iteration_per_drop_scope = 10
            )DOC")
      .def_property(
          "num_iteration_per_run",
@@ -2067,7 +2098,18 @@ All parameter, weight, gradient are variables in Paddle.
            self.num_iteration_per_run_ = num_iteration_per_run;
          },
-          R"DOC(This config that how many iteration the executor will run when
-            user call exe.run() in python
+          R"DOC(This config defines how many iterations the executor will run when
+            the user calls exe.run() in Python. Default: 1.
+
+            Examples:
+                .. code-block:: python
+
+                    import paddle
+                    import paddle.static as static
+
+                    paddle.enable_static()
+
+                    exec_strategy = static.ExecutionStrategy()
+                    exec_strategy.num_iteration_per_run = 10
            )DOC")
      .def_property(
          "use_thread_barrier",
@@ -2097,29 +2139,34 @@ All parameter, weight, gradient are variables in Paddle.
    BuildStrategy allows the user to more precisely control how to
    build the SSA Graph in ParallelExecutor by setting the property.

+    Returns:
+        BuildStrategy: A BuildStrategy object.
+
    Examples:
        .. code-block:: python

            import os
-            import numpy as np
-            import paddle.fluid as fluid
+            import paddle
+            import paddle.static as static
+
+            paddle.enable_static()

-            os.environ["CPU_NUM"] = '2'
-            places = fluid.cpu_places()
+            os.environ['CPU_NUM'] = str(2)
+            places = static.cpu_places()

-            data = fluid.layers.data(name="x", shape=[1], dtype="float32")
-            hidden = fluid.layers.fc(input=data, size=10)
-            loss = fluid.layers.mean(hidden)
-            fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
+            data = static.data(name="x", shape=[None, 1], dtype="float32")
+            hidden = static.nn.fc(input=data, size=10)
+            loss = paddle.mean(hidden)
+            paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

-            build_strategy = fluid.BuildStrategy()
+            build_strategy = static.BuildStrategy()
            build_strategy.enable_inplace = True
            build_strategy.memory_optimize = True
-            build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
-            program = fluid.compiler.CompiledProgram(fluid.default_main_program())
+            build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce
+            program = static.CompiledProgram(static.default_main_program())
            program = program.with_data_parallel(loss_name=loss.name,
                                                 build_strategy=build_strategy,
                                                 places=places)
)DOC");
    py::enum_<BuildStrategy::ReduceStrategy>(build_strategy, "ReduceStrategy")
@@ -2154,9 +2201,13 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                build_strategy = fluid.BuildStrategy()
-                build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
+                build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce
        )DOC")
      .def_property(
          "gradient_scale_strategy",
@@ -2178,50 +2229,51 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                import paddle.fluid.compiler as compiler
                import numpy
                import os
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()

                use_cuda = True
-                place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-                exe = fluid.Executor(place)
+                place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+                exe = static.Executor(place)

                # NOTE: If you use CPU to run the program, you need
-                # to specify the CPU_NUM, otherwise, fluid will use
+                # to specify the CPU_NUM, otherwise, paddle will use
                # the number of all logical cores as the CPU_NUM.
                # In that case, the batch size of the input should be
                # greater than CPU_NUM; if not, the process will
                # fail with an exception.
                if not use_cuda:
                    os.environ['CPU_NUM'] = str(2)
-                    places = fluid.cpu_places()
+                    places = static.cpu_places()
                else:
-                    places = places = fluid.cuda_places()
+                    places = static.cuda_places()

-                data = fluid.layers.data(name='X', shape=[1], dtype='float32')
-                hidden = fluid.layers.fc(input=data, size=10)
-                loss = fluid.layers.mean(hidden)
-                fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
+                data = static.data(name='X', shape=[None, 1], dtype='float32')
+                hidden = static.nn.fc(input=data, size=10)
+                loss = paddle.mean(hidden)
+                paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

-                fluid.default_startup_program().random_seed=1
-                exe.run(fluid.default_startup_program())
+                exe.run(static.default_startup_program())

-                build_strategy = fluid.BuildStrategy()
+                build_strategy = static.BuildStrategy()
                build_strategy.gradient_scale_strategy = \
-                    fluid.BuildStrategy.GradientScaleStrategy.Customized
-                compiled_prog = compiler.CompiledProgram(
-                    fluid.default_main_program()).with_data_parallel(
+                    static.BuildStrategy.GradientScaleStrategy.Customized
+                compiled_prog = static.CompiledProgram(
+                    static.default_main_program()).with_data_parallel(
                        loss_name=loss.name, build_strategy=build_strategy,
-                        places = places)
+                        places=places)

                dev_count = len(places)
                x = numpy.random.random(size=(10, 1)).astype('float32')
                loss_grad = numpy.ones((dev_count)).astype("float32") * 0.01
                loss_grad_name = loss.name+"@GRAD"
                loss_data = exe.run(compiled_prog,
                                    feed={"X": x, loss_grad_name : loss_grad},
                                    fetch_list=[loss.name, loss_grad_name])
        )DOC")
      .def_property(
          "debug_graphviz_path",
@@ -2240,10 +2292,13 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                build_strategy = fluid.BuildStrategy()
-                build_strategy.debug_graphviz_path = "./graph"
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
+                build_strategy.debug_graphviz_path = "./graph"
        )DOC")
      .def_property(
          "enable_sequential_execution",
@@ -2263,8 +2318,12 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                build_strategy = fluid.BuildStrategy()
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
                build_strategy.enable_sequential_execution = True
        )DOC")
      .def_property(
@@ -2285,8 +2344,12 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                build_strategy = fluid.BuildStrategy()
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
                build_strategy.remove_unnecessary_lock = True
        )DOC")
      .def_property(
@@ -2351,8 +2414,12 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                build_strategy = fluid.BuildStrategy()
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
                build_strategy.fuse_elewise_add_act_ops = True
        )DOC")
      .def_property(
@@ -2372,8 +2439,12 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                build_strategy = fluid.BuildStrategy()
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
                build_strategy.fuse_bn_act_ops = True
        )DOC")
      .def_property(
@@ -2394,8 +2465,12 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                build_strategy = fluid.BuildStrategy()
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
                build_strategy.enable_auto_fusion = True
        )DOC")
      .def_property(
@@ -2419,8 +2494,12 @@ All parameter, weight, gradient are variables in Paddle.
        Examples:
            .. code-block:: python

-                import paddle.fluid as fluid
-                build_strategy = fluid.BuildStrategy()
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
                build_strategy.fuse_relu_depthwise_conv = True
        )DOC")
.def_property("fuse_broadcast_ops", .def_property("fuse_broadcast_ops",
...@@ -2445,8 +2524,12 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -2445,8 +2524,12 @@ All parameter, weight, gradient are variables in Paddle.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
build_strategy = fluid.BuildStrategy() import paddle.static as static
paddle.enable_static()
build_strategy = static.BuildStrategy()
build_strategy.fuse_broadcast_ops = True build_strategy.fuse_broadcast_ops = True
)DOC") )DOC")
.def_property("fuse_all_optimizer_ops", .def_property("fuse_all_optimizer_ops",
...@@ -2481,8 +2564,12 @@ All parameter, weight, gradient are variables in Paddle. ...@@ -2481,8 +2564,12 @@ All parameter, weight, gradient are variables in Paddle.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle
build_strategy = fluid.BuildStrategy() import paddle.static as static
paddle.enable_static()
build_strategy = static.BuildStrategy()
build_strategy.sync_batch_norm = True build_strategy.sync_batch_norm = True
)DOC") )DOC")
      .def_property(
@@ -2512,7 +2599,20 @@ All parameter, weight, gradient are variables in Paddle.
        Default None. None means the framework will choose whether to use
        this strategy automatically. Currently, None means that it is
        enabled when GC is disabled, and disabled when GC is enabled.
-        True means enabling and False means disabling. Default is None.)DOC")
+        True means enabling and False means disabling. Default is None.
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+                import paddle.static as static
+
+                paddle.enable_static()
+
+                build_strategy = static.BuildStrategy()
+                build_strategy.memory_optimize = True
+        )DOC")
      .def_property(
          "is_distribution",
          [](const BuildStrategy &self) { return self.is_distribution_; },
......
@@ -19,7 +19,7 @@ __all__ = [
    'name_scope', 'ParallelExecutor', 'program_guard', 'WeightNormParamAttr',
    'default_main_program', 'default_startup_program', 'Program', 'data',
    'InputSpec', 'save', 'load', 'save_inference_model', 'load_inference_model',
-    'load_program_state', 'set_program_state'
+    'load_program_state', 'set_program_state', 'cpu_places', 'cuda_places'
]

from . import nn
@@ -38,6 +38,8 @@ from ..fluid.framework import default_startup_program  #DEFINE_ALIAS
from ..fluid.framework import Program  #DEFINE_ALIAS
from ..fluid.framework import name_scope  #DEFINE_ALIAS
from ..fluid.framework import program_guard  #DEFINE_ALIAS
+from ..fluid.framework import cpu_places  #DEFINE_ALIAS
+from ..fluid.framework import cuda_places  #DEFINE_ALIAS
from ..fluid.layers.control_flow import Print  #DEFINE_ALIAS
from ..fluid.layers.nn import py_func  #DEFINE_ALIAS
from ..fluid.parallel_executor import ParallelExecutor  #DEFINE_ALIAS
......
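Editor's note: the two aliases added above make the device-place helpers available directly from `paddle.static`. A minimal usage sketch, assuming the aliases behave exactly like their `fluid.framework` counterparts (where `CPU_NUM` controls how many CPU places are returned):

    .. code-block:: python

        import os
        import paddle
        import paddle.static as static

        paddle.enable_static()

        # cpu_places() returns CPU_NUM places; cuda_places() returns one
        # place per visible GPU device.
        os.environ['CPU_NUM'] = '2'
        print(static.cpu_places())        # e.g. [CPUPlace, CPUPlace]
        if paddle.is_compiled_with_cuda():
            print(static.cuda_places())   # e.g. [CUDAPlace(0), ...]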