Unverified commit 292dfbce authored by chengduo, committed by GitHub

fix build strategy doc (#18725)

test=develop
Parent c167a4b4
@@ -1283,12 +1283,13 @@ All parameter, weight, gradient are variables in Paddle.
             PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
             self.reduce_ = strategy;
           },
R"DOC(The type is STR, there are two reduce strategies in ParallelExecutor, R"DOC(The type is fluid.BuildStrategy.ReduceStrategy, there are two reduce
'AllReduce' and 'Reduce'. If you want that all the parameters' strategies in ParallelExecutor, AllReduce and Reduce. If you want
optimization are done on all devices independently, you should choose 'AllReduce'; that all the parameters' optimization are done on all devices independently,
if you choose 'Reduce', all the parameters' optimization will be evenly distributed you should choose AllReduce; if you choose Reduce, all the parameters'
to different devices, and then broadcast the optimized parameter to other devices. optimization will be evenly distributed to different devices, and then
In some models, `Reduce` is faster. Default 'AllReduce'. broadcast the optimized parameter to other devices.
Default 'AllReduce'.
                Examples:
                    .. code-block:: python
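The Python example for reduce_strategy is collapsed in this hunk. A minimal sketch of setting the option, assuming the fluid.BuildStrategy.ReduceStrategy enum documented above:

    .. code-block:: python

        import paddle.fluid as fluid

        build_strategy = fluid.BuildStrategy()
        # Reduce: each device updates its slice of the parameters and
        # broadcasts the result; AllReduce (the default) performs the
        # update for all parameters on every device.
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce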
@@ -1302,21 +1303,62 @@ All parameter, weight, gradient are variables in Paddle.
[](const BuildStrategy &self) { return self.gradient_scale_; }, [](const BuildStrategy &self) { return self.gradient_scale_; },
[](BuildStrategy &self, [](BuildStrategy &self,
BuildStrategy::GradientScaleStrategy strategy) { BuildStrategy::GradientScaleStrategy strategy) {
PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized."); PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finalized.");
self.gradient_scale_ = strategy; self.gradient_scale_ = strategy;
}, },
R"DOC(The type is STR, there are three ways of defining :math:`loss@grad` in R"DOC(The type is fluid.BuildStrategy.GradientScaleStrategy, there are three
ParallelExecutor, 'CoeffNumDevice', 'One' and 'Customized'. By default, ways of defining :math:`loss@grad` in ParallelExecutor, CoeffNumDevice,
ParallelExecutor sets the :math:`loss@grad` according to the number of devices. One and Customized. By default, ParallelExecutor sets the :math:`loss@grad`
If you want to customize :math:`loss@grad`, you can choose 'Customized'. according to the number of devices. If you want to customize :math:`loss@grad`,
Default 'CoeffNumDevice'. you can choose Customized. Default 'CoeffNumDevice'.
                Examples:
                    .. code-block:: python

                        import paddle.fluid as fluid
+                        import paddle.fluid.compiler as compiler
+                        import numpy
+                        import os
+                        use_cuda = True
+                        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+                        exe = fluid.Executor(place)
+                        # NOTE: If you use CPU to run the program, you need
+                        # to specify the CPU_NUM, otherwise fluid will use
+                        # the number of logical cores as the CPU_NUM.
+                        # In that case, the batch size of the input should be
+                        # greater than CPU_NUM; if not, the process will
+                        # fail with an exception.
+                        if not use_cuda:
+                            os.environ['CPU_NUM'] = str(2)
+                            places = fluid.cpu_places()
+                        else:
+                            places = fluid.cuda_places()
+                        data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+                        hidden = fluid.layers.fc(input=data, size=10)
+                        loss = fluid.layers.mean(hidden)
+                        fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
+                        fluid.default_startup_program().random_seed=1
+                        exe.run(fluid.default_startup_program())
                        build_strategy = fluid.BuildStrategy()
-                        build_strategy.gradient_scale_strategy = True
+                        build_strategy.gradient_scale_strategy = \
+                                 fluid.BuildStrategy.GradientScaleStrategy.Customized
+                        compiled_prog = compiler.CompiledProgram(
+                                 fluid.default_main_program()).with_data_parallel(
+                                          loss_name=loss.name, build_strategy=build_strategy,
+                                          places=places)
+                        dev_count = len(places)
+                        x = numpy.random.random(size=(10, 1)).astype('float32')
+                        loss_grad = numpy.ones((dev_count)).astype("float32") * 0.01
+                        loss_grad_name = loss.name + "@GRAD"
+                        loss_data = exe.run(compiled_prog,
+                                            feed={"X": x, loss_grad_name: loss_grad},
+                                            fetch_list=[loss.name, loss_grad_name])
)DOC") )DOC")
.def_property( .def_property(
"debug_graphviz_path", "debug_graphviz_path",
@@ -1325,7 +1367,7 @@ All parameter, weight, gradient are variables in Paddle.
            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
            self.debug_graphviz_path_ = path;
          },
R"DOC(The type is STR, debug_graphviz_path indicate the path that R"DOC(The type is STR, debug_graphviz_path indicates the path that
                writing the SSA Graph to file in the form of graphviz.
                It is useful for debugging. Default ""
@@ -1334,7 +1376,8 @@ All parameter, weight, gradient are variables in Paddle.
                        import paddle.fluid as fluid
                        build_strategy = fluid.BuildStrategy()
build_strategy.debug_graphviz_path = "" build_strategy.debug_graphviz_path = "./graph"
)DOC") )DOC")
.def_property( .def_property(
"enable_sequential_execution", "enable_sequential_execution",
@@ -1345,7 +1388,8 @@ All parameter, weight, gradient are variables in Paddle.
            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
            self.enable_sequential_execution_ = b;
          },
R"DOC(The type is BOOL. If set True, the execution order of ops would be the same as what is in the program. Default False. R"DOC(The type is BOOL. If set True, the execution order of ops would
be the same as what is in the program. Default False.
                Examples:
                    .. code-block:: python
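The example itself is collapsed in this hunk; a minimal sketch, assuming the same fluid API as the examples above:

    .. code-block:: python

        import paddle.fluid as fluid

        build_strategy = fluid.BuildStrategy()
        # Force ops to run in program order, trading parallelism
        # for a reproducible execution order.
        build_strategy.enable_sequential_execution = True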
@@ -1363,7 +1407,8 @@ All parameter, weight, gradient are variables in Paddle.
            PADDLE_ENFORCE(!self.IsFinalized(), "BuildStrategy is finlaized.");
            self.remove_unnecessary_lock_ = b;
          },
R"DOC(The type is BOOL. If set True, some locks in GPU ops would be released and ParallelExecutor would run faster. Default True. R"DOC(The type is BOOL. If set True, some locks in GPU ops would be
released and ParallelExecutor would run faster. Default True.
                Examples:
                    .. code-block:: python
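The example is likewise collapsed here; a minimal sketch under the same assumptions:

    .. code-block:: python

        import paddle.fluid as fluid

        build_strategy = fluid.BuildStrategy()
        # Already True by default; shown explicitly for clarity.
        build_strategy.remove_unnecessary_lock = True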