From 7912e6caa11fa2004eafac3df11421f0fdbd87c8 Mon Sep 17 00:00:00 2001
From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com>
Date: Wed, 25 Sep 2019 18:09:19 +0800
Subject: [PATCH] Expose set_gradient_clip API (#19869)

* expose set_gradient_clip, test=develop, test=document_preview, test=preview

* expose gradient clip, test=develop, test=document_fix

* refine doc, test=develop

* follow lanxiang's comments, test=develop, test=document_fix
---
 paddle/fluid/API.spec       |  1 +
 python/paddle/fluid/clip.py | 56 ++++++++++++++++++++++++++++++++++---
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 1fd4880c16..32025d93fa 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -1061,6 +1061,7 @@ paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'p
 paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'a0ed5ce816b5d603cb595aacb922335a'))
 paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', 'ce65fe1d81dcd7067d5092a5667f35cc'))
 paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '334c6af750941a4397a2dd2ea8a4d76f'))
+paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '77ca02bb37b70d226510df9cf5e45965'))
 paddle.fluid.clip.ErrorClipByValue ('paddle.fluid.clip.ErrorClipByValue', ('document', 'e6f815a03be88dee2537707d9e6b9209'))
 paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.clip.GradientClipByValue ('paddle.fluid.clip.GradientClipByValue', ('document', 'b7a22f687269cae0c338ef3866322db7'))
diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index 09d7975a6b..95d547f2f4 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -24,6 +24,7 @@ from . import core
 from .dygraph.base import _not_support
 
 __all__ = [
+    'set_gradient_clip',
     'ErrorClipByValue',
     'GradientClipByValue',
     'GradientClipByNorm',
@@ -343,12 +344,59 @@ def set_gradient_clip(clip, param_list=None, program=None):
 
     Args:
         clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
+                for example :ref:`api_fluid_clip_GradientClipByGlobalNorm` ,
                 which describes the type and detailed attributes of required gradient clip.
-        param_list(list(Variable)): Parameters that require gradient clip.
+        param_list(list(Variable), optional): Parameters that require gradient clip.
                 It can be a list of parameter or a list of parameter's name.
-                When it's None, all parameters in the program will be included.
-        program(Program): The program where parameters are.
-                Will be the default main program when assigned with None.
+                Default None, meaning that all parameters in the program will be included.
+        program(Program, optional): The program where parameters are located.
+                Default None, meaning that :ref:`api_fluid_default_main_program` is used.
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+            
+            import paddle.fluid as fluid
+
+            def network():
+                image = fluid.layers.data(name='image', shape=[28], dtype='float32')
+                param_attr1 = fluid.ParamAttr("fc1_param")
+                fc1 = fluid.layers.fc(image, size=10, param_attr=param_attr1)
+                param_attr2 = fluid.ParamAttr("fc2_param")
+                fc2 = fluid.layers.fc(fc1, size=10, param_attr=param_attr2)
+                loss = fluid.layers.reduce_mean(fc2)
+                return loss
+
+
+            # network 1: clip the gradients of all parameters
+            with fluid.program_guard(fluid.Program(), fluid.Program()):
+                loss = network()
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
+                sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+                sgd.minimize(loss)
+
+            # network 2: clip the gradients of parameters selected by name
+            with fluid.program_guard(fluid.Program(), fluid.Program()):
+                loss = network()
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByValue(min=-1.0, max=1.0),
+                    param_list=["fc1_param", "fc2_param"])
+                sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+                sgd.minimize(loss)
+
+            # network 3: clip the gradients of parameters selected by Variable
+            with fluid.program_guard(fluid.Program(), fluid.Program()):
+                loss = network()
+                param_var1 = fluid.default_main_program().global_block().var("fc1_param")
+                param_var2 = fluid.default_main_program().global_block().var("fc2_param")
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByValue(min=-1.0, max=1.0),
+                    param_list=[param_var1, param_var2])
+                sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+                sgd.minimize(loss)
     """
     if not isinstance(clip, BaseGradientClipAttr):
         raise TypeError(
-- 
GitLab