From 7912e6caa11fa2004eafac3df11421f0fdbd87c8 Mon Sep 17 00:00:00 2001 From: Zeng Jinle <32832641+sneaxiy@users.noreply.github.com> Date: Wed, 25 Sep 2019 18:09:19 +0800 Subject: [PATCH] Expose set_gradient_clip API (#19869) * expose set_gradient_clip, test=develop, test=document_preview, test=preview * expose gradient clip, test=develop, test=document_fix * refine doc, test=develop * follow lanxiang's comments, test=develop, test=document_fix --- paddle/fluid/API.spec | 1 + python/paddle/fluid/clip.py | 56 ++++++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 1fd4880c16..32025d93fa 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -1061,6 +1061,7 @@ paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'p paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)), ('document', 'a0ed5ce816b5d603cb595aacb922335a')) paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', 'ce65fe1d81dcd7067d5092a5667f35cc')) paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '334c6af750941a4397a2dd2ea8a4d76f')) +paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '77ca02bb37b70d226510df9cf5e45965')) paddle.fluid.clip.ErrorClipByValue ('paddle.fluid.clip.ErrorClipByValue', ('document', 'e6f815a03be88dee2537707d9e6b9209')) paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.clip.GradientClipByValue ('paddle.fluid.clip.GradientClipByValue', ('document', 'b7a22f687269cae0c338ef3866322db7')) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 09d7975a6b..95d547f2f4 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -24,6 +24,7 @@ from . import core from .dygraph.base import _not_support __all__ = [ + 'set_gradient_clip', 'ErrorClipByValue', 'GradientClipByValue', 'GradientClipByNorm', @@ -343,12 +344,59 @@ def set_gradient_clip(clip, param_list=None, program=None): Args: clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr, + for example :ref:`api_fluid_clip_GradientClipByGlobalNorm` , which describes the type and detailed attributes of required gradient clip. - param_list(list(Variable)): Parameters that require gradient clip. + param_list(list(Variable), optional): Parameters that require gradient clip. It can be a list of parameter or a list of parameter's name. - When it's None, all parameters in the program will be included. - program(Program): The program where parameters are. - Will be the default main program when assigned with None. + Default None, meaning that all parameters in the program will be included. + program(Program, optional): The program where parameters are located. + Default None, meaning that using :ref:`api_fluid_default_main_program` . + + Returns: + None + + Examples: + .. code-block:: python + + import paddle.fluid as fluid + + def network(): + image = fluid.layers.data(name='image', shape=[28], dtype='float32') + param_attr1 = fluid.ParamAttr("fc1_param") + fc1 = fluid.layers.fc(image, size=10, param_attr=param_attr1) + param_attr2 = fluid.ParamAttr("fc2_param") + fc2 = fluid.layers.fc(fc1, size=10, param_attr=param_attr2) + loss = fluid.layers.reduce_mean(fc2) + return loss + + + # network 1: clip all parameter gradient + with fluid.program_guard(fluid.Program(), fluid.Program()): + loss = network() + fluid.clip.set_gradient_clip( + fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0)) + sgd = fluid.optimizer.SGD(learning_rate=1e-3) + sgd.minimize(loss) + + # network 2: clip parameter gradient by name + with fluid.program_guard(fluid.Program(), fluid.Program()): + loss = network() + fluid.clip.set_gradient_clip( + fluid.clip.GradientClipByValue(min=-1.0, max=1.0), + param_list=["fc1_param", "fc2_param"]) + sgd = fluid.optimizer.SGD(learning_rate=1e-3) + sgd.minimize(loss) + + # network 3: clip parameter gradient by var + with fluid.program_guard(fluid.Program(), fluid.Program()): + loss = network() + param_var1 = fluid.default_main_program().global_block().var("fc1_param") + param_var2 = fluid.default_main_program().global_block().var("fc2_param") + fluid.clip.set_gradient_clip( + fluid.clip.GradientClipByValue(min=-1.0, max=1.0), + param_list=[param_var1, param_var2]) + sgd = fluid.optimizer.SGD(learning_rate=1e-3) + sgd.minimize(loss) """ if not isinstance(clip, BaseGradientClipAttr): raise TypeError( -- GitLab