From afe0933fc0e3cf04dc5be5653e3977303477ff76 Mon Sep 17 00:00:00 2001 From: Yuan Shuai Date: Sat, 12 Oct 2019 12:19:44 +0800 Subject: [PATCH] Fix ErrorClipByValue, GradientClipByGlobalNorm, GradientClipByValue (#20279) (#20528) * Fix ErrorClipByValue, GradientClipByGlobalNorm, GradientClipByValue * add API.spec --- paddle/fluid/API.spec | 6 ++--- python/paddle/fluid/clip.py | 52 ++++++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 4ffec07576a..298f399a267 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -1106,13 +1106,13 @@ paddle.fluid.DataFeeder.decorate_reader (ArgSpec(args=['self', 'reader', 'multi_ paddle.fluid.DataFeeder.feed (ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None), ('document', 'ce65fe1d81dcd7067d5092a5667f35cc')) paddle.fluid.DataFeeder.feed_parallel (ArgSpec(args=['self', 'iterable', 'num_places'], varargs=None, keywords=None, defaults=(None,)), ('document', '334c6af750941a4397a2dd2ea8a4d76f')) paddle.fluid.clip.set_gradient_clip (ArgSpec(args=['clip', 'param_list', 'program'], varargs=None, keywords=None, defaults=(None, None)), ('document', '7a0f76a77dd88a74f24485a103a22fc1')) -paddle.fluid.clip.ErrorClipByValue ('paddle.fluid.clip.ErrorClipByValue', ('document', 'e6f815a03be88dee2537707d9e6b9209')) +paddle.fluid.clip.ErrorClipByValue ('paddle.fluid.clip.ErrorClipByValue', ('document', '629b07558971a8ab5e954d9a77457656')) paddle.fluid.clip.ErrorClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.clip.GradientClipByValue ('paddle.fluid.clip.GradientClipByValue', ('document', 'b7a22f687269cae0c338ef3866322db7')) +paddle.fluid.clip.GradientClipByValue ('paddle.fluid.clip.GradientClipByValue', ('document', 'b6eb70fb2a39db5c00534f20d62f5741')) 
paddle.fluid.clip.GradientClipByValue.__init__ (ArgSpec(args=['self', 'max', 'min'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.clip.GradientClipByNorm ('paddle.fluid.clip.GradientClipByNorm', ('document', '93d62f284d2cdb87f2723fcc63d818f9')) paddle.fluid.clip.GradientClipByNorm.__init__ (ArgSpec(args=['self', 'clip_norm'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) -paddle.fluid.clip.GradientClipByGlobalNorm ('paddle.fluid.clip.GradientClipByGlobalNorm', ('document', 'ef50acbe212101121d4b82f693ec1733')) +paddle.fluid.clip.GradientClipByGlobalNorm ('paddle.fluid.clip.GradientClipByGlobalNorm', ('document', '025b2f323f59c882e2245c2fb39c66bb')) paddle.fluid.clip.GradientClipByGlobalNorm.__init__ (ArgSpec(args=['self', 'clip_norm', 'group_name'], varargs=None, keywords=None, defaults=('default_group',)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.dygraph_grad_clip.GradClipByValue ('paddle.fluid.dygraph_grad_clip.GradClipByValue', ('document', '6971a42222de0387a7ee9c59671dd2e3')) paddle.fluid.dygraph_grad_clip.GradClipByValue.__init__ (ArgSpec(args=['self', 'min_value', 'max_value'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index d2925f2653e..d280ec50354 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -43,7 +43,8 @@ class ErrorClipByValue(BaseErrorClipAttr): """ Clips tensor values to the range [min, max]. - Given a tensor t, this operation clips its value to min and max inplace. + Given a tensor ``t`` (see Examples below), this operation clips its value \ + to ``min`` and ``max`` inplace. - Any values less than min are set to min. - Any values greater than max are set to max. 
@@ -51,7 +52,7 @@ class ErrorClipByValue(BaseErrorClipAttr): Args: max (float): The maximum value to clip by. min (float, optional): The minimum value to clip by. if not set by user, \ - will be set to -max by framework. + will be set to ``-max`` by framework. Examples: .. code-block:: python @@ -138,10 +139,10 @@ class GradientClipByValue(BaseGradientClipAttr): """ Clips gradient values to the range [min, max]. - Given a tensor t, this operation clips its value to min and max inplace. + Given a tensor ``t``, this operation clips its value to ``min`` and ``max`` inplace. - - Any values less than min are set to min. - - Any values greater than max are set to max. + - Any values less than min are set to ``min``. + - Any values greater than max are set to ``max``. Args: max (float): The maximum value to clip by. @@ -276,9 +277,12 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): """ Clips values of multiple tensors by the ratio of the sum of their norms. - Given a list of tensors t_list, and a clipping ratio clip_norm, this - operation returns a list of clipped tensors list_clipped and the global - norm (global_norm) of all tensors in t_list. + Given a list of tensors ``t_list``, and a clipping ratio ``clip_norm``, + this operation returns an instance of this class as the first parameter of + the ``set_gradient_clip`` method; the second parameter of ``set_gradient_clip`` + is used to compute the clipped tensors list ``list_clipped`` (default value + is ``None``, compute global norm ``global_norm`` based on all tensors). + global norm (global_norm) of all tensors in t_list. To perform the clipping, the values :math:`t\_list[i]` are set to: @@ -303,6 +307,10 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): Examples: ..
code-block:: python import paddle.fluid as fluid + import paddle.fluid.core as core + import paddle + + place = core.CPUPlace() prog = fluid.framework.Program() startup_program = fluid.framework.Program() with fluid.program_guard( @@ -314,15 +322,41 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): predict = fluid.layers.fc(input=hidden2, size=10, act='softmax') cost = fluid.layers.cross_entropy(input=predict, label=label) avg_cost = fluid.layers.mean(cost) + prog_clip = prog.clone() avg_cost_clip = prog_clip.block(0).var(avg_cost.name) + + p_g = fluid.backward.append_backward(loss=avg_cost) p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip) - with fluid.program_guard(main_program=prog_clip): + with fluid.program_guard(main_program=prog_clip, startup_program=startup_program): fluid.clip.set_gradient_clip( fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0)) p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip) + grad_list = [elem[1] for elem in p_g] + grad_clip_list = [elem[1] for elem in p_g_clip] + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=128) + + exe = fluid.Executor(place) + feeder = fluid.DataFeeder(feed_list=[image, label], place=place) + exe.run(startup_program) + + count = 0 + for data in train_reader(): + count += 1 + print("count:%s" % count) + if count > 5: + break + out = exe.run(prog, feed=feeder.feed(data), fetch_list=grad_list) + out_clip = exe.run(prog_clip, + feed=feeder.feed(data), + fetch_list=grad_clip_list) + """ def __init__(self, clip_norm, group_name="default_group"): -- GitLab