Commit 03f4beb8 authored by Q qiaolongfei

add doc for ErrorClipByValue, GradientClipByValue and GradientClipByGlobalNorm

Parent e3578ab1
@@ -24,8 +24,6 @@ __all__ = [
     'GradientClipByValue',
     'GradientClipByNorm',
     'GradientClipByGlobalNorm',
-    'append_gradient_clip_ops',
-    'error_clip_callback',
 ]
@@ -38,6 +36,25 @@ class BaseErrorClipAttr(object):
 class ErrorClipByValue(BaseErrorClipAttr):
+    """
+    Clips tensor values to the range [min, max].
+
+    Given a tensor ``t``, this operation clips its value to ``min`` and ``max`` in place.
+
+    - Any values less than min are set to min.
+    - Any values greater than max are set to max.
+
+    Args:
+        max (float): The maximum value to clip by.
+        min (float, optional): The minimum value to clip by. If not set by the
+            user, it will be set to ``-max`` by the framework.
+
+    Examples:
+        .. code-block:: python
+
+            var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...)
+    """
+
     def __init__(self, max, min=None):
         max = float(max)
         if min is None:
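For reference, here is a minimal NumPy sketch (not part of the diff) of the elementwise clamp that ErrorClipByValue documents; the helper name ``clip_by_value`` is hypothetical:

.. code-block:: python

    import numpy as np

    # Hypothetical helper mirroring the documented behavior: clamp a
    # tensor to [min, max], with min defaulting to -max.
    def clip_by_value(t, max, min=None):
        if min is None:
            min = -max  # the framework's documented default
        return np.clip(t, min, max)

    t = np.array([-7.0, -2.0, 0.5, 6.0])
    print(clip_by_value(t, max=5.0))  # [-5.  -2.   0.5  5. ]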
@@ -99,6 +116,31 @@ class NullGradientClipAttr(BaseGradientClipAttr):
 class GradientClipByValue(BaseGradientClipAttr):
+    """
+    Clips gradient values to the range [min, max].
+
+    Given a tensor ``t``, this operation clips its value to ``min`` and ``max`` in place.
+
+    - Any values less than min are set to min.
+    - Any values greater than max are set to max.
+
+    Args:
+        max (float): The maximum value to clip by.
+        min (float, optional): The minimum value to clip by. If not set by the
+            user, it will be set to ``-max`` by the framework.
+
+    Examples:
+        .. code-block:: python
+
+            w_param_attrs = ParamAttr(name=None,
+                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+                                      learning_rate=1.0,
+                                      regularizer=L1Decay(1.0),
+                                      trainable=True,
+                                      clip=GradientClipByValue(min=-1.0, max=1.0))
+            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
+    """
+
     def __init__(self, max, min=None):
         max = float(max)
         if min is None:
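A hedged sketch of how this clamp acts on a gradient rather than a value: the clipped gradient, not the raw one, feeds the parameter update. NumPy stands in for the framework here and plain SGD is assumed as the update rule, both purely for illustration:

.. code-block:: python

    import numpy as np

    grad = np.array([-3.0, 0.2, 4.5])
    # Equivalent of GradientClipByValue(max=1.0, min=-1.0) applied to grad.
    clipped = np.clip(grad, -1.0, 1.0)

    w = np.zeros_like(grad)
    lr = 0.1
    w -= lr * clipped   # the optimizer sees only the clipped gradient
    print(clipped)      # [-1.   0.2  1. ]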
@@ -120,6 +162,37 @@ class GradientClipByValue(BaseGradientClipAttr):
 class GradientClipByNorm(BaseGradientClipAttr):
+    """
+    Clips tensor values to a maximum L2 norm.
+
+    This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`.
+    If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out`
+    will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than
+    :math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of
+    :math:`Out` equal to :math:`max\_norm`, as shown in the following formula:
+
+    .. math::
+
+        Out = \\frac{max\_norm * X}{norm(X)},
+
+    where :math:`norm(X)` represents the L2 norm of :math:`X`.
+
+    Args:
+        clip_norm (float): The maximum norm value (:math:`max\_norm` in the formula above).
+
+    Examples:
+        .. code-block:: python
+
+            w_param_attrs = ParamAttr(name=None,
+                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+                                      learning_rate=1.0,
+                                      regularizer=L1Decay(1.0),
+                                      trainable=True,
+                                      clip=GradientClipByNorm(clip_norm=2.0))
+            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
+    """
+
     def __init__(self, clip_norm):
         self.clip_norm = clip_norm
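The formula above can be checked directly. Below is a minimal NumPy sketch of the documented scaling, where ``clip_by_norm`` is a hypothetical stand-in for what the framework applies to each gradient:

.. code-block:: python

    import numpy as np

    def clip_by_norm(x, clip_norm):
        # Leave x alone if its L2 norm is within bound; otherwise scale
        # it linearly so the result has norm exactly clip_norm.
        norm = np.linalg.norm(x)
        if norm <= clip_norm:
            return x
        return clip_norm * x / norm

    g = np.array([3.0, 4.0])              # ||g||_2 = 5.0
    out = clip_by_norm(g, clip_norm=2.0)
    print(out, np.linalg.norm(out))       # [1.2 1.6] 2.0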
@@ -184,13 +257,14 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
 def set_gradient_clip(clip, param_list=None, program=None):
     """
     To specify parameters that require gradient clipping.
+
     Args:
         clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
             which describes the type and detailed attributes of required gradient clip.
-        param_list(list, None by default): Parameters that require gradient clip.
+        param_list(list(Variable)): Parameters that require gradient clip.
             It can be a list of parameters or a list of parameter names.
             When it's None, all parameters in the program will be included.
-        program(Program, None by default): The program where parameters are.
+        program(Program): The program where parameters are.
             Will be the default main program when assigned with None.
     """
     if not isinstance(clip, BaseGradientClipAttr):
......
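To round out the documented signature, a hedged usage sketch: applying one clip policy to every parameter of the default main program. It assumes set_gradient_clip is importable from fluid.clip and that GradientClipByGlobalNorm takes a clip_norm argument; neither is shown in this diff:

.. code-block:: python

    import paddle.fluid as fluid

    # With param_list=None and program=None, the clip applies to all
    # parameters in the default main program.
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))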