diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 243e74c9a3d6a14677e494aa610f3432c7f16832..fcefdb82f250c47b0756f0d644298da662c3f944 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -86,7 +86,7 @@ paddle.fluid.layers.conv2d ArgSpec(args=['input', 'num_filters', 'filter_size', paddle.fluid.layers.conv3d ArgSpec(args=['input', 'num_filters', 'filter_size', 'stride', 'padding', 'dilation', 'groups', 'param_attr', 'bias_attr', 'use_cudnn', 'act', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None, None, None, True, None, None)) paddle.fluid.layers.sequence_pool ArgSpec(args=['input', 'pool_type', 'is_test'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.layers.sequence_softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)) -paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(True, None)) +paddle.fluid.layers.softmax ArgSpec(args=['input', 'use_cudnn', 'name'], varargs=None, keywords=None, defaults=(False, None)) paddle.fluid.layers.pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)) paddle.fluid.layers.pool3d ArgSpec(args=['input', 'pool_size', 'pool_type', 'pool_stride', 'pool_padding', 'global_pooling', 'use_cudnn', 'ceil_mode', 'name', 'exclusive'], varargs=None, keywords=None, defaults=(-1, 'max', 1, 0, False, True, False, None, True)) paddle.fluid.layers.adaptive_pool2d ArgSpec(args=['input', 'pool_size', 'pool_type', 'require_index', 'name'], varargs=None, keywords=None, defaults=('max', False, None)) @@ -128,7 +128,7 @@ paddle.fluid.layers.row_conv ArgSpec(args=['input', 'future_context_size', 'para paddle.fluid.layers.multiplex ArgSpec(args=['inputs', 'index'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.layer_norm ArgSpec(args=['input', 'scale', 'shift', 'begin_norm_axis', 'epsilon', 'param_attr', 'bias_attr', 'act', 'name'], varargs=None, keywords=None, defaults=(True, True, 1, 1e-05, None, None, None, None)) paddle.fluid.layers.group_norm ArgSpec(args=['input', 'groups', 'epsilon', 'param_attr', 'bias_attr', 'act', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(1e-05, None, None, None, 'NCHW', None)) -paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, False, False)) +paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax'], varargs=None, keywords=None, defaults=(False, -100, True, False)) paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)) diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc index 0397c7791e1768393ff642743d2f7085b25fb551..7754d2bfebdbc81e25432641b2eb4315386f75ff 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op.cc +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op.cc @@ -46,10 +46,10 @@ class SoftmaxWithCrossEntropyOpMaker .SetDefault(false); AddAttr( "numeric_stable_mode", - "(bool, default: false), A flag to indicate whether to use more " + "(bool, default: true), A flag to indicate whether to use more " "numerically stable algorithm. This flag is only valid when " "soft_label is false and GPU is used.") - .SetDefault(false); + .SetDefault(true); AddAttr( "ignore_index", "(int, default -100), Specifies a target value that is ignored and" diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 4a8488b68c6bc41d5ea6404d1519c2d7de3ebffd..f4a69a268da8426752dd990fdbec062a324dde60 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1767,7 +1767,7 @@ def sequence_softmax(input, use_cudnn=False, name=None): return softmax_out -def softmax(input, use_cudnn=True, name=None): +def softmax(input, use_cudnn=False, name=None): """ The input of the softmax operator is a tensor of any rank. The output tensor has the same shape as the input. @@ -1795,7 +1795,8 @@ def softmax(input, use_cudnn=True, name=None): Args: input (Variable): The input variable. use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \ - library is installed. + library is installed. To improve numerical stablity, set use_cudnn to \ + False by default. Default: False name (str|None): A name for this layer(optional). If set None, the layer will be named automatically. Default: None. @@ -5753,7 +5754,7 @@ def softmax_with_cross_entropy(logits, label, soft_label=False, ignore_index=kIgnoreIndex, - numeric_stable_mode=False, + numeric_stable_mode=True, return_softmax=False): """ **Softmax With Cross Entropy Operator.** @@ -5817,7 +5818,7 @@ def softmax_with_cross_entropy(logits, When soft_label is True or CPU is used, the algorithm is always numerically stable. Note that the speed may be slower when use - stable algorithm. Default: False + stable algorithm. Default: True return_softmax (bool): A flag indicating whether to return the softmax along with the cross entropy loss. Default: False