diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 429b9b0b5afcfa40cde9ecca808e8ff82061354c..6b1e782239c263f63707c8034e1c2e6de57c6b08 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1198,7 +1198,7 @@ def chunk_eval(input,
 
 
 @deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax")
-def softmax(input, use_cudnn=False, name=None, axis=-1):
+def softmax(input, use_cudnn=True, name=None, axis=-1):
     r"""
     This operator implements the softmax layer. The calculation process is as follows:
 
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index 915668de19d3cb5f67dc835211eb3fddd2dff6bf..45ffd422ac3a7d99affc917775c79ebba5bcecb0 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -843,7 +843,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
 
     if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
         dtype = convert_np_dtype_to_dtype_(dtype)
-    use_cudnn = True if axis is -1 else False
+    use_cudnn = True
 
     if in_dygraph_mode():
         outs_cast = x if dtype is None \
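
Aside from flipping the defaults, the second hunk also retires the expression
"use_cudnn = True if axis is -1 else False", whose identity comparison against
an integer literal ("axis is -1") relies on CPython's small-int caching and
triggers a SyntaxWarning on Python 3.8+. Below is a minimal sketch, not part of
the patch, of how the changed behavior surfaces through the public API; it
assumes a CUDA-enabled Paddle 2.x build, and the tensor shape and variable
names are illustrative only:

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([2, 3, 4], dtype='float32')

    # Before this patch, the functional softmax requested the cuDNN kernel
    # only when axis was -1; after it, the cuDNN path is requested for any
    # axis. The numerical result is the same either way.
    y_last = F.softmax(x, axis=-1)  # cuDNN-eligible both before and after
    y_mid = F.softmax(x, axis=1)    # cuDNN-eligible only after this patch

    # Sanity check: entries along the softmax axis sum to 1.
    print(paddle.sum(y_mid, axis=1))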