Unverified commit a3ae080a, authored by Charles-hit, committed by GitHub

remove softmax api from fluid (#48388)

* move softmax to paddle2.0

* fix some bugs

* resolve conflict

* remove some code

* modify code style

* fix bugs

* fix code

* fix move code

* fix some bugs

* fix code

* fix some code

* modify the header file

* fix bugs

* fix some examples

* fix mish example

* fix code
Parent ea5ca555
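The diff below switches every remaining caller from the removed `fluid.layers.softmax` to the Paddle 2.0 API. A minimal migration sketch (the tensor name is hypothetical; note that `paddle.nn.functional.softmax` takes the tensor positionally and has no `use_cudnn` flag, since backend selection is handled internally):

```python
import paddle
import paddle.nn.functional as F

logits = paddle.rand([2, 3, 4])  # hypothetical input tensor

# Before (fluid 1.x API removed by this PR):
#   probs = fluid.layers.softmax(logits, use_cudnn=False, axis=-1)
# After (Paddle 2.0 API):
probs = F.softmax(logits, axis=-1)
```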
@@ -626,7 +626,7 @@ def detection_output(
         target_box=loc,
         code_type='decode_center_size',
     )
-    scores = nn.softmax(input=scores)
+    scores = paddle.nn.functional.softmax(scores)
    scores = paddle.transpose(scores, perm=[0, 2, 1])
    scores.stop_gradient = True
    nmsed_outs = helper.create_variable_for_type_inference(
......
@@ -68,7 +68,6 @@ __all__ = [
     'linear_chain_crf',
     'crf_decoding',
     'conv2d',
-    'softmax',
     'pool2d',
     'batch_norm',
     'dropout',
@@ -145,7 +144,7 @@ def _get_reduce_dim(dim, input):
     else:
         raise TypeError(
             "The type of dim must be int, list, tuple or range, but received {}".format(
-                type(axis)
+                type(dim)
             )
         )
     if dim is None:
@@ -1123,147 +1122,6 @@ def dropout(
     return out
 
-
-@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax")
-def softmax(input, use_cudnn=True, name=None, axis=-1):
-    r"""
-    This operator implements the softmax layer. The calculation process is as follows:
-
-    1. The dimension :attr:`axis` of the ``input`` will be permuted to the last.
-
-    2. Then the input tensor will be logically flattened to a 2-D matrix. The matrix's
-    second dimension(row length) is the same as the dimension :attr:`axis` of the input
-    tensor, and the first dimension(column length) is the product of all other
-    dimensions of the input tensor. For each row of the matrix, the softmax operator
-    squashes the K-dimensional(K is the width of the matrix, which is also the size
-    of the input tensor's dimension :attr:`axis`) vector of arbitrary real values to a
-    K-dimensional vector of real values in the range [0, 1] that add up to 1.
-
-    3. After the softmax operation is completed, the inverse operations of steps 1 and 2
-    are performed to restore the two-dimensional matrix to the same dimension as the ``input``.
-
-    It computes the exponential of the given dimension and the sum of exponential
-    values of all the other dimensions in the K-dimensional vector input.
-    Then the ratio of the exponential of the given dimension and the sum of
-    exponential values of all the other dimensions is the output of the softmax
-    operator.
-
-    For each row :math:`i` and each column :math:`j` in the matrix, we have:
-
-    .. math::
-
-        Out[i, j] = \\frac{\\exp(X[i, j])}{\\sum_j(exp(X[i, j])}
-
-    Example:
-
-    .. code-block:: text
-
-        Case 1:
-          Input:
-            X.shape = [2, 3, 4]
-            X.data = [[[2.0, 3.0, 4.0, 5.0],
-                       [3.0, 4.0, 5.0, 6.0],
-                       [7.0, 8.0, 8.0, 9.0]],
-                      [[1.0, 2.0, 3.0, 4.0],
-                       [5.0, 6.0, 7.0, 8.0],
-                       [6.0, 7.0, 8.0, 9.0]]]
-          Attrs:
-            axis = -1
-          Output:
-            Out.shape = [2, 3, 4]
-            Out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
-                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
-                         [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
-                        [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
-                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
-                         [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
-
-        Case 2:
-          Input:
-            X.shape = [2, 3, 4]
-            X.data = [[[2.0, 3.0, 4.0, 5.0],
-                       [3.0, 4.0, 5.0, 6.0],
-                       [7.0, 8.0, 8.0, 9.0]],
-                      [[1.0, 2.0, 3.0, 4.0],
-                       [5.0, 6.0, 7.0, 8.0],
-                       [6.0, 7.0, 8.0, 9.0]]]
-          Attrs:
-            axis = 1
-          Output:
-            Out.shape = [2, 3, 4]
-            Out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
-                         [0.01786798, 0.01786798, 0.04661262, 0.04661262],
-                         [0.97555875, 0.97555875, 0.93623955, 0.93623955]],
-                        [[0.00490169, 0.00490169, 0.00490169, 0.00490169],
-                         [0.26762315, 0.26762315, 0.26762315, 0.26762315],
-                         [0.72747516, 0.72747516, 0.72747516, 0.72747516]]]
-
-    Args:
-        input (Tensor): The input tensor. A multi-dimension ``Tensor`` with type float32 or float64.
-        use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn \
-            library is installed. To improve performance, set use_cudnn to True by default.
-        name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . Default: None.
-            will be named automatically. Default: None.
-        axis (int, optional): The index of dimension to perform softmax calculations, it should
-            be in range :math:`[-1, rank - 1]`, while :math:`rank` is the rank of
-            input tensor. Default: -1. -1 means the last dimension.
-
-    Returns:
-        Tensor: ``Tensor`` indicates the output of softmax. The data type and shape are the same as ``input`` .
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle
-            import paddle.nn.functional as F
-
-            x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
-                                   [3.0, 4.0, 5.0, 6.0],
-                                   [7.0, 8.0, 8.0, 9.0]],
-                                  [[1.0, 2.0, 3.0, 4.0],
-                                   [5.0, 6.0, 7.0, 8.0],
-                                   [6.0, 7.0, 8.0, 9.0]]], dtype='float32')
-            y = F.softmax(x, axis=1)
-            print(y)
-            # [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
-            #   [0.01786798, 0.01786798, 0.04661262, 0.04661262],
-            #   [0.97555870, 0.97555870, 0.93623954, 0.93623954]],
-            #  [[0.00490169, 0.00490169, 0.00490169, 0.00490169],
-            #   [0.26762316, 0.26762316, 0.26762316, 0.26762316],
-            #   [0.72747517, 0.72747517, 0.72747517, 0.72747517]]]
-    """
-    if in_dygraph_mode():
-        return _C_ops.softmax(input, axis)
-
-    if _non_static_mode():
-        return _legacy_C_ops.softmax(
-            input, 'axis', axis, 'use_cudnn', use_cudnn
-        )
-
-    inputs = {"X": [input]}
-    attrs = {"axis": axis, "use_cudnn": use_cudnn}
-
-    helper = LayerHelper('softmax', **locals())
-    check_variable_and_dtype(
-        input, 'input/x', ['float16', 'float32', 'float64'], 'softmax'
-    )
-
-    dtype = helper.input_dtype()
-    softmax_out = helper.create_variable_for_type_inference(dtype)
-    helper.append_op(
-        type="softmax",
-        inputs={"X": input},
-        outputs={"Out": softmax_out},
-        attrs=attrs,
-    )
-
-    return softmax_out
-
-
 def conv2d(
     input,
     num_filters,
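For reference, the docstring removed above describes softmax along `axis` as: permute `axis` to the last dimension, flatten to a 2-D matrix, apply a row-wise softmax, then undo the permutation. A small self-check of that equivalence against the 2.0 API (not part of this commit; assumes Paddle >= 2.0 is installed):

```python
import paddle
import paddle.nn.functional as F

x = paddle.to_tensor(
    [[[2.0, 3.0, 4.0, 5.0],
      [3.0, 4.0, 5.0, 6.0],
      [7.0, 8.0, 8.0, 9.0]]]
)

# Direct softmax over axis=1.
direct = F.softmax(x, axis=1)

# Reference path from the removed docstring: move axis 1 to the end,
# apply softmax on the last axis, then move it back.
moved = paddle.transpose(x, perm=[0, 2, 1])
reference = paddle.transpose(F.softmax(moved, axis=-1), perm=[0, 2, 1])

print(bool(paddle.allclose(direct, reference)))  # True
```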
@@ -1788,7 +1646,7 @@ def pool2d(
         if pool_padding == "VALID":
             padding_algorithm = "VALID"
             pool_padding = [0, 0]
-            if ceil_mode != False:
+            if ceil_mode is not False:
                 raise ValueError(
                     "When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. "
                     "Received ceil_mode: True."
@@ -6643,7 +6501,7 @@ def deformable_roi_pooling(
     )
     input_channels = input.shape[1]
-    if position_sensitive == False:
+    if position_sensitive is False:
         output_channels = input_channels
     else:
         output_channels = input_channels / pooled_height / pooled_width
@@ -6867,9 +6725,11 @@ def mish(x, threshold=20, name=None):
         .. code-block:: python
 
+            import paddle
             import paddle.fluid as fluid
             import numpy as np
 
+            paddle.enable_static()
             DATATYPE='float32'
 
             x_data = np.array([i for i in range(1,5)]).reshape([1,1,4]).astype(DATATYPE)
......
@@ -1304,7 +1304,7 @@ class BeamSearchDecoder(Decoder):
             self.noend_mask_tensor, "float64"
         )
-        step_log_probs = paddle.log(nn.softmax(logits))
+        step_log_probs = paddle.log(paddle.nn.functional.softmax(logits))
         step_log_probs = self._mask_probs(step_log_probs, beam_state.finished)
         log_probs = nn.elementwise_add(
             x=step_log_probs, y=beam_state.log_probs, axis=0
@@ -2330,7 +2330,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper):
             if self.softmax_temperature is not None
             else outputs
         )
-        probs = nn.softmax(logits)
+        probs = paddle.nn.functional.softmax(logits)
         # TODO: remove this stop_gradient. The stop_gradient of sample_ids can
         # not pass to probs, since sampling_id op does not have corresponding
         # grad op and thus can not pass.
......
@@ -354,7 +354,7 @@ class TestSeResNeXt(TestParallelDyGraphRunnerBase):
         label.stop_gradient = True
         out = model(img)
-        softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+        softmax_out = paddle.nn.functional.softmax(out)
         loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
         avg_loss = paddle.mean(x=loss)
         return avg_loss
......
@@ -342,7 +342,7 @@ class MultiHeadAttentionLayer(Layer):
             )
         if attn_bias is not None:
             product += attn_bias
-        weights = fluid.layers.softmax(product)
+        weights = paddle.nn.functional.softmax(product)
         if self._dropout_rate:
             weights_droped = fluid.layers.dropout(
                 weights,
@@ -849,7 +849,7 @@ class WrapDecoderLayer(Layer):
         if dec_inputs is None:
             # Return probs for independent decoder program.
-            predict_out = fluid.layers.softmax(predict)
+            predict_out = paddle.nn.functional.softmax(predict)
             return predict_out
         return predict
......
@@ -1177,7 +1177,7 @@ def multi_head_attention(
         product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
         if attn_bias:
             product += attn_bias
-        weights = layers.softmax(product)
+        weights = paddle.nn.functional.softmax(product)
         if dropout_rate:
             weights = layers.dropout(
                 weights,
@@ -1715,7 +1715,7 @@ def wrap_decoder(
         bias_attr=const_bias_attr,
     )
     if dec_inputs is None:
-        predict = layers.softmax(predict)
+        predict = paddle.nn.functional.softmax(predict)
     return predict
@@ -1834,7 +1834,7 @@ def fast_decode(
             logits = paddle.reshape(logits, (-1, trg_vocab_size))
             topk_scores, topk_indices = layers.topk(
-                input=layers.softmax(logits), k=beam_size
+                input=paddle.nn.functional.softmax(logits), k=beam_size
             )
             accu_scores = layers.elementwise_add(
                 x=paddle.log(topk_scores),
......
@@ -435,7 +435,9 @@ class BaseModel(fluid.dygraph.Layer):
             cell_outputs = self._split_batch_beams(step_input)
             cell_outputs = self.fc(cell_outputs)
-            step_log_probs = paddle.log(fluid.layers.softmax(cell_outputs))
+            step_log_probs = paddle.log(
+                paddle.nn.functional.softmax(cell_outputs)
+            )
             noend_array = [-self.kinf] * self.tar_vocab_size
             noend_array[self.beam_end_token] = 0
             noend_mask_tensor = to_variable(
@@ -703,7 +705,7 @@ class AttentionModel(fluid.dygraph.Layer):
         attn = paddle.transpose(attn, [1, 0, 2])
         attn = paddle.add(attn, mask * 1000000000)
         attn = paddle.transpose(attn, [1, 0, 2])
-        weight = fluid.layers.softmax(attn)
+        weight = paddle.nn.functional.softmax(attn)
         weight_memory = fluid.layers.matmul(weight, memory)
         return weight_memory
......
@@ -67,7 +67,7 @@ class SubNetWithDict(fluid.dygraph.Layer):
         cache["k"], cache["v"] = k, v
         weight = fluid.layers.matmul(x=q, y=k, transpose_y=True)
-        weight = fluid.layers.softmax(weight)
+        weight = paddle.nn.functional.softmax(weight)
         out = fluid.layers.matmul(weight, v)
         return out
@@ -113,7 +113,7 @@ class MainNetWithDict(fluid.dygraph.Layer):
 # Test to call function defined outside of class.
 def update_cache(cache):
     for k, val in cache.items():
-        cache[k] = fluid.layers.softmax(val)
+        cache[k] = paddle.nn.functional.softmax(val)
     return cache
......
@@ -308,7 +308,7 @@ class NetWithExternalFunc(fluid.dygraph.Layer):
 # Test to call function behind caller.
 def softmax(x):
-    return fluid.layers.softmax(x)
+    return paddle.nn.functional.softmax(x)
 
 class TestNetWithExternalFunc(TestDygraphIfElseNet):
......
@@ -535,7 +535,7 @@ def train_mobilenet(args, to_static):
             out = net(img)
             t_end = time.time()
-            softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+            softmax_out = paddle.nn.functional.softmax(out)
             loss = fluid.layers.cross_entropy(
                 input=softmax_out, label=label
             )
......
@@ -48,7 +48,7 @@ class Policy(Layer):
         x = fluid.layers.relu(x)
         action_scores = self.affine2(x)
-        log_prob = fluid.layers.softmax(action_scores, axis=1)
+        log_prob = paddle.nn.functional.softmax(action_scores, axis=1)
         return log_prob
......
@@ -343,7 +343,7 @@ class SeResNeXt(fluid.dygraph.Layer):
         y = paddle.reshape(y, shape=[-1, self.pool2d_avg_output])
         out = self.out(y)
-        softmax_out = fluid.layers.softmax(out)
+        softmax_out = paddle.nn.functional.softmax(out)
         loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
         avg_loss = paddle.mean(x=loss)
......
@@ -153,7 +153,7 @@ class MultiHeadAttention(Layer):
             )
         if attn_bias is not None:
             product += attn_bias
-        weights = layers.softmax(product)
+        weights = paddle.nn.functional.softmax(product)
         if self.dropout_rate:
             weights = layers.dropout(weights, dropout_prob=self.dropout_rate)
         out = layers.matmul(weights, v)
@@ -840,7 +840,7 @@ class Transformer(Layer):
             )
             caches = map_structure(split_batch_beams, caches)
             step_log_probs = split_batch_beams(
-                paddle.log(fluid.layers.softmax(logits))
+                paddle.log(paddle.nn.functional.softmax(logits))
             )
             step_log_probs = mask_probs(
......
@@ -33,7 +33,7 @@ class SimpleLayer(paddle.nn.Layer):
         x = self.conv(x)
         x = paddle.flatten(x, 1, -1)
         if target is not None:
-            x = paddle.fluid.layers.softmax(x)
+            x = paddle.nn.functional.softmax(x)
             loss = paddle.fluid.layers.cross_entropy(x, target)
             if self.use_ipu:
                 loss = paddle.incubate.identity_loss(loss, 1)
......
@@ -48,7 +48,7 @@ class SimpleLayer(paddle.nn.Layer):
         x = paddle.flatten(x, 1, -1)
         if target is not None:
             if self.use_softmax:
-                x = paddle.fluid.layers.softmax(x)
+                x = paddle.nn.functional.softmax(x)
             if self.loss_op:
                 loss = self.loss_op(x, target)
             else:
......
@@ -32,7 +32,7 @@ class SimpleLayer(paddle.nn.Layer):
         x = self.conv(x)
         x = paddle.flatten(x, 1, -1)
         if target is not None:
-            x = paddle.fluid.layers.softmax(x)
+            x = paddle.nn.functional.softmax(x)
             loss = paddle.fluid.layers.cross_entropy(x, target)
             return x, loss
         return x
......
@@ -119,7 +119,7 @@ class SimpleLayer(paddle.nn.Layer):
         print(x)
         x = paddle.flatten(x, 1, -1)
         if target is not None:
-            x = paddle.fluid.layers.softmax(x)
+            x = paddle.nn.functional.softmax(x)
             loss = paddle.fluid.layers.cross_entropy(x, target)
             loss = paddle.incubate.identity_loss(loss, 1)
             return x, loss
......
@@ -47,7 +47,7 @@ class TestBase(IPUOpTest):
         x = paddle.static.data(
             name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32'
         )
-        out = paddle.fluid.layers.softmax(x, **self.attrs)
+        out = paddle.nn.functional.softmax(x, **self.attrs)
         self.fetch_list = [out.name]
 
     def run_model(self, exec_mode):
......
@@ -32,7 +32,7 @@ class MkldnnInplacePassTest(InferencePassTest):
             conv_out_1 = fluid.layers.conv2d(
                 data, num_filters=3, filter_size=3, bias_attr=False
             )
-            softmax_out = fluid.layers.softmax(conv_out_1)
+            softmax_out = paddle.nn.functional.softmax(conv_out_1)
             relu_out = fluid.layers.relu(conv_out_1)
             eltwise_out = fluid.layers.elementwise_add(
                 softmax_out, relu_out, axis=-1
......
@@ -77,7 +77,7 @@ class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest):
 class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest):
     def append_act(self, x):
-        return fluid.layers.softmax(x)
+        return paddle.nn.functional.softmax(x)
 
 class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest):
......
@@ -17,6 +17,7 @@ import unittest
 import numpy as np
 from inference_pass_test import InferencePassTest
 
+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.core import AnalysisConfig
@@ -31,7 +32,7 @@ class FCFusePassTRTTest(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=128, num_flatten_dims=1, act="relu"
             )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {
             "data": np.random.random((32, 128, 2, 2)).astype("float32")
@@ -61,7 +62,7 @@ class FCFusePassTRTStaticDims4Cols1Test(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=64, num_flatten_dims=1, act="relu"
             )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {
             "data": np.random.random((32, 128, 32, 8)).astype("float32")
@@ -89,7 +90,7 @@ class FCFusePassTRTStaticDims4Cols2Test(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=32, num_flatten_dims=2, act="relu"
             )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {
             "data": np.random.random((3, 24, 16, 16)).astype("float32")
@@ -115,7 +116,7 @@ class FCFusePassTRTDynamicDims2Test(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=64, num_flatten_dims=1, act="relu"
             )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
         self.enable_trt = True
@@ -147,7 +148,7 @@ class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=64, num_flatten_dims=1, act="relu"
             )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
         self.enable_trt = True
@@ -179,7 +180,7 @@ class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=64, num_flatten_dims=2, act="relu"
             )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
         self.enable_trt = True
@@ -213,7 +214,7 @@ class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=64, num_flatten_dims=1, act="relu"
            )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {
             "data": np.random.random((32, 12, 4, 6)).astype("float32")
@@ -249,7 +250,7 @@ class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=64, num_flatten_dims=2, act="relu"
             )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {
             "data": np.random.random((32, 128, 32, 32)).astype("float32")
@@ -285,7 +286,7 @@ class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
             fc_out1 = fluid.layers.fc(
                 input=data, size=64, num_flatten_dims=3, act="relu"
             )
-            out = fluid.layers.softmax(input=fc_out1)
+            out = paddle.nn.functional.softmax(fc_out1)
         self.feeds = {
             "data": np.random.random((32, 128, 32, 32)).astype("float32")
......
@@ -30,7 +30,7 @@ class TRTGatherTest1(InferencePassTest):
             data = fluid.data(name='data', shape=[-1, 128], dtype='float32')
             index = fluid.data(name='index', shape=[-1, 1], dtype='int32')
             scale_out = paddle.gather(data, index=index)
-            out = fluid.layers.softmax(input=scale_out)
+            out = paddle.nn.functional.softmax(scale_out)
         self.feeds = {
             "data": np.random.random([self.bs, 128]).astype("float32"),
@@ -69,7 +69,7 @@ class TRTGatherTest2(InferencePassTest):
             data = fluid.data(name='data', shape=[16, 64], dtype='float32')
             index = fluid.data(name='index', shape=[2], dtype='int32')
             scale_out = paddle.gather(data, index=index)
-            out = fluid.layers.softmax(input=scale_out)
+            out = paddle.nn.functional.softmax(scale_out)
         self.feeds = {
             "data": np.random.random([self.bs, 64]).astype("float32"),
......
@@ -17,6 +17,7 @@ import unittest
 import numpy as np
 from pass_test import PassTest
 
+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
@@ -31,7 +32,7 @@ class FCFusePassTest(PassTest):
                 input=data, size=128, num_flatten_dims=1, act="relu"
             )
             tmp_1 = fluid.layers.fc(input=tmp_0, size=32, num_flatten_dims=1)
-            tmp_2 = fluid.layers.softmax(input=tmp_1)
+            tmp_2 = paddle.nn.functional.softmax(tmp_1)
         self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
         self.fetch_list = [tmp_0, tmp_1, tmp_2]
......
@@ -79,7 +79,7 @@ class TestSoftmaxNet(unittest.TestCase):
         prediction = fluid.layers.fc(input=fc_1, size=2)
 
         # 4 x 2
-        prob = fluid.layers.softmax(prediction, axis=1)
+        prob = paddle.nn.functional.softmax(prediction, axis=1)
         cost = fluid.layers.cross_entropy(input=prob, label=label)
         loss = paddle.mean(cost)
......
@@ -310,7 +310,7 @@ class SimpleAttention(fluid.dygraph.Layer):
             shape=[attention_weight.shape[0], attention_weight.shape[1]],
         )
-        weights_reshape = fluid.layers.softmax(weights_reshape)
+        weights_reshape = paddle.nn.functional.softmax(weights_reshape)
         scaled = fluid.layers.elementwise_mul(
             x=encoder_vec, y=weights_reshape, axis=0
         )
......
@@ -41,7 +41,7 @@ class Policy(fluid.dygraph.Layer):
         x = fluid.layers.dropout(x, self.dropout_ratio)
         x = fluid.layers.relu(x)
         action_scores = self.affine2(x)
-        return fluid.layers.softmax(action_scores, axis=1)
+        return paddle.nn.functional.softmax(action_scores, axis=1)
 
 class TestImperativeMnist(unittest.TestCase):
......
@@ -376,7 +376,7 @@ class TestImperativeResneXt(unittest.TestCase):
                 label.stop_gradient = True
                 out = se_resnext(img)
-                softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+                softmax_out = paddle.nn.functional.softmax(out)
                 loss = fluid.layers.cross_entropy(
                     input=softmax_out, label=label
                 )
@@ -456,7 +456,7 @@ class TestImperativeResneXt(unittest.TestCase):
            )
            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
            out = se_resnext(img)
-           softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+           softmax_out = paddle.nn.functional.softmax(out)
            loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
            avg_loss = paddle.mean(x=loss)
            optimizer.minimize(avg_loss)
......
@@ -503,7 +503,7 @@ class MultiHeadAttentionLayer(Layer):
             )
         if attn_bias is not None:
             product += attn_bias
-        weights = fluid.layers.softmax(product)
+        weights = paddle.nn.functional.softmax(product)
         if self._dropout_rate:
             weights_droped = fluid.layers.dropout(
                 weights,
@@ -1013,7 +1013,7 @@ class WrapDecoderLayer(Layer):
         if dec_inputs is None:
             # Return probs for independent decoder program.
-            predict_out = fluid.layers.softmax(predict)
+            predict_out = paddle.nn.functional.softmax(predict)
             return predict_out
         return predict
......
@@ -2748,7 +2748,7 @@ class TestLayer(LayerTest):
             data = fluid.data(name="input", shape=[-1, 32, 32], dtype="float32")
             label = fluid.data(name="label", shape=[-1, 1], dtype="int")
             fc_out = fluid.layers.fc(input=data, size=10)
-            predict = fluid.layers.softmax(input=fc_out)
+            predict = paddle.nn.functional.softmax(fc_out)
             result = paddle.static.accuracy(input=predict, label=label, k=5)
             place = fluid.CPUPlace()
             exe = fluid.Executor(place)
@@ -2764,7 +2764,7 @@ class TestLayer(LayerTest):
             data = base.to_variable(x)
             label = base.to_variable(y)
             fc_out = fluid.layers.fc(data, size=10)
-            predict = fluid.layers.softmax(fc_out)
+            predict = paddle.nn.functional.softmax(fc_out)
             dynamic_out = paddle.static.accuracy(
                 input=predict, label=label, k=5
             )
@@ -3056,7 +3056,7 @@ class TestBook(LayerTest):
     ):
         data = self._get_data(name='data', shape=[10], dtype='float32')
         hid = layers.fc(input=data, size=20)
-        return layers.softmax(hid, axis=1)
+        return paddle.nn.functional.softmax(hid, axis=1)
 
     @prog_scope()
     def make_nce(self):
......
@@ -89,7 +89,7 @@ class TestMeanOpError(unittest.TestCase):
             input3 = fluid.layers.data(
                 name='input3', shape=[4], dtype="float16"
             )
-            fluid.layers.softmax(input3)
+            paddle.nn.functional.softmax(input3)
 
 @unittest.skipIf(
......
@@ -617,7 +617,7 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1):
         def dot_attention(query, memory):
             attn = layers.matmul(query, memory, transpose_y=True)
-            weight = layers.softmax(attn)
+            weight = paddle.nn.functional.softmax(attn)
             weight_memory = layers.matmul(weight, memory)
             return weight_memory, weight
......
@@ -76,7 +76,7 @@ class DecoderCell(layers.RNNCell):
         )
         if encoder_padding_mask is not None:
             attn_scores = paddle.add(attn_scores, encoder_padding_mask)
-        attn_scores = layers.softmax(attn_scores)
+        attn_scores = paddle.nn.functional.softmax(attn_scores)
         attn_out = paddle.squeeze(
             layers.matmul(attn_scores, encoder_output), [1]
         )
@@ -295,7 +295,7 @@ class Seq2SeqModel:
             decoder_output.sample_ids,
             dec_seq_lengths,
         )
-        probs = layers.softmax(logits)
+        probs = paddle.nn.functional.softmax(logits)
         return probs, samples, sample_length
......
@@ -99,7 +99,7 @@ class TestMeanOpError(unittest.TestCase):
             input3 = fluid.layers.data(
                 name='input3', shape=[4], dtype="float16"
             )
-            fluid.layers.softmax(input3)
+            paddle.nn.functional.softmax(input3)
 
 support_types = get_xpu_op_support_types('mean')
......