Unverified commit a3ae080a, authored by Charles-hit and committed by GitHub

remove softmax api from fluid (#48388)

* move softmax to paddle2.0

* fix some bugs

* resolve conflict

* remove some code

* modify code style

* fix bugs

* fix code

* fix move code

* fix some bugs

* fix code

* fix some code

* modify the header file

* fix bugs

* fix some examples

* fix mish example

* fix code
Parent ea5ca555
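For callers, the change is mechanical: replace the removed `fluid.layers.softmax` with `paddle.nn.functional.softmax` and drop the `use_cudnn` argument, which the 2.0 functional API does not accept. A minimal before/after sketch (illustrative only; the tensor shape is arbitrary):

```python
import paddle
import paddle.nn.functional as F

x = paddle.rand([2, 3, 4])

# Before this PR (now removed):
#   y = paddle.fluid.layers.softmax(x, use_cudnn=True, axis=-1)

# After: use the paddle 2.0 functional API. There is no `use_cudnn`
# argument; the backend kernel is selected by the framework.
y = F.softmax(x, axis=-1)
print(y.sum(axis=-1))  # every slice along the last axis sums to 1
```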
......@@ -626,7 +626,7 @@ def detection_output(
target_box=loc,
code_type='decode_center_size',
)
scores = nn.softmax(input=scores)
scores = paddle.nn.functional.softmax(scores)
scores = paddle.transpose(scores, perm=[0, 2, 1])
scores.stop_gradient = True
nmsed_outs = helper.create_variable_for_type_inference(
......
......@@ -68,7 +68,6 @@ __all__ = [
'linear_chain_crf',
'crf_decoding',
'conv2d',
'softmax',
'pool2d',
'batch_norm',
'dropout',
......@@ -145,7 +144,7 @@ def _get_reduce_dim(dim, input):
else:
raise TypeError(
"The type of dim must be int, list, tuple or range, but received {}".format(
type(axis)
type(dim)
)
)
if dim is None:
......@@ -1123,147 +1122,6 @@ def dropout(
return out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.softmax")
def softmax(input, use_cudnn=True, name=None, axis=-1):
r"""
This operator implements the softmax layer. The calculation process is as follows:
1. The dimension :attr:`axis` of the ``input`` will be permuted to the last.
2. Then the input tensor will be logically flattened to a 2-D matrix. The matrix's
second dimension(row length) is the same as the dimension :attr:`axis` of the input
tensor, and the first dimension(column length) is the product of all other
dimensions of the input tensor. For each row of the matrix, the softmax operator
squashes the K-dimensional(K is the width of the matrix, which is also the size
of the input tensor's dimension :attr:`axis`) vector of arbitrary real values to a
K-dimensional vector of real values in the range [0, 1] that add up to 1.
3. After the softmax operation is completed, the inverse operations of steps 1 and 2
are performed to restore the two-dimensional matrix to the same dimension as the ``input``.
For each element along the :attr:`axis` dimension, the operator computes the exponential of that element and the sum of the exponentials of all elements in the same K-dimensional vector. The ratio of the element's exponential to that sum is the corresponding output of the softmax operator.
For each row :math:`i` and each column :math:`j` in the matrix, we have:
.. math::
Out[i, j] = \\frac{\\exp(X[i, j])}{\\sum_j \\exp(X[i, j])}
Example:
.. code-block:: text
Case 1:
Input:
X.shape = [2, 3, 4]
X.data = [[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]]
Attrs:
axis = -1
Output:
Out.shape = [2, 3, 4]
Out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
[0.07232949, 0.19661193, 0.19661193, 0.53444665]],
[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
[0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
Case 2:
Input:
X.shape = [2, 3, 4]
X.data = [[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]]
Attrs:
axis = 1
Output:
Out.shape = [2, 3, 4]
Out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
[0.01786798, 0.01786798, 0.04661262, 0.04661262],
[0.97555875, 0.97555875, 0.93623955, 0.93623955]],
[[0.00490169, 0.00490169, 0.00490169, 0.00490169],
[0.26762315, 0.26762315, 0.26762315, 0.26762315],
[0.72747516, 0.72747516, 0.72747516, 0.72747516]]]
Args:
input (Tensor): The input tensor. A multi-dimension ``Tensor`` with type float32 or float64.
use_cudnn (bool, optional): Whether to use the cuDNN kernel; only effective when the cuDNN library is installed. Defaults to True for better performance.
name (str, optional): Name for the operation. Normally there is no need for the user to set this property, as the layer will be named automatically. For more information, please refer to :ref:`api_guide_Name`. Default: None.
axis (int, optional): The index of the dimension along which to perform the softmax calculation. It should be in the range :math:`[-1, rank - 1]`, where :math:`rank` is the rank of the input tensor. Default: -1, which means the last dimension.
Returns:
Tensor: ``Tensor`` indicates the output of softmax. The data type and shape are the same as ``input`` .
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
[3.0, 4.0, 5.0, 6.0],
[7.0, 8.0, 8.0, 9.0]],
[[1.0, 2.0, 3.0, 4.0],
[5.0, 6.0, 7.0, 8.0],
[6.0, 7.0, 8.0, 9.0]]], dtype='float32')
y = F.softmax(x, axis=1)
print(y)
# [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
# [0.01786798, 0.01786798, 0.04661262, 0.04661262],
# [0.97555870, 0.97555870, 0.93623954, 0.93623954]],
# [[0.00490169, 0.00490169, 0.00490169, 0.00490169],
# [0.26762316, 0.26762316, 0.26762316, 0.26762316],
# [0.72747517, 0.72747517, 0.72747517, 0.72747517]]]
"""
if in_dygraph_mode():
return _C_ops.softmax(input, axis)
if _non_static_mode():
return _legacy_C_ops.softmax(
input, 'axis', axis, 'use_cudnn', use_cudnn
)
inputs = {"X": [input]}
attrs = {"axis": axis, "use_cudnn": use_cudnn}
helper = LayerHelper('softmax', **locals())
check_variable_and_dtype(
input, 'input/x', ['float16', 'float32', 'float64'], 'softmax'
)
dtype = helper.input_dtype()
softmax_out = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type="softmax",
inputs={"X": input},
outputs={"Out": softmax_out},
attrs=attrs,
)
return softmax_out
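The docstring removed above describes softmax as a three-step procedure: permute :attr:`axis` to the last dimension, flatten to a 2-D matrix and normalize each row, then restore the original layout. The following is a minimal NumPy sketch of that procedure, for reference only; the helper name `softmax_ref` is hypothetical and this is not Paddle's kernel implementation.

```python
import numpy as np

def softmax_ref(x, axis=-1):
    """Reference softmax following the three steps in the removed docstring."""
    x = np.asarray(x, dtype=np.float64)
    perm = list(range(x.ndim))
    perm[axis], perm[-1] = perm[-1], perm[axis]    # step 1: move `axis` last
    xt = np.transpose(x, perm)
    flat = xt.reshape(-1, xt.shape[-1])            # step 2: flatten to 2-D
    flat = flat - flat.max(axis=1, keepdims=True)  # subtract max for numerical stability
    e = np.exp(flat)
    out = (e / e.sum(axis=1, keepdims=True)).reshape(xt.shape)
    return np.transpose(out, perm)                 # step 3: restore the original layout

x = [[[2.0, 3.0, 4.0, 5.0],
      [3.0, 4.0, 5.0, 6.0],
      [7.0, 8.0, 8.0, 9.0]]]
print(softmax_ref(x, axis=-1))  # matches the first batch of Case 1 in the docstring above
```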
def conv2d(
input,
num_filters,
......@@ -1788,7 +1646,7 @@ def pool2d(
if pool_padding == "VALID":
padding_algorithm = "VALID"
pool_padding = [0, 0]
if ceil_mode != False:
if ceil_mode is not False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True."
......@@ -6643,7 +6501,7 @@ def deformable_roi_pooling(
)
input_channels = input.shape[1]
if position_sensitive == False:
if position_sensitive is False:
output_channels = input_channels
else:
output_channels = input_channels / pooled_height / pooled_width
......@@ -6867,9 +6725,11 @@ def mish(x, threshold=20, name=None):
.. code-block:: python
import paddle
import paddle.fluid as fluid
import numpy as np
paddle.enable_static()
DATATYPE='float32'
x_data = np.array([i for i in range(1,5)]).reshape([1,1,4]).astype(DATATYPE)
......
......@@ -1304,7 +1304,7 @@ class BeamSearchDecoder(Decoder):
self.noend_mask_tensor, "float64"
)
step_log_probs = paddle.log(nn.softmax(logits))
step_log_probs = paddle.log(paddle.nn.functional.softmax(logits))
step_log_probs = self._mask_probs(step_log_probs, beam_state.finished)
log_probs = nn.elementwise_add(
x=step_log_probs, y=beam_state.log_probs, axis=0
......@@ -2330,7 +2330,7 @@ class SampleEmbeddingHelper(GreedyEmbeddingHelper):
if self.softmax_temperature is not None
else outputs
)
probs = nn.softmax(logits)
probs = paddle.nn.functional.softmax(logits)
# TODO: remove this stop_gradient. The stop_gradient of sample_ids can
# not pass to probs, since sampling_id op does not have corresponding
# grad op and thus can not pass.
......
......@@ -354,7 +354,7 @@ class TestSeResNeXt(TestParallelDyGraphRunnerBase):
label.stop_gradient = True
out = model(img)
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
softmax_out = paddle.nn.functional.softmax(out)
loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_loss = paddle.mean(x=loss)
return avg_loss
......
......@@ -342,7 +342,7 @@ class MultiHeadAttentionLayer(Layer):
)
if attn_bias is not None:
product += attn_bias
weights = fluid.layers.softmax(product)
weights = paddle.nn.functional.softmax(product)
if self._dropout_rate:
weights_droped = fluid.layers.dropout(
weights,
......@@ -849,7 +849,7 @@ class WrapDecoderLayer(Layer):
if dec_inputs is None:
# Return probs for independent decoder program.
predict_out = fluid.layers.softmax(predict)
predict_out = paddle.nn.functional.softmax(predict)
return predict_out
return predict
......
......@@ -1177,7 +1177,7 @@ def multi_head_attention(
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
if attn_bias:
product += attn_bias
weights = layers.softmax(product)
weights = paddle.nn.functional.softmax(product)
if dropout_rate:
weights = layers.dropout(
weights,
......@@ -1715,7 +1715,7 @@ def wrap_decoder(
bias_attr=const_bias_attr,
)
if dec_inputs is None:
predict = layers.softmax(predict)
predict = paddle.nn.functional.softmax(predict)
return predict
......@@ -1834,7 +1834,7 @@ def fast_decode(
logits = paddle.reshape(logits, (-1, trg_vocab_size))
topk_scores, topk_indices = layers.topk(
input=layers.softmax(logits), k=beam_size
input=paddle.nn.functional.softmax(logits), k=beam_size
)
accu_scores = layers.elementwise_add(
x=paddle.log(topk_scores),
......
......@@ -435,7 +435,9 @@ class BaseModel(fluid.dygraph.Layer):
cell_outputs = self._split_batch_beams(step_input)
cell_outputs = self.fc(cell_outputs)
step_log_probs = paddle.log(fluid.layers.softmax(cell_outputs))
step_log_probs = paddle.log(
paddle.nn.functional.softmax(cell_outputs)
)
noend_array = [-self.kinf] * self.tar_vocab_size
noend_array[self.beam_end_token] = 0
noend_mask_tensor = to_variable(
......@@ -703,7 +705,7 @@ class AttentionModel(fluid.dygraph.Layer):
attn = paddle.transpose(attn, [1, 0, 2])
attn = paddle.add(attn, mask * 1000000000)
attn = paddle.transpose(attn, [1, 0, 2])
weight = fluid.layers.softmax(attn)
weight = paddle.nn.functional.softmax(attn)
weight_memory = fluid.layers.matmul(weight, memory)
return weight_memory
......
......@@ -67,7 +67,7 @@ class SubNetWithDict(fluid.dygraph.Layer):
cache["k"], cache["v"] = k, v
weight = fluid.layers.matmul(x=q, y=k, transpose_y=True)
weight = fluid.layers.softmax(weight)
weight = paddle.nn.functional.softmax(weight)
out = fluid.layers.matmul(weight, v)
return out
......@@ -113,7 +113,7 @@ class MainNetWithDict(fluid.dygraph.Layer):
# Test to call function defined outside of class.
def update_cache(cache):
for k, val in cache.items():
cache[k] = fluid.layers.softmax(val)
cache[k] = paddle.nn.functional.softmax(val)
return cache
......
......@@ -308,7 +308,7 @@ class NetWithExternalFunc(fluid.dygraph.Layer):
# Test to call function behind caller.
def softmax(x):
return fluid.layers.softmax(x)
return paddle.nn.functional.softmax(x)
class TestNetWithExternalFunc(TestDygraphIfElseNet):
......
......@@ -535,7 +535,7 @@ def train_mobilenet(args, to_static):
out = net(img)
t_end = time.time()
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
softmax_out = paddle.nn.functional.softmax(out)
loss = fluid.layers.cross_entropy(
input=softmax_out, label=label
)
......
......@@ -48,7 +48,7 @@ class Policy(Layer):
x = fluid.layers.relu(x)
action_scores = self.affine2(x)
log_prob = fluid.layers.softmax(action_scores, axis=1)
log_prob = paddle.nn.functional.softmax(action_scores, axis=1)
return log_prob
......
......@@ -343,7 +343,7 @@ class SeResNeXt(fluid.dygraph.Layer):
y = paddle.reshape(y, shape=[-1, self.pool2d_avg_output])
out = self.out(y)
softmax_out = fluid.layers.softmax(out)
softmax_out = paddle.nn.functional.softmax(out)
loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_loss = paddle.mean(x=loss)
......
......@@ -153,7 +153,7 @@ class MultiHeadAttention(Layer):
)
if attn_bias is not None:
product += attn_bias
weights = layers.softmax(product)
weights = paddle.nn.functional.softmax(product)
if self.dropout_rate:
weights = layers.dropout(weights, dropout_prob=self.dropout_rate)
out = layers.matmul(weights, v)
......@@ -840,7 +840,7 @@ class Transformer(Layer):
)
caches = map_structure(split_batch_beams, caches)
step_log_probs = split_batch_beams(
paddle.log(fluid.layers.softmax(logits))
paddle.log(paddle.nn.functional.softmax(logits))
)
step_log_probs = mask_probs(
......
......@@ -33,7 +33,7 @@ class SimpleLayer(paddle.nn.Layer):
x = self.conv(x)
x = paddle.flatten(x, 1, -1)
if target is not None:
x = paddle.fluid.layers.softmax(x)
x = paddle.nn.functional.softmax(x)
loss = paddle.fluid.layers.cross_entropy(x, target)
if self.use_ipu:
loss = paddle.incubate.identity_loss(loss, 1)
......
......@@ -48,7 +48,7 @@ class SimpleLayer(paddle.nn.Layer):
x = paddle.flatten(x, 1, -1)
if target is not None:
if self.use_softmax:
x = paddle.fluid.layers.softmax(x)
x = paddle.nn.functional.softmax(x)
if self.loss_op:
loss = self.loss_op(x, target)
else:
......
......@@ -32,7 +32,7 @@ class SimpleLayer(paddle.nn.Layer):
x = self.conv(x)
x = paddle.flatten(x, 1, -1)
if target is not None:
x = paddle.fluid.layers.softmax(x)
x = paddle.nn.functional.softmax(x)
loss = paddle.fluid.layers.cross_entropy(x, target)
return x, loss
return x
......
......@@ -119,7 +119,7 @@ class SimpleLayer(paddle.nn.Layer):
print(x)
x = paddle.flatten(x, 1, -1)
if target is not None:
x = paddle.fluid.layers.softmax(x)
x = paddle.nn.functional.softmax(x)
loss = paddle.fluid.layers.cross_entropy(x, target)
loss = paddle.incubate.identity_loss(loss, 1)
return x, loss
......
......@@ -47,7 +47,7 @@ class TestBase(IPUOpTest):
x = paddle.static.data(
name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32'
)
out = paddle.fluid.layers.softmax(x, **self.attrs)
out = paddle.nn.functional.softmax(x, **self.attrs)
self.fetch_list = [out.name]
def run_model(self, exec_mode):
......
......@@ -32,7 +32,7 @@ class MkldnnInplacePassTest(InferencePassTest):
conv_out_1 = fluid.layers.conv2d(
data, num_filters=3, filter_size=3, bias_attr=False
)
softmax_out = fluid.layers.softmax(conv_out_1)
softmax_out = paddle.nn.functional.softmax(conv_out_1)
relu_out = fluid.layers.relu(conv_out_1)
eltwise_out = fluid.layers.elementwise_add(
softmax_out, relu_out, axis=-1
......
......@@ -77,7 +77,7 @@ class TensorRTSubgraphPassRelu6Test(TensorRTSubgraphPassActivationTest):
class TensorRTSubgraphPassSoftMaxTest(TensorRTSubgraphPassActivationTest):
def append_act(self, x):
return fluid.layers.softmax(x)
return paddle.nn.functional.softmax(x)
class TensorRTSubgraphPassSigmoidTest(TensorRTSubgraphPassActivationTest):
......
......@@ -17,6 +17,7 @@ import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig
......@@ -31,7 +32,7 @@ class FCFusePassTRTTest(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=128, num_flatten_dims=1, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {
"data": np.random.random((32, 128, 2, 2)).astype("float32")
......@@ -61,7 +62,7 @@ class FCFusePassTRTStaticDims4Cols1Test(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=64, num_flatten_dims=1, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {
"data": np.random.random((32, 128, 32, 8)).astype("float32")
......@@ -89,7 +90,7 @@ class FCFusePassTRTStaticDims4Cols2Test(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=32, num_flatten_dims=2, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {
"data": np.random.random((3, 24, 16, 16)).astype("float32")
......@@ -115,7 +116,7 @@ class FCFusePassTRTDynamicDims2Test(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=64, num_flatten_dims=1, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
self.enable_trt = True
......@@ -147,7 +148,7 @@ class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=64, num_flatten_dims=1, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
self.enable_trt = True
......@@ -179,7 +180,7 @@ class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=64, num_flatten_dims=2, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
self.enable_trt = True
......@@ -213,7 +214,7 @@ class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=64, num_flatten_dims=1, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {
"data": np.random.random((32, 12, 4, 6)).astype("float32")
......@@ -249,7 +250,7 @@ class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=64, num_flatten_dims=2, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {
"data": np.random.random((32, 128, 32, 32)).astype("float32")
......@@ -285,7 +286,7 @@ class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
fc_out1 = fluid.layers.fc(
input=data, size=64, num_flatten_dims=3, act="relu"
)
out = fluid.layers.softmax(input=fc_out1)
out = paddle.nn.functional.softmax(fc_out1)
self.feeds = {
"data": np.random.random((32, 128, 32, 32)).astype("float32")
......
......@@ -30,7 +30,7 @@ class TRTGatherTest1(InferencePassTest):
data = fluid.data(name='data', shape=[-1, 128], dtype='float32')
index = fluid.data(name='index', shape=[-1, 1], dtype='int32')
scale_out = paddle.gather(data, index=index)
out = fluid.layers.softmax(input=scale_out)
out = paddle.nn.functional.softmax(scale_out)
self.feeds = {
"data": np.random.random([self.bs, 128]).astype("float32"),
......@@ -69,7 +69,7 @@ class TRTGatherTest2(InferencePassTest):
data = fluid.data(name='data', shape=[16, 64], dtype='float32')
index = fluid.data(name='index', shape=[2], dtype='int32')
scale_out = paddle.gather(data, index=index)
out = fluid.layers.softmax(input=scale_out)
out = paddle.nn.functional.softmax(scale_out)
self.feeds = {
"data": np.random.random([self.bs, 64]).astype("float32"),
......
......@@ -17,6 +17,7 @@ import unittest
import numpy as np
from pass_test import PassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
......@@ -31,7 +32,7 @@ class FCFusePassTest(PassTest):
input=data, size=128, num_flatten_dims=1, act="relu"
)
tmp_1 = fluid.layers.fc(input=tmp_0, size=32, num_flatten_dims=1)
tmp_2 = fluid.layers.softmax(input=tmp_1)
tmp_2 = paddle.nn.functional.softmax(tmp_1)
self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
self.fetch_list = [tmp_0, tmp_1, tmp_2]
......
......@@ -79,7 +79,7 @@ class TestSoftmaxNet(unittest.TestCase):
prediction = fluid.layers.fc(input=fc_1, size=2)
# 4 x 2
prob = fluid.layers.softmax(prediction, axis=1)
prob = paddle.nn.functional.softmax(prediction, axis=1)
cost = fluid.layers.cross_entropy(input=prob, label=label)
loss = paddle.mean(cost)
......
......@@ -310,7 +310,7 @@ class SimpleAttention(fluid.dygraph.Layer):
shape=[attention_weight.shape[0], attention_weight.shape[1]],
)
weights_reshape = fluid.layers.softmax(weights_reshape)
weights_reshape = paddle.nn.functional.softmax(weights_reshape)
scaled = fluid.layers.elementwise_mul(
x=encoder_vec, y=weights_reshape, axis=0
)
......
......@@ -41,7 +41,7 @@ class Policy(fluid.dygraph.Layer):
x = fluid.layers.dropout(x, self.dropout_ratio)
x = fluid.layers.relu(x)
action_scores = self.affine2(x)
return fluid.layers.softmax(action_scores, axis=1)
return paddle.nn.functional.softmax(action_scores, axis=1)
class TestImperativeMnist(unittest.TestCase):
......
......@@ -376,7 +376,7 @@ class TestImperativeResneXt(unittest.TestCase):
label.stop_gradient = True
out = se_resnext(img)
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
softmax_out = paddle.nn.functional.softmax(out)
loss = fluid.layers.cross_entropy(
input=softmax_out, label=label
)
......@@ -456,7 +456,7 @@ class TestImperativeResneXt(unittest.TestCase):
)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
out = se_resnext(img)
softmax_out = fluid.layers.softmax(out, use_cudnn=False)
softmax_out = paddle.nn.functional.softmax(out)
loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_loss = paddle.mean(x=loss)
optimizer.minimize(avg_loss)
......
......@@ -503,7 +503,7 @@ class MultiHeadAttentionLayer(Layer):
)
if attn_bias is not None:
product += attn_bias
weights = fluid.layers.softmax(product)
weights = paddle.nn.functional.softmax(product)
if self._dropout_rate:
weights_droped = fluid.layers.dropout(
weights,
......@@ -1013,7 +1013,7 @@ class WrapDecoderLayer(Layer):
if dec_inputs is None:
# Return probs for independent decoder program.
predict_out = fluid.layers.softmax(predict)
predict_out = paddle.nn.functional.softmax(predict)
return predict_out
return predict
......
......@@ -2748,7 +2748,7 @@ class TestLayer(LayerTest):
data = fluid.data(name="input", shape=[-1, 32, 32], dtype="float32")
label = fluid.data(name="label", shape=[-1, 1], dtype="int")
fc_out = fluid.layers.fc(input=data, size=10)
predict = fluid.layers.softmax(input=fc_out)
predict = paddle.nn.functional.softmax(fc_out)
result = paddle.static.accuracy(input=predict, label=label, k=5)
place = fluid.CPUPlace()
exe = fluid.Executor(place)
......@@ -2764,7 +2764,7 @@ class TestLayer(LayerTest):
data = base.to_variable(x)
label = base.to_variable(y)
fc_out = fluid.layers.fc(data, size=10)
predict = fluid.layers.softmax(fc_out)
predict = paddle.nn.functional.softmax(fc_out)
dynamic_out = paddle.static.accuracy(
input=predict, label=label, k=5
)
......@@ -3056,7 +3056,7 @@ class TestBook(LayerTest):
):
data = self._get_data(name='data', shape=[10], dtype='float32')
hid = layers.fc(input=data, size=20)
return layers.softmax(hid, axis=1)
return paddle.nn.functional.softmax(hid, axis=1)
@prog_scope()
def make_nce(self):
......
......@@ -89,7 +89,7 @@ class TestMeanOpError(unittest.TestCase):
input3 = fluid.layers.data(
name='input3', shape=[4], dtype="float16"
)
fluid.layers.softmax(input3)
paddle.nn.functional.softmax(input3)
@unittest.skipIf(
......
......@@ -617,7 +617,7 @@ class RecurrentOpSubBlockTest(RecurrentOpTest1):
def dot_attention(query, memory):
attn = layers.matmul(query, memory, transpose_y=True)
weight = layers.softmax(attn)
weight = paddle.nn.functional.softmax(attn)
weight_memory = layers.matmul(weight, memory)
return weight_memory, weight
......
......@@ -76,7 +76,7 @@ class DecoderCell(layers.RNNCell):
)
if encoder_padding_mask is not None:
attn_scores = paddle.add(attn_scores, encoder_padding_mask)
attn_scores = layers.softmax(attn_scores)
attn_scores = paddle.nn.functional.softmax(attn_scores)
attn_out = paddle.squeeze(
layers.matmul(attn_scores, encoder_output), [1]
)
......@@ -295,7 +295,7 @@ class Seq2SeqModel:
decoder_output.sample_ids,
dec_seq_lengths,
)
probs = layers.softmax(logits)
probs = paddle.nn.functional.softmax(logits)
return probs, samples, sample_length
......
......@@ -99,7 +99,7 @@ class TestMeanOpError(unittest.TestCase):
input3 = fluid.layers.data(
name='input3', shape=[4], dtype="float16"
)
fluid.layers.softmax(input3)
paddle.nn.functional.softmax(input3)
support_types = get_xpu_op_support_types('mean')
......