diff --git a/python/paddle/nn/decode.py b/python/paddle/nn/decode.py
index 030a2a048815b3303275a0c2fff0a85c4ca85789..4563df700e4115281b0e644ffc72429d41cd7276 100644
--- a/python/paddle/nn/decode.py
+++ b/python/paddle/nn/decode.py
@@ -189,20 +189,19 @@ class BeamSearchDecoder(Decoder):
         .. code-block:: python

-            import numpy as np
-            import paddle
-            from paddle.nn import BeamSearchDecoder, dynamic_decode
-            from paddle.nn import GRUCell, Linear, Embedding
-            trg_embeder = Embedding(100, 32)
-            output_layer = Linear(32, 32)
-            decoder_cell = GRUCell(input_size=32, hidden_size=32)
-            decoder = BeamSearchDecoder(decoder_cell,
-                                        start_token=0,
-                                        end_token=1,
-                                        beam_size=4,
-                                        embedding_fn=trg_embeder,
-                                        output_fn=output_layer)
-
+            >>> import numpy as np
+            >>> import paddle
+            >>> from paddle.nn import BeamSearchDecoder, dynamic_decode
+            >>> from paddle.nn import GRUCell, Linear, Embedding
+            >>> trg_embeder = Embedding(100, 32)
+            >>> output_layer = Linear(32, 32)
+            >>> decoder_cell = GRUCell(input_size=32, hidden_size=32)
+            >>> decoder = BeamSearchDecoder(decoder_cell,
+            ...                             start_token=0,
+            ...                             end_token=1,
+            ...                             beam_size=4,
+            ...                             embedding_fn=trg_embeder,
+            ...                             output_fn=output_layer)
     """

     def __init__(
@@ -1054,22 +1054,24 @@ def dynamic_decode(
         .. code-block:: python

-            import paddle
-            from paddle.nn import BeamSearchDecoder, dynamic_decode
-            from paddle.nn import GRUCell, Linear, Embedding
-            trg_embeder = Embedding(100, 32)
-            output_layer = Linear(32, 32)
-            decoder_cell = GRUCell(input_size=32, hidden_size=32)
-            decoder = BeamSearchDecoder(decoder_cell,
-                                        start_token=0,
-                                        end_token=1,
-                                        beam_size=4,
-                                        embedding_fn=trg_embeder,
-                                        output_fn=output_layer)
-            encoder_output = paddle.ones((4, 8, 32), dtype=paddle.get_default_dtype())
-            outputs = dynamic_decode(decoder=decoder,
-                                     inits=decoder_cell.get_initial_states(encoder_output),
-                                     max_step_num=10)
+            >>> import paddle
+            >>> from paddle.nn import BeamSearchDecoder, dynamic_decode
+            >>> from paddle.nn import GRUCell, Linear, Embedding
+            >>> trg_embeder = Embedding(100, 32)
+            >>> output_layer = Linear(32, 32)
+            >>> decoder_cell = GRUCell(input_size=32, hidden_size=32)
+            >>> decoder = BeamSearchDecoder(decoder_cell,
+            ...                             start_token=0,
+            ...                             end_token=1,
+            ...                             beam_size=4,
+            ...                             embedding_fn=trg_embeder,
+            ...                             output_fn=output_layer)
+            >>> encoder_output = paddle.ones((4, 8, 32), dtype=paddle.get_default_dtype())
+            >>> outputs = dynamic_decode(decoder=decoder,
+            ...                          inits=decoder_cell.get_initial_states(encoder_output),
+            ...                          max_step_num=10)
+            >>> print(outputs[0].shape)
+            [4, 11, 4]
     """
     if in_dynamic_mode():
         return _dynamic_decode_imperative(
diff --git a/python/paddle/nn/layer/loss.py b/python/paddle/nn/layer/loss.py
index 15dbeb54c0e0a1b625027f21bd3837bbe55d0e42..944d5a59a4b3e57b8db807c8a938d598fac1e13d 100644
--- a/python/paddle/nn/layer/loss.py
+++ b/python/paddle/nn/layer/loss.py
@@ -91,15 +91,15 @@ class BCEWithLogitsLoss(Layer):
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")
-            label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")
-            bce_logit_loss = paddle.nn.BCEWithLogitsLoss()
-            output = bce_logit_loss(logit, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        0.45618814)
+            >>> logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32")
+            >>> label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32")
+            >>> bce_logit_loss = paddle.nn.BCEWithLogitsLoss()
+            >>> output = bce_logit_loss(logit, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.45618808)
     """
@@ -303,50 +303,47 @@ class CrossEntropyLoss(Layer):
        .. code-block:: python

-            # hard labels
-            import paddle
-            paddle.seed(99999)
-            N=100
-            C=200
-            reduction='mean'
-            input = paddle.rand([N, C], dtype='float64')
-            label = paddle.randint(0, C, shape=[N], dtype='int64')
-            weight = paddle.rand([C], dtype='float64')
-
-            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
-                weight=weight, reduction=reduction)
-            dy_ret = cross_entropy_loss(
-                                        input,
-                                        label)
-            print(dy_ret)
-            # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True,
-            #        5.34043430)
+            >>> # hard labels
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> N=100
+            >>> C=200
+            >>> reduction='mean'
+            >>> input = paddle.rand([N, C], dtype='float64')
+            >>> label = paddle.randint(0, C, shape=[N], dtype='int64')
+            >>> weight = paddle.rand([C], dtype='float64')
+
+            >>> cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+            ...     weight=weight, reduction=reduction)
+            >>> dy_ret = cross_entropy_loss(input, label)
+            >>> print(dy_ret)
+            Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True,
+            5.33697682)

        .. code-block:: python

-            # soft labels
-            import paddle
-            paddle.seed(99999)
-            axis = -1
-            ignore_index = -100
-            N = 4
-            C = 3
-            shape = [N, C]
-            reduction='mean'
-            weight = None
-            logits = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
-            labels = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
-            labels /= paddle.sum(labels, axis=axis, keepdim=True)
-            paddle_loss_mean = paddle.nn.functional.cross_entropy(
-                logits,
-                labels,
-                soft_label=True,
-                axis=axis,
-                weight=weight,
-                reduction=reduction)
-            print(paddle_loss_mean)
-            # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=True,
-            #        1.11043464)
+            >>> # soft labels
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> axis = -1
+            >>> ignore_index = -100
+            >>> N = 4
+            >>> C = 3
+            >>> shape = [N, C]
+            >>> reduction='mean'
+            >>> weight = None
+            >>> logits = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
+            >>> labels = paddle.uniform(shape, dtype='float64', min=0.1, max=1.0)
+            >>> labels /= paddle.sum(labels, axis=axis, keepdim=True)
+            >>> paddle_loss_mean = paddle.nn.functional.cross_entropy(logits,
+            ...                                                       labels,
+            ...                                                       soft_label=True,
+            ...                                                       axis=axis,
+            ...                                                       weight=weight,
+            ...                                                       reduction=reduction)
+            >>> print(paddle_loss_mean)
+            Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=True,
+            1.14554912)
     """
@@ -439,21 +436,25 @@ class HSigmoidLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            paddle.set_device('cpu')
-
-            input = paddle.uniform([4, 3])
-            # [[0.56194401 -0.22450298 -0.10741806]  # random
-            #  [0.36136317 0.23556745 0.88748658]  # random
-            #  [0.18151939 0.80947340 -0.31078976]  # random
-            #  [0.68886101 -0.14239830 -0.41297770]]  # random
-            label = paddle.to_tensor([0, 1, 4, 5])
-            m = paddle.nn.HSigmoidLoss(3, 5)
-            out = m(input, label)
-            # [[2.42524505]
-            #  [1.74917245]
-            #  [3.14571381]
-            #  [2.34564662]]
+            >>> import paddle
+            >>> paddle.set_device('cpu')
+            >>> paddle.seed(2023)
+            >>> input = paddle.uniform([4, 3])
+            >>> print(input)
+            Tensor(shape=[4, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[ 0.73167229,  0.04029441, -0.48078126],
+             [ 0.81050646, -0.15199822, -0.18717426],
+             [ 0.94041789,  0.48874724,  0.03570259],
+             [ 0.46585739,  0.95573163, -0.91368192]])
+            >>> label = paddle.to_tensor([0, 1, 4, 5])
+            >>> m = paddle.nn.HSigmoidLoss(3, 6)
+            >>> out = m(input, label)
+            >>> print(out)
+            Tensor(shape=[4, 1], dtype=float32, place=Place(cpu), stop_gradient=False,
+            [[1.94512916],
+             [2.26129627],
+             [2.36135936],
+             [2.97453213]])
     """

     def __init__(
@@ -558,13 +559,14 @@ class MSELoss(Layer):
        .. code-block:: python

-            import paddle
-            mse_loss = paddle.nn.loss.MSELoss()
-            input = paddle.to_tensor([1.5])
-            label = paddle.to_tensor([1.7])
-            output = mse_loss(input, label)
-            print(output)
-            # 0.04000002
+            >>> import paddle
+            >>> mse_loss = paddle.nn.loss.MSELoss()
+            >>> input = paddle.to_tensor([1.5])
+            >>> label = paddle.to_tensor([1.7])
+            >>> output = mse_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.04000002)
     """
@@ -642,29 +644,29 @@ class L1Loss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]])
-            label = paddle.to_tensor([[1.7, 1], [0.4, 0.5]])
+            >>> input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]])
+            >>> label = paddle.to_tensor([[1.7, 1], [0.4, 0.5]])

-            l1_loss = paddle.nn.L1Loss()
-            output = l1_loss(input, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        0.34999999)
+            >>> l1_loss = paddle.nn.L1Loss()
+            >>> output = l1_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.34999999)

-            l1_loss = paddle.nn.L1Loss(reduction='sum')
-            output = l1_loss(input, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        1.39999998)
+            >>> l1_loss = paddle.nn.L1Loss(reduction='sum')
+            >>> output = l1_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.39999998)

-            l1_loss = paddle.nn.L1Loss(reduction='none')
-            output = l1_loss(input, label)
-            print(output)
-            # Tensor(shape=[2, 2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            # [[0.20000005, 0.19999999],
-            # [0.20000000, 0.79999995]])
+            >>> l1_loss = paddle.nn.L1Loss(reduction='none')
+            >>> output = l1_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[2, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[0.20000005, 0.19999999],
+             [0.20000000, 0.79999995]])
     """
@@ -740,15 +742,15 @@ class BCELoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            input = paddle.to_tensor([0.5, 0.6, 0.7])
-            label = paddle.to_tensor([1.0, 0.0, 1.0])
-            bce_loss = paddle.nn.BCELoss()
-            output = bce_loss(input, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        0.65537101)
+            >>> input = paddle.to_tensor([0.5, 0.6, 0.7])
+            >>> label = paddle.to_tensor([1.0, 0.0, 1.0])
+            >>> bce_loss = paddle.nn.BCELoss()
+            >>> output = bce_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.65537095)
     """
@@ -840,20 +842,22 @@ class NLLLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            nll_loss = paddle.nn.loss.NLLLoss()
-            log_softmax = paddle.nn.LogSoftmax(axis=1)
+            >>> nll_loss = paddle.nn.loss.NLLLoss()
+            >>> log_softmax = paddle.nn.LogSoftmax(axis=1)

-            input = paddle.to_tensor([[0.88103855, 0.9908683 , 0.6226845 ],
-                                      [0.53331435, 0.07999352, 0.8549948 ],
-                                      [0.25879037, 0.39530203, 0.698465  ],
-                                      [0.73427284, 0.63575995, 0.18827209],
-                                      [0.05689114, 0.0862954 , 0.6325046 ]], "float32")
-            log_out = log_softmax(input)
-            label = paddle.to_tensor([0, 2, 1, 1, 0], "int64")
-            result = nll_loss(log_out, label)
-            print(result) # Tensor(shape=[], dtype=float32, place=CPUPlace, stop_gradient=True, 1.07202101)
+            >>> input = paddle.to_tensor([[0.88103855, 0.9908683 , 0.6226845 ],
+            ...                           [0.53331435, 0.07999352, 0.8549948 ],
+            ...                           [0.25879037, 0.39530203, 0.698465  ],
+            ...                           [0.73427284, 0.63575995, 0.18827209],
+            ...                           [0.05689114, 0.0862954 , 0.6325046 ]], "float32")
+            >>> log_out = log_softmax(input)
+            >>> label = paddle.to_tensor([0, 2, 1, 1, 0], "int64")
+            >>> result = nll_loss(log_out, label)
+            >>> print(result)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.07202101)
     """
@@ -929,12 +933,15 @@ class PoissonNLLLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-
-            poisson_nll_loss = paddle.nn.loss.PoissonNLLLoss()
-            input = paddle.randn([5, 2], dtype=paddle.float32)
-            label = paddle.randn([5, 2], dtype=paddle.float32)
-            loss = poisson_nll_loss(input, label)
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> poisson_nll_loss = paddle.nn.loss.PoissonNLLLoss()
+            >>> input = paddle.randn([5, 2], dtype=paddle.float32)
+            >>> label = paddle.randn([5, 2], dtype=paddle.float32)
+            >>> loss = poisson_nll_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.52983975)
     """
@@ -1017,32 +1024,36 @@ class KLDivLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-
-            shape = (5, 20)
-            x = paddle.uniform(shape, min=-10, max=10).astype('float32')
-            target = paddle.uniform(shape, min=-10, max=10).astype('float32')
-
-            # 'batchmean' reduction, loss shape will be []
-            kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
-            pred_loss = kldiv_criterion(x, target)
-            # shape=[]
-
-            # 'mean' reduction, loss shape will be []
-            kldiv_criterion = nn.KLDivLoss(reduction='mean')
-            pred_loss = kldiv_criterion(x, target)
-            # shape=[]
-
-            # 'sum' reduction, loss shape will be []
-            kldiv_criterion = nn.KLDivLoss(reduction='sum')
-            pred_loss = kldiv_criterion(x, target)
-            # shape=[]
-
-            # 'none' reduction, loss shape is same with X shape
-            kldiv_criterion = nn.KLDivLoss(reduction='none')
-            pred_loss = kldiv_criterion(x, target)
-            # shape=[5, 20]
+            >>> import paddle
+            >>> import paddle.nn as nn
+
+            >>> shape = (5, 20)
+            >>> x = paddle.uniform(shape, min=-10, max=10).astype('float32')
+            >>> target = paddle.uniform(shape, min=-10, max=10).astype('float32')
+
+            >>> # 'batchmean' reduction, loss shape will be []
+            >>> kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
+            >>> pred_loss = kldiv_criterion(x, target)
+            >>> print(pred_loss.shape)
+            []
+
+            >>> # 'mean' reduction, loss shape will be []
+            >>> kldiv_criterion = nn.KLDivLoss(reduction='mean')
+            >>> pred_loss = kldiv_criterion(x, target)
+            >>> print(pred_loss.shape)
+            []
+
+            >>> # 'sum' reduction, loss shape will be []
+            >>> kldiv_criterion = nn.KLDivLoss(reduction='sum')
+            >>> pred_loss = kldiv_criterion(x, target)
+            >>> print(pred_loss.shape)
+            []
+
+            >>> # 'none' reduction, loss shape is same with X shape
+            >>> kldiv_criterion = nn.KLDivLoss(reduction='none')
+            >>> pred_loss = kldiv_criterion(x, target)
+            >>> print(pred_loss.shape)
+            [5, 20]
     """
@@ -1099,16 +1110,16 @@ class MarginRankingLoss(Layer):
        .. code-block:: python

-            import paddle
-
-            input = paddle.to_tensor([[1, 2], [3, 4]], dtype="float32")
-            other = paddle.to_tensor([[2, 1], [2, 4]], dtype="float32")
-            label = paddle.to_tensor([[1, -1], [-1, -1]], dtype="float32")
-            margin_rank_loss = paddle.nn.MarginRankingLoss()
-            loss = margin_rank_loss(input, other, label)
+            >>> import paddle

-            print(loss)
-            # 0.75
+            >>> input = paddle.to_tensor([[1, 2], [3, 4]], dtype="float32")
+            >>> other = paddle.to_tensor([[2, 1], [2, 4]], dtype="float32")
+            >>> label = paddle.to_tensor([[1, -1], [-1, -1]], dtype="float32")
+            >>> margin_rank_loss = paddle.nn.MarginRankingLoss()
+            >>> loss = margin_rank_loss(input, other, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.75000000)
     """

     def __init__(self, margin=0.0, reduction='mean', name=None):
@@ -1155,50 +1166,41 @@ class CTCLoss(Layer):
    Examples:
        .. code-block:: python

-            # declarative mode
-            import paddle
-
-            # length of the longest logit sequence
-            max_seq_length = 4
-            #length of the longest label sequence
-            max_label_length = 3
-            # number of logit sequences
-            batch_size = 2
-            # class num
-            class_num = 3
-
-            log_probs = paddle.to_tensor([[[4.17021990e-01, 7.20324516e-01, 1.14374816e-04],
-                                           [3.02332580e-01, 1.46755889e-01, 9.23385918e-02]],
-
-                                          [[1.86260208e-01, 3.45560730e-01, 3.96767467e-01],
-                                           [5.38816750e-01, 4.19194520e-01, 6.85219526e-01]],
-
-                                          [[2.04452246e-01, 8.78117442e-01, 2.73875929e-02],
-                                           [6.70467496e-01, 4.17304814e-01, 5.58689833e-01]],
-
-                                          [[1.40386939e-01, 1.98101491e-01, 8.00744593e-01],
-                                           [9.68261600e-01, 3.13424170e-01, 6.92322612e-01]],
-
-                                          [[8.76389146e-01, 8.94606650e-01, 8.50442126e-02],
-                                           [3.90547849e-02, 1.69830427e-01, 8.78142476e-01]]], dtype="float32")
-            labels = paddle.to_tensor([[1, 2, 2],
-                                       [1, 2, 2]], dtype="int32")
-            input_lengths = paddle.to_tensor([5, 5], dtype="int64")
-            label_lengths = paddle.to_tensor([3, 3], dtype="int64")
-
-            loss = paddle.nn.CTCLoss(blank=0, reduction='none')(log_probs, labels,
-                                                                input_lengths,
-                                                                label_lengths)
-            print(loss)
-            # Tensor(shape=[2], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        [3.91798496, 2.90765190])
-
-            loss = paddle.nn.CTCLoss(blank=0, reduction='mean')(log_probs, labels,
-                                                                input_lengths,
-                                                                label_lengths)
-            print(loss)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        1.13760614)
+            >>> # declarative mode
+            >>> import paddle
+
+            >>> # length of the longest logit sequence
+            >>> max_seq_length = 4
+            >>> # length of the longest label sequence
+            >>> max_label_length = 3
+            >>> # number of logit sequences
+            >>> batch_size = 2
+            >>> # class num
+            >>> class_num = 3
+
+            >>> log_probs = paddle.to_tensor([[[4.17021990e-01, 7.20324516e-01, 1.14374816e-04],
+            ...                                [3.02332580e-01, 1.46755889e-01, 9.23385918e-02]],
+            ...                               [[1.86260208e-01, 3.45560730e-01, 3.96767467e-01],
+            ...                                [5.38816750e-01, 4.19194520e-01, 6.85219526e-01]],
+            ...                               [[2.04452246e-01, 8.78117442e-01, 2.73875929e-02],
+            ...                                [6.70467496e-01, 4.17304814e-01, 5.58689833e-01]],
+            ...                               [[1.40386939e-01, 1.98101491e-01, 8.00744593e-01],
+            ...                                [9.68261600e-01, 3.13424170e-01, 6.92322612e-01]],
+            ...                               [[8.76389146e-01, 8.94606650e-01, 8.50442126e-02],
+            ...                                [3.90547849e-02, 1.69830427e-01, 8.78142476e-01]]], dtype="float32")
+            >>> labels = paddle.to_tensor([[1, 2, 2], [1, 2, 2]], dtype="int32")
+            >>> input_lengths = paddle.to_tensor([5, 5], dtype="int64")
+            >>> label_lengths = paddle.to_tensor([3, 3], dtype="int64")
+
+            >>> loss = paddle.nn.CTCLoss(blank=0, reduction='none')(log_probs, labels, input_lengths, label_lengths)
+            >>> print(loss)
+            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [3.91798496, 2.90765214])
+
+            >>> loss = paddle.nn.CTCLoss(blank=0, reduction='mean')(log_probs, labels, input_lengths, label_lengths)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.13760614)
     """

     def __init__(self, blank=0, reduction='mean'):
@@ -1247,33 +1249,33 @@ class RNNTLoss(Layer):
    Examples:
        .. code-block:: python

-            # declarative mode
-            import numpy as np
-            import paddle
-            from paddle.nn import RNNTLoss
-
-            fn = RNNTLoss(reduction='sum', fastemit_lambda=0.0)
-
-            acts = np.array([[[[0.1, 0.6, 0.1, 0.1, 0.1],
-                               [0.1, 0.1, 0.6, 0.1, 0.1],
-                               [0.1, 0.1, 0.2, 0.8, 0.1]],
-                              [[0.1, 0.6, 0.1, 0.1, 0.1],
-                               [0.1, 0.1, 0.2, 0.1, 0.1],
-                               [0.7, 0.1, 0.2, 0.1, 0.1]]]])
-            labels = [[1, 2]]
-
-            acts = paddle.to_tensor(acts, stop_gradient=False)
-
-            lengths = [acts.shape[1]] * acts.shape[0]
-            label_lengths = [len(l) for l in labels]
-            labels = paddle.to_tensor(labels, paddle.int32)
-            lengths = paddle.to_tensor(lengths, paddle.int32)
-            label_lengths = paddle.to_tensor(label_lengths, paddle.int32)
-
-            costs = fn(acts, labels, lengths, label_lengths)
-            print(costs)
-            # Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False,
-            #        4.49566677)
+            >>> # declarative mode
+            >>> import numpy as np
+            >>> import paddle
+            >>> from paddle.nn import RNNTLoss
+
+            >>> fn = RNNTLoss(reduction='sum', fastemit_lambda=0.0)
+
+            >>> acts = np.array([[[[0.1, 0.6, 0.1, 0.1, 0.1],
+            ...                    [0.1, 0.1, 0.6, 0.1, 0.1],
+            ...                    [0.1, 0.1, 0.2, 0.8, 0.1]],
+            ...                   [[0.1, 0.6, 0.1, 0.1, 0.1],
+            ...                    [0.1, 0.1, 0.2, 0.1, 0.1],
+            ...                    [0.7, 0.1, 0.2, 0.1, 0.1]]]])
+            >>> labels = [[1, 2]]
+
+            >>> acts = paddle.to_tensor(acts, stop_gradient=False)
+
+            >>> lengths = [acts.shape[1]] * acts.shape[0]
+            >>> label_lengths = [len(l) for l in labels]
+            >>> labels = paddle.to_tensor(labels, paddle.int32)
+            >>> lengths = paddle.to_tensor(lengths, paddle.int32)
+            >>> label_lengths = paddle.to_tensor(label_lengths, paddle.int32)
+
+            >>> costs = fn(acts, labels, lengths, label_lengths)
+            >>> print(costs)
+            Tensor(shape=[], dtype=float64, place=Place(cpu), stop_gradient=False,
+            -2.85042444)
     """

     def __init__(
@@ -1346,13 +1348,15 @@ class SmoothL1Loss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            input = paddle.rand([3, 3]).astype("float32")
-            label = paddle.rand([3, 3]).astype("float32")
-            loss = paddle.nn.SmoothL1Loss()
-            output = loss(input, label)
-            print(output)
-            # 0.049606
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> input = paddle.rand([3, 3]).astype("float32")
+            >>> label = paddle.rand([3, 3]).astype("float32")
+            >>> loss = paddle.nn.SmoothL1Loss()
+            >>> output = loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.08307374)
     """

     def __init__(self, reduction='mean', delta=1.0, name=None):
@@ -1414,21 +1418,23 @@ class MultiLabelSoftMarginLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
+            >>> import paddle
+            >>> import paddle.nn as nn

-            input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
-            label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)
+            >>> input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
+            >>> label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)

-            multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='none')
-            loss = multi_label_soft_margin_loss(input, label)
-            print(loss)
-            # Tensor([3.49625897, 0.71111226, 0.43989015])
+            >>> multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='none')
+            >>> loss = multi_label_soft_margin_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [3.49625897, 0.71111226, 0.43989015])

-            multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='mean')
-            loss = multi_label_soft_margin_loss(input, label)
-            print(loss)
-            # Tensor(1.54908717)
+            >>> multi_label_soft_margin_loss = nn.MultiLabelSoftMarginLoss(reduction='mean')
+            >>> loss = multi_label_soft_margin_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.54908717)
     """

     def __init__(self, weight=None, reduction="mean", name=None):
@@ -1512,24 +1518,26 @@ class HingeEmbeddingLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-
-            input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
-            # label elements in {1., -1.}
-            label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)
-
-            hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='none')
-            loss = hinge_embedding_loss(input, label)
-            print(loss)
-            # Tensor([[0., -2., 0.],
-            #         [0., -1., 2.],
-            #         [1., 1., 1.]])
-
-            hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='mean')
-            loss = hinge_embedding_loss(input, label)
-            print(loss)
-            # Tensor(0.22222222)
+            >>> import paddle
+            >>> import paddle.nn as nn
+
+            >>> input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
+            >>> # label elements in {1., -1.}
+            >>> label = paddle.to_tensor([[-1, 1, -1], [1, 1, 1], [1, -1, 1]], dtype=paddle.float32)
+
+            >>> hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='none')
+            >>> loss = hinge_embedding_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[3, 3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[ 0., -2.,  0.],
+             [ 0., -1.,  2.],
+             [ 1.,  1.,  1.]])
+
+            >>> hinge_embedding_loss = nn.HingeEmbeddingLoss(margin=1.0, reduction='mean')
+            >>> loss = hinge_embedding_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.22222222)
     """

     def __init__(self, margin=1.0, reduction="mean", name=None):
@@ -1595,23 +1603,29 @@ class CosineEmbeddingLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            input1 = paddle.to_tensor([[1.6, 1.2, -0.5], [3.2, 2.6, -5.8]], 'float32')
-            input2 = paddle.to_tensor([[0.5, 0.5, -1.8], [2.3, -1.4, 1.1]], 'float32')
-            label = paddle.to_tensor([1, -1], 'int64')
+            >>> input1 = paddle.to_tensor([[1.6, 1.2, -0.5], [3.2, 2.6, -5.8]], 'float32')
+            >>> input2 = paddle.to_tensor([[0.5, 0.5, -1.8], [2.3, -1.4, 1.1]], 'float32')
+            >>> label = paddle.to_tensor([1, -1], 'int64')

-            cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='mean')
-            output = cosine_embedding_loss(input1, input2, label)
-            print(output) # 0.21155193
+            >>> cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='mean')
+            >>> output = cosine_embedding_loss(input1, input2, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.21155193)

-            cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='sum')
-            output = cosine_embedding_loss(input1, input2, label)
-            print(output) # 0.42310387
+            >>> cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='sum')
+            >>> output = cosine_embedding_loss(input1, input2, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.42310387)

-            cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='none')
-            output = cosine_embedding_loss(input1, input2, label)
-            print(output) # [0.42310387, 0.        ]
+            >>> cosine_embedding_loss = paddle.nn.CosineEmbeddingLoss(margin=0.5, reduction='none')
+            >>> output = cosine_embedding_loss(input1, input2, label)
+            >>> print(output)
+            Tensor(shape=[2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [0.42310387, 0.        ])
     """
@@ -1703,21 +1717,23 @@ class TripletMarginWithDistanceLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            from paddle.nn import TripletMarginWithDistanceLoss
+            >>> import paddle
+            >>> from paddle.nn import TripletMarginWithDistanceLoss

-            input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
-            positive= paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
-            negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
-            triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='none')
-            loss = triplet_margin_with_distance_loss(input, positive, negative,)
-            print(loss)
-            # Tensor([0.        , 0.57496738, 0.        ])
+            >>> input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
+            >>> positive= paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
+            >>> negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
+            >>> triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='none')
+            >>> loss = triplet_margin_with_distance_loss(input, positive, negative,)
+            >>> print(loss)
+            Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [0.        , 0.57496595, 0.        ])
-            triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='mean')
-            loss = triplet_margin_with_distance_loss(input, positive, negative,)
-            print(loss)
-            # Tensor(0.19165580)
+            >>> triplet_margin_with_distance_loss = TripletMarginWithDistanceLoss(reduction='mean')
+            >>> loss = triplet_margin_with_distance_loss(input, positive, negative,)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.19165532)
     """
@@ -1812,20 +1828,22 @@ class TripletMarginLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
+            >>> import paddle

-            input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
-            positive= paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
-            negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
-            triplet_margin_loss = paddle.nn.TripletMarginLoss(reduction='none')
-            loss = triplet_margin_loss(input, positive, negative)
-            print(loss)
-            # Tensor([0.        , 0.57496738, 0.        ])
+            >>> input = paddle.to_tensor([[1, 5, 3], [0, 3, 2], [1, 4, 1]], dtype=paddle.float32)
+            >>> positive= paddle.to_tensor([[5, 1, 2], [3, 2, 1], [3, -1, 1]], dtype=paddle.float32)
+            >>> negative = paddle.to_tensor([[2, 1, -3], [1, 1, -1], [4, -2, 1]], dtype=paddle.float32)
+            >>> triplet_margin_loss = paddle.nn.TripletMarginLoss(reduction='none')
+            >>> loss = triplet_margin_loss(input, positive, negative)
+            >>> print(loss)
+            Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [0.        , 0.57496595, 0.        ])

-            triplet_margin_loss = paddle.nn.TripletMarginLoss(margin=1.0, swap=True, reduction='mean', )
-            loss = triplet_margin_loss(input, positive, negative,)
-            print(loss)
-            # Tensor(0.19165580)
+            >>> triplet_margin_loss = paddle.nn.TripletMarginLoss(margin=1.0, swap=True, reduction='mean')
+            >>> loss = triplet_margin_loss(input, positive, negative)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            2.40039468)
     """
@@ -1924,15 +1942,17 @@ class MultiMarginLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
+            >>> import paddle
+            >>> import paddle.nn as nn

-            input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
-            label = paddle.to_tensor([0, 1, 2], dtype=paddle.int32)
+            >>> input = paddle.to_tensor([[1, -2, 3], [0, -1, 2], [1, 0, 1]], dtype=paddle.float32)
+            >>> label = paddle.to_tensor([0, 1, 2], dtype=paddle.int32)

-            multi_margin_loss = nn.MultiMarginLoss(reduction='mean')
-            loss = multi_margin_loss(input, label)
-            print(loss)
+            >>> multi_margin_loss = nn.MultiMarginLoss(reduction='mean')
+            >>> loss = multi_margin_loss(input, label)
+            >>> print(loss)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            1.11111104)
     """

     def __init__(
@@ -2003,31 +2023,30 @@ class SoftMarginLoss(Layer):
    Examples:
        .. code-block:: python

-            import paddle
-
-            input = paddle.to_tensor([[0.5, 0.6, 0.7],[0.3, 0.5, 0.2]], 'float32')
-            label = paddle.to_tensor([[1.0, -1.0, 1.0],[-1.0, 1.0, 1.0]], 'float32')
-            soft_margin_loss = paddle.nn.SoftMarginLoss()
-            output = soft_margin_loss(input, label)
-            print(output)
-            # Tensor(shape=[], dtype=float32, place=Place(gpu:0), stop_gradient=True,
-            #        0.64022040)
-
-            input_np = paddle.uniform(shape=(5, 5), min=0.1, max=0.8, dtype="float64")
-            label_np = paddle.randint(high=2, shape=(5, 5), dtype="int64")
-            label_np[label_np==0]=-1
-            input = paddle.to_tensor(input_np)
-            label = paddle.to_tensor(label_np)
-            soft_margin_loss = paddle.nn.SoftMarginLoss(reduction='none')
-            output = soft_margin_loss(input, label)
-            print(output)
-            # Tensor(shape=[5, 5], dtype=float64, place=Place(gpu:0), stop_gradient=True,
-            # [[0.61739663, 0.51405668, 1.09346100, 0.42385561, 0.91602303],
-            #  [0.76997038, 1.01977148, 0.98971722, 1.13976032, 0.88152088],
-            #  [0.55476735, 1.10505384, 0.89923519, 0.45018155, 1.06587511],
-            #  [0.37998142, 0.48067240, 0.47791212, 0.55664053, 0.98581399],
-            #  [0.78571653, 0.59319711, 0.39701841, 0.76172109, 0.83781742]])
-
+            >>> import paddle
+            >>> paddle.seed(2023)
+            >>> input = paddle.to_tensor([[0.5, 0.6, 0.7],[0.3, 0.5, 0.2]], 'float32')
+            >>> label = paddle.to_tensor([[1.0, -1.0, 1.0],[-1.0, 1.0, 1.0]], 'float32')
+            >>> soft_margin_loss = paddle.nn.SoftMarginLoss()
+            >>> output = soft_margin_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[], dtype=float32, place=Place(cpu), stop_gradient=True,
+            0.64022040)
+
+            >>> input_np = paddle.uniform(shape=(5, 5), min=0.1, max=0.8, dtype="float64")
+            >>> label_np = paddle.randint(high=2, shape=(5, 5), dtype="int64")
+            >>> label_np[label_np==0]=-1
+            >>> input = paddle.to_tensor(input_np)
+            >>> label = paddle.to_tensor(label_np)
+            >>> soft_margin_loss = paddle.nn.SoftMarginLoss(reduction='none')
+            >>> output = soft_margin_loss(input, label)
+            >>> print(output)
+            Tensor(shape=[5, 5], dtype=float64, place=Place(cpu), stop_gradient=True,
+            [[1.10725628, 0.48778139, 0.56217249, 1.12581404, 0.51430043],
+             [0.90375795, 0.37761249, 0.43007557, 0.95089798, 0.43288319],
+             [1.16043599, 0.63015939, 0.51362715, 0.43617541, 0.57783301],
+             [0.81927846, 0.52558369, 0.59713908, 0.83100696, 0.50811616],
+             [0.82684205, 1.02064907, 0.50296995, 1.13461733, 0.93222519]])
     """

     def __init__(self, reduction='mean', name=None):
@@ -2100,16 +2119,23 @@ class GaussianNLLLoss(Layer):
    Examples::
        .. code-block:: python

-            import paddle
-            import paddle.nn as nn
-
-            input = paddle.randn([5, 2], dtype=paddle.float32)
-            label = paddle.randn([5, 2], dtype=paddle.float32)
-            variance = paddle.ones([5, 2], dtype=paddle.float32)
-
-            gs_nll_loss = nn.GaussianNLLLoss(full=False, epsilon=1e-6, reduction='none')
-            loss = gs_nll_loss(input, label, variance)
-            print(loss)
+            >>> import paddle
+            >>> import paddle.nn as nn
+            >>> paddle.seed(2023)
+
+            >>> input = paddle.randn([5, 2], dtype=paddle.float32)
+            >>> label = paddle.randn([5, 2], dtype=paddle.float32)
+            >>> variance = paddle.ones([5, 2], dtype=paddle.float32)
+
+            >>> gs_nll_loss = nn.GaussianNLLLoss(full=False, epsilon=1e-6, reduction='none')
+            >>> loss = gs_nll_loss(input, label, variance)
+            >>> print(loss)
+            Tensor(shape=[5, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [[0.21808575, 1.43013096],
+             [1.05245590, 0.00394560],
+             [1.20861185, 0.00000062],
+             [0.56946373, 0.73300570],
+             [0.37142906, 0.12038800]])

    Note:
        The clamping of ``variance`` is ignored with respect to autograd, and so the
diff --git a/python/paddle/nn/quant/format.py b/python/paddle/nn/quant/format.py
index 0e94ecef9f63380615655d391bc2f949dbc7e9ae..62694be27decbd9551586c800ece3232f99ee423 100644
--- a/python/paddle/nn/quant/format.py
+++ b/python/paddle/nn/quant/format.py
@@ -149,29 +149,31 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
     It defines some functions to convert quantizers and observers to quantize
     or dequantize operators that maintain the quantization parameters used
     during inference.
+
    Examples:
-        .. code-block:: python
-
-            # Given codes in ./customized_quanter.py
-            class CustomizedQuantedLayer(ConvertibleQuantedLayer):
-                def __init__(self):
-                    super().__init__()
-                    self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
-                    self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
-                    self.quanter_for_weight_a = None
-                    self.activation_weight = None
-                def forward(self, input):
-                    qweight_a = self.quanter_for_weight_a(self.weight_a)
-                    weight_b = self.weight_b
-                    qinput = self.activation_weight(input)
-                    // compute with qweight_a, weight_b and qinput.
-                    return qweight * qinput + weight_b
-
-            def weights_to_quanters(self):
-                return [('weight_a', 'quanter_for_weight_a')]
-
-            def activation_quanters(self):
-                return ['activation_weight']
+        .. code-block:: python
+
+            >>> # Given codes in ./customized_quanter.py
+            >>> class CustomizedQuantedLayer(ConvertibleQuantedLayer):
+            ...     def __init__(self):
+            ...         super().__init__()
+            ...         self.weight_a = paddle.create_parameter(shape=[1], dtype='float32')
+            ...         self.weight_b = paddle.create_parameter(shape=[1], dtype='float32')
+            ...         self.quanter_for_weight_a = None
+            ...         self.activation_weight = None
+            ...
+            ...     def forward(self, input):
+            ...         qweight_a = self.quanter_for_weight_a(self.weight_a)
+            ...         weight_b = self.weight_b
+            ...         qinput = self.activation_weight(input)
+            ...         # compute with qweight_a, weight_b and qinput.
+            ...         return qweight_a * qinput + weight_b
+            ...
+            ...     def weights_to_quanters(self):
+            ...         return [('weight_a', 'quanter_for_weight_a')]
+            ...
+            ...     def activation_quanters(self):
+            ...         return ['activation_weight']
     """

     def __init__(self):
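The converted examples above are standard doctest format (`>>>` for statements, `...` for continuations, expected output on the lines that follow), so they can be checked outside of CI with nothing but the standard library. A minimal sketch of such a check, assuming a local Paddle install; `paddle.nn.L1Loss` is picked arbitrarily from this diff, and the device is pinned to CPU to match the `place=Place(cpu)` fields in the expected outputs:

    import doctest

    import paddle

    # The expected outputs in this diff were captured on CPU; pin the device
    # so the printed `place=Place(cpu)` field matches.
    paddle.set_device('cpu')

    # Run the `>>>` examples embedded in one converted docstring.
    # NORMALIZE_WHITESPACE tolerates alignment-only differences in the
    # printed tensors.
    doctest.run_docstring_examples(
        paddle.nn.L1Loss,
        globs={},  # each example does its own `import paddle`
        verbose=True,
        optionflags=doctest.NORMALIZE_WHITESPACE,
    )

Examples whose results depend on random state call `paddle.seed(2023)` first, so they reproduce exactly on the same build; low-order float digits can still differ across platforms or devices, which is why the diff also normalizes the hard-coded outputs to CPU.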