未验证 提交 d983fc34 编写于 作者: Y yuehuayingxueluo 提交者: GitHub

clear fluid api: warpctc, nce, identity_loss (#48142)

* clear fluid api: warpctc, nce, identity_loss

* fix test_layers.py __init__.py

* fix loss.py

* change __init__.py and api calling method

* fix nce

* fix nce

* fix fluid.data

* delete warpctc api document

* fix loss.py

* fix ctc_loss

* fix test_warpctc_op.py

* fix test_layers.py

* fix some bug

* fix conflict

* fix ci bug

* Empty Commit test=allcase

* fix ci bug
上级 b5cd67e7
...@@ -37,8 +37,6 @@ from paddle import _C_ops, _legacy_C_ops ...@@ -37,8 +37,6 @@ from paddle import _C_ops, _legacy_C_ops
__all__ = [ __all__ = [
'cross_entropy', 'cross_entropy',
'square_error_cost', 'square_error_cost',
'warpctc',
'nce',
'softmax_with_cross_entropy', 'softmax_with_cross_entropy',
'sigmoid_cross_entropy_with_logits', 'sigmoid_cross_entropy_with_logits',
] ]
...@@ -182,416 +180,6 @@ def square_error_cost(input, label): ...@@ -182,416 +180,6 @@ def square_error_cost(input, label):
return paddle.nn.functional.square_error_cost(input, label) return paddle.nn.functional.square_error_cost(input, label)
def warpctc(
input,
label,
blank=0,
norm_by_times=False,
input_length=None,
label_length=None,
):
"""
An operator integrating the open source Warp-CTC library
(https://github.com/baidu-research/warp-ctc)
to compute Connectionist Temporal Classification (CTC) loss.
It can be aliased as softmax with CTC, since a native softmax activation is
interated to the Warp-CTC library to normalize values for each row of the
input tensor.
Args:
input (Variable): The unscaled probabilities of variable-length sequences,
which is a 2-D Tensor with LoD information, or a 3-D Tensor without Lod
information. When it is a 2-D LodTensor, its shape is
`[Lp, num_classes + 1]`, where `Lp` is the sum of all input
sequences' length and `num_classes` is the true number of classes.
(not including the blank label). When it is a 3-D Tensor, its shape
is `[max_logit_length, batch_size, num_classes + 1]`,
where `max_logit_length` is the longest length of
input logit sequence. The data type should be float32 or float64.
label (Variable): The ground truth of variable-length sequence,
which must be a 2-D Tensor with LoD information or a 3-D Tensor without
LoD information, needs to be consistent with the coressponding input.
When it is a 2-D LoDTensor, its shape is `[Lg, 1]`, where `Lg` is the sum
of all labels' length. When it is a 3-D Tensor, its shape is
`[batch_size, max_label_length]`, where `max_label_length` is the longest
length of label sequence. Data type must be int32.
blank (int, default 0): The blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the
half-opened interval `[0, num_classes + 1)`. The data type must be int32.
norm_by_times(bool, default false): Whether to normalize the gradients
by the number of time-step, which is also the sequence's length.
There is no need to normalize the gradients if warpctc layer was
followed by a mean_op.
input_length(Variable): The length for each input sequence if it is
of Tensor type, it should have shape `[batch_size]` and dtype int64.
label_length(Variable): The length for each label sequence if it is
of Tensor type, it should have shape `[batch_size]` and dtype int64.
Returns:
Variable: The Connectionist Temporal Classification (CTC) loss,
which is a 2-D Tensor with the shape `[batch_size, 1]`.
The date type is the same as input.
Examples:
.. code-block:: python
# using LoDTensor
import paddle
import paddle.fluid as fluid
import numpy as np
# lengths of logit sequences
seq_lens = [2,6]
# lengths of label sequences
label_lens = [2,3]
# class num
class_num = 5
paddle.enable_static()
logits = fluid.data(name='logits',shape=[None, class_num+1],
dtype='float32',lod_level=1)
label = fluid.data(name='label', shape=[None, 1],
dtype='int32', lod_level=1)
cost = fluid.layers.warpctc(input=logits, label=label)
place = fluid.CPUPlace()
x = fluid.create_lod_tensor(
np.random.rand(np.sum(seq_lens), class_num+1).astype("float32"),
[seq_lens], place)
y = fluid.create_lod_tensor(
np.random.randint(0, class_num, [np.sum(label_lens), 1]).astype("int32"),
[label_lens], place)
exe = fluid.Executor(place)
output= exe.run(fluid.default_main_program(),
feed={"logits": x,"label": y},
fetch_list=[cost.name])
print(output)
.. code-block:: python
# using Tensor
import paddle
import paddle.fluid as fluid
import numpy as np
# length of the longest logit sequence
max_seq_length = 5
#length of the longest label sequence
max_label_length = 3
# number of logit sequences
batch_size = 16
# class num
class_num = 5
paddle.enable_static()
logits = fluid.data(name='logits',
shape=[max_seq_length, batch_size, class_num+1],
dtype='float32')
logits_length = fluid.data(name='logits_length', shape=[None],
dtype='int64')
label = fluid.data(name='label', shape=[batch_size, max_label_length],
dtype='int32')
label_length = fluid.data(name='labels_length', shape=[None],
dtype='int64')
cost = fluid.layers.warpctc(input=logits, label=label,
input_length=logits_length,
label_length=label_length)
place = fluid.CPUPlace()
x = np.random.rand(max_seq_length, batch_size, class_num+1).astype("float32")
y = np.random.randint(0, class_num, [batch_size, max_label_length]).astype("int32")
exe = fluid.Executor(place)
output= exe.run(fluid.default_main_program(),
feed={"logits": x,
"label": y,
"logits_length": np.array([max_seq_length]*batch_size).astype("int64"),
"labels_length": np.array([max_label_length]*batch_size).astype("int64")},
fetch_list=[cost.name])
print(output)
"""
if in_dygraph_mode():
if input_length is None or label_length is None:
raise ValueError(
"input_length and label_length must not be None in dygraph mode!"
)
loss_out = _C_ops.warpctc(
input, label, input_length, label_length, blank, norm_by_times
)
return loss_out
if _non_static_mode():
if input_length is None or label_length is None:
raise ValueError(
"input_length and label_length must not be None in dygraph mode!"
)
grad, loss_out = _legacy_C_ops.warpctc(
input,
label,
input_length,
label_length,
'blank',
blank,
'norm_by_times',
norm_by_times,
)
return loss_out
helper = LayerHelper('warpctc', **locals())
check_variable_and_dtype(input, 'input', ['float32', 'float64'], "warpctc")
check_variable_and_dtype(label, 'label', ['int32'], "warpctc")
this_inputs = {'Logits': [input], 'Label': [label]}
if input_length is not None and label_length is not None:
check_variable_and_dtype(
input_length, 'LogitsLength', ['int64'], "warpctc"
)
check_variable_and_dtype(
label_length, 'LabelLength', ['int64'], "warpctc"
)
this_inputs['LogitsLength'] = [input_length]
this_inputs['LabelLength'] = [label_length]
loss_out = helper.create_variable_for_type_inference(dtype=input.dtype)
grad_out = helper.create_variable_for_type_inference(dtype=input.dtype)
helper.append_op(
type='warpctc',
inputs=this_inputs,
outputs={'WarpCTCGrad': [grad_out], 'Loss': [loss_out]},
attrs={
'blank': blank,
'norm_by_times': norm_by_times,
},
)
return loss_out
# FIXME(wuyi): let docstring_checker.py understand @autodoc.
# For now, the comments in c++ use types like Tensor, but in python side
# the type is often "Variable", and arguments may vary.
@static_only
@templatedoc(op_type="nce")
def nce(
input,
label,
num_total_classes,
sample_weight=None,
param_attr=None,
bias_attr=None,
num_neg_samples=None,
name=None,
sampler="uniform",
custom_dist=None,
seed=0,
is_sparse=False,
):
"""
:api_attr: Static Graph
${comment}
Args:
input (Tensor): Input tensor, 2-D tensor with shape [batch_size, dim],
and data type is float32 or float64.
label (Tensor): Input label, 2-D tensor with shape [batch_size, num_true_class],
and data type is int64.
num_total_classes (int):${num_total_classes_comment}.
sample_weight (Tensor|None): A Tensor of shape [batch_size, 1]
storing a weight for each sample. The default weight for each
sample is 1.0.
param_attr (ParamAttr|None): To specify the weight parameter attribute.
Default: None, which means the default weight parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
bias_attr (ParamAttr|None): To specify the bias parameter attribute.
Default: None, which means the default bias parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
num_neg_samples (int): ${num_neg_samples_comment}.
name(str|None): For detailed information, please refer to
:ref:`api_guide_Name` . Usually name is no need to set and None by default.
sampler (str, optional): The sampler used to sample class from negative classes.
It can be 'uniform', 'log_uniform' or 'custom_dist'.
default: 'uniform'.
custom_dist (nd.array|None): A numpy ndarray with size=num_total_classes.
It is used when sampler is set to 'custom_dist'.
custom_dist[i] is the probability of i-th class to be sampled.
default: None.
seed (int, optional): The seed used in sampler. Default 0, means no random seed.
is_sparse(bool, optional): The flag indicating whether to use sparse update,
the weight@GRAD and bias@GRAD will be changed to SelectedRows. Default False.
Returns:
Tensor: The output nce loss.
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.enable_static()
window_size = 5
words = []
for i in range(window_size):
words.append(paddle.static.data(
name='word_{0}'.format(i), shape=[-1, 1], dtype='int64'))
dict_size = 10000
label_word = int(window_size / 2) + 1
embs = []
for i in range(window_size):
if i == label_word:
continue
emb = paddle.static.nn.embedding(input=words[i], size=[dict_size, 32],
param_attr='embed', is_sparse=True)
embs.append(emb)
embs = paddle.concat(x=embs, axis=1)
loss = paddle.static.nn.nce(input=embs, label=words[label_word],
num_total_classes=dict_size, param_attr='nce.w_0',
bias_attr='nce.b_0')
#or use custom distribution
dist = np.array([0.05,0.5,0.1,0.3,0.05])
loss = paddle.static.nn.nce(input=embs, label=words[label_word],
num_total_classes=5, param_attr='nce.w_1',
bias_attr='nce.b_1',
num_neg_samples=3,
sampler="custom_dist",
custom_dist=dist)
"""
helper = LayerHelper('nce', **locals())
check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'nce')
check_variable_and_dtype(label, 'label', ['int64'], 'nce')
dim = input.shape[1]
num_true_class = label.shape[1]
w = helper.create_parameter(
attr=helper.param_attr,
shape=[num_total_classes, dim],
is_bias=False,
dtype=input.dtype,
)
inputs = {}
if helper.bias_attr:
b = helper.create_parameter(
attr=helper.bias_attr,
shape=[num_total_classes, 1],
is_bias=True,
dtype=input.dtype,
)
inputs['Bias'] = b
cost = helper.create_variable_for_type_inference(dtype=input.dtype)
sample_logits = helper.create_variable_for_type_inference(dtype=input.dtype)
sample_labels = helper.create_variable_for_type_inference(dtype=label.dtype)
inputs['Input'] = input
inputs['Label'] = label
inputs['Weight'] = w
inputs['SampleWeight'] = sample_weight if sample_weight is not None else []
if sampler == "uniform":
sampler = 0
elif sampler == "log_uniform":
sampler = 1
elif sampler == "custom_dist":
assert custom_dist is not None
custom_dist_len = num_total_classes
alias_probs_ = [0] * custom_dist_len
alias_ = [0] * custom_dist_len
bigs = []
littles = []
for i in range(custom_dist_len):
normal_prob = custom_dist[i] * custom_dist_len
if normal_prob - 1.0 > 0:
bigs.append((i, normal_prob))
elif 1.0 - normal_prob > 0:
littles.append((i, normal_prob))
else:
alias_probs_[i] = normal_prob
alias_[i] = -1
while len(bigs) and len(littles):
big = bigs.pop(0)
little = littles.pop(0)
big_idx = big[0]
big_prob = big[1]
alias_probs_[little[0]] = little[1]
alias_[little[0]] = big_idx
big_left = big[1] + little[1] - 1
if big_left - 1.0 > 0:
bigs.append((big_idx, big_left))
elif 1.0 - big_left > 0:
littles.append((big_idx, big_left))
else:
alias_probs_[big_idx] = big_left
alias_[big_idx] = -1
if len(bigs):
big = bigs.pop(0)
alias_probs_[big[0]] = 1.0
alias_[big[0]] = -1
if len(littles):
little = littles.pop(0)
alias_probs_[little[0]] = 1.0
alias_[little[0]] = -1
def _init_by_numpy_array(numpy_array):
ret = helper.create_parameter(
attr=ParamAttr(),
shape=numpy_array.shape,
dtype=numpy_array.dtype,
default_initializer=NumpyArrayInitializer(numpy_array),
)
ret.stop_gradient = True
return ret
inputs['CustomDistProbs'] = _init_by_numpy_array(
np.array(custom_dist).astype('float32')
)
inputs['CustomDistAlias'] = _init_by_numpy_array(
np.array(alias_).astype('int32')
)
inputs['CustomDistAliasProbs'] = _init_by_numpy_array(
np.array(alias_probs_).astype('float32')
)
sampler = 2
else:
raise Exception("Unsupported sampler type.")
if num_neg_samples is None:
num_neg_samples = 10
else:
num_neg_samples = int(num_neg_samples)
remote_prefetch = is_sparse
print(
"With sparse mode, if your models has only small parameter prefetch may cause speed down"
)
attrs = {
'num_total_classes': int(num_total_classes),
'num_neg_samples': num_neg_samples,
'seed': seed,
'sampler': sampler,
'is_sparse': is_sparse,
'remote_prefetch': remote_prefetch,
}
helper.append_op(
type='nce',
inputs=inputs,
outputs={
'Cost': cost,
'SampleLogits': sample_logits,
'SampleLabels': sample_labels,
},
attrs=attrs,
)
return cost / (num_neg_samples + 1)
def softmax_with_cross_entropy( def softmax_with_cross_entropy(
logits, logits,
label, label,
...@@ -706,62 +294,6 @@ def softmax_with_cross_entropy( ...@@ -706,62 +294,6 @@ def softmax_with_cross_entropy(
) )
def identity_loss(x, reduction="none"):
r"""Marks a tensor as being part of the loss calculation for IPU.
This operator is used to handle on the (final) loss of a model so that
it is used as the start of backpropagation.
When `reduction` is `none`, return raw `Out`.
When `reduction` is `mean`, return
.. math::
Out = MEAN(Out)
When `reduction` is `sum`, return
.. math::
Out = SUM(Out)
Parameters:
x (Variable): The input tensor. The shapes is [N, *], where N is batch size and `*` means any number of
additional dimensions. It's data type should be float32, float64 on CPU and float16, float32 on IPU.
reduction(str|int, optional): Reduce the loss output. Supported string values are: 'sum', 'mean', 'none'
the corresponding int values are 0, 1, 2 respectively. The default value is "none".
Returns:
Variable: The loss ``Tensor`` with the specified reduction applied.
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle
paddle.enable_static()
loss = fluid.data(name="loss", shape=[-1, 1], dtype="float32")
out = paddle.incubate.identity_loss(loss, reduction=1)
"""
if isinstance(reduction, str):
reduction = {"sum": 0, "mean": 1, "none": 2}.get(reduction.lower())
if reduction is None:
raise Exception("Unsupported reduction type.")
if _non_static_mode():
return _legacy_C_ops.identity_loss(x, "reduction", reduction)
check_variable_and_dtype(x, 'x', ['float32', 'float64'], "identity_loss")
attrs = {'reduction': reduction}
helper = LayerHelper('identity_loss', **locals())
dtype = helper.input_dtype(input_param_name='x')
out = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type="identity_loss", inputs={"X": x}, outputs={"Out": out}, attrs=attrs
)
return out
@templatedoc() @templatedoc()
def sigmoid_cross_entropy_with_logits( def sigmoid_cross_entropy_with_logits(
x, label, ignore_index=kIgnoreIndex, name=None, normalize=False x, label, ignore_index=kIgnoreIndex, name=None, normalize=False
......
...@@ -98,11 +98,12 @@ class TestBase(IPUOpTest): ...@@ -98,11 +98,12 @@ class TestBase(IPUOpTest):
label_length = paddle.static.data( label_length = paddle.static.data(
name=self.feed_list[3], shape=self.feed_shape[3], dtype='int64' name=self.feed_list[3], shape=self.feed_shape[3], dtype='int64'
) )
out = paddle.fluid.layers.warpctc( out = paddle.nn.functional.ctc_loss(
logits, logits,
labels, labels,
input_length=input_length, input_length=input_length,
label_length=label_length, label_length=label_length,
reduction='mean',
**self.attrs **self.attrs
) )
loss = paddle.mean(out) loss = paddle.mean(out)
......
...@@ -1351,7 +1351,7 @@ class TestRemoteNce(TestDistLookupTableBase): ...@@ -1351,7 +1351,7 @@ class TestRemoteNce(TestDistLookupTableBase):
) )
) )
cost = fluid.layers.nce( cost = paddle.static.nn.nce(
input=input, input=input,
label=label, label=label,
num_total_classes=num_total_classes, num_total_classes=num_total_classes,
......
...@@ -76,8 +76,8 @@ class TestDygraphLoadStatic(unittest.TestCase): ...@@ -76,8 +76,8 @@ class TestDygraphLoadStatic(unittest.TestCase):
nce_label = fluid.data( nce_label = fluid.data(
name="nce_label", shape=[None, 10], dtype='int64' name="nce_label", shape=[None, 10], dtype='int64'
) )
nce_out_1 = fluid.layers.nce(nce_in, nce_label, 10000) nce_out_1 = paddle.static.nn.nce(nce_in, nce_label, 10000)
nce_out_2 = fluid.layers.nce(nce_in, nce_label, 10000) nce_out_2 = paddle.static.nn.nce(nce_in, nce_label, 10000)
prelu_in = fluid.data( prelu_in = fluid.data(
name="prelu_in", shape=[None, 5, 10, 10], dtype='float32' name="prelu_in", shape=[None, 5, 10, 10], dtype='float32'
......
...@@ -1314,7 +1314,7 @@ class TestLayer(LayerTest): ...@@ -1314,7 +1314,7 @@ class TestLayer(LayerTest):
embs = layers.concat(input=embs, axis=1) embs = layers.concat(input=embs, axis=1)
wl = fluid.layers.unsqueeze(words[label_word], axes=[0]) wl = fluid.layers.unsqueeze(words[label_word], axes=[0])
nce_loss = layers.nce( nce_loss = paddle.static.nn.nce(
input=embs, input=embs,
label=wl, label=wl,
num_total_classes=dict_size, num_total_classes=dict_size,
...@@ -3274,7 +3274,7 @@ class TestBook(LayerTest): ...@@ -3274,7 +3274,7 @@ class TestBook(LayerTest):
embs.append(emb) embs.append(emb)
embs = layers.concat(input=embs, axis=1) embs = layers.concat(input=embs, axis=1)
loss = layers.nce( loss = paddle.static.nn.nce(
input=embs, input=embs,
label=words[label_word], label=words[label_word],
num_total_classes=dict_size, num_total_classes=dict_size,
...@@ -4343,21 +4343,24 @@ class TestBook(LayerTest): ...@@ -4343,21 +4343,24 @@ class TestBook(LayerTest):
def test_warpctc_with_padding(self): def test_warpctc_with_padding(self):
# TODO(minqiyang): dygraph do not support lod now # TODO(minqiyang): dygraph do not support lod now
with self.static_graph(): with self.static_graph():
input_length = layers.data( input_length = paddle.static.data(
name='logits_length', shape=[11], dtype='int64' name='logits_length', shape=[11], dtype='int64'
) )
label_length = layers.data( label_length = paddle.static.data(
name='labels_length', shape=[12], dtype='int64' name='labels_length', shape=[12], dtype='int64'
) )
label = layers.data(name='label', shape=[12, 1], dtype='int32') label = paddle.static.data(
predict = layers.data( name='label', shape=[12, 1], dtype='int32'
)
predict = paddle.static.data(
name='predict', shape=[4, 4, 8], dtype='float32' name='predict', shape=[4, 4, 8], dtype='float32'
) )
output = layers.warpctc( output = paddle.nn.functional.ctc_loss(
input=predict, log_probs=predict,
label=label, labels=label,
input_length=input_length, input_lengths=input_length,
label_length=label_length, label_lengths=label_length,
reduction='none',
) )
return output return output
......
...@@ -210,7 +210,7 @@ class TestNCECase1SelectedRows(unittest.TestCase): ...@@ -210,7 +210,7 @@ class TestNCECase1SelectedRows(unittest.TestCase):
) )
) )
cost = fluid.layers.nce( cost = paddle.static.nn.nce(
input=input, input=input,
label=label, label=label,
num_total_classes=num_total_classes, num_total_classes=num_total_classes,
...@@ -291,7 +291,9 @@ class TestNCE_OpError(unittest.TestCase): ...@@ -291,7 +291,9 @@ class TestNCE_OpError(unittest.TestCase):
name='label1', shape=[-1, 4], dtype="int64" name='label1', shape=[-1, 4], dtype="int64"
) )
# the input(input) of nce layer must be Variable. # the input(input) of nce layer must be Variable.
self.assertRaises(TypeError, fluid.layers.nce, input1, label1, 5) self.assertRaises(
TypeError, paddle.static.nn.nce, input1, label1, 5
)
input2 = fluid.layers.data( input2 = fluid.layers.data(
name='input2', shape=[-1, 4], dtype="float32" name='input2', shape=[-1, 4], dtype="float32"
...@@ -300,7 +302,9 @@ class TestNCE_OpError(unittest.TestCase): ...@@ -300,7 +302,9 @@ class TestNCE_OpError(unittest.TestCase):
np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace() np.array([0.0, 3.0, 2.0, 4.0]), [[1, 1, 2]], fluid.CPUPlace()
) )
# the input(label) of nce layer must be Variable. # the input(label) of nce layer must be Variable.
self.assertRaises(TypeError, fluid.layers.nce, input2, label2, 5) self.assertRaises(
TypeError, paddle.static.nn.nce, input2, label2, 5
)
input3 = fluid.layers.data( input3 = fluid.layers.data(
name='input3', shape=[-1, 4], dtype="float16" name='input3', shape=[-1, 4], dtype="float16"
...@@ -309,7 +313,9 @@ class TestNCE_OpError(unittest.TestCase): ...@@ -309,7 +313,9 @@ class TestNCE_OpError(unittest.TestCase):
name='label3', shape=[-1, 1], dtype="int64" name='label3', shape=[-1, 1], dtype="int64"
) )
# the data type of input(input) must be float32 or float64. # the data type of input(input) must be float32 or float64.
self.assertRaises(TypeError, fluid.layers.nce, input3, label3, 5) self.assertRaises(
TypeError, paddle.static.nn.nce, input3, label3, 5
)
input4 = fluid.layers.data( input4 = fluid.layers.data(
name='input4', shape=[-1, 4], dtype="float32" name='input4', shape=[-1, 4], dtype="float32"
...@@ -318,7 +324,9 @@ class TestNCE_OpError(unittest.TestCase): ...@@ -318,7 +324,9 @@ class TestNCE_OpError(unittest.TestCase):
name='label4', shape=[-1, 1], dtype="int32" name='label4', shape=[-1, 1], dtype="int32"
) )
# the data type of input(label) must be int64. # the data type of input(label) must be int64.
self.assertRaises(TypeError, fluid.layers.nce, input4, label4, 5) self.assertRaises(
TypeError, paddle.static.nn.nce, input4, label4, 5
)
class TestDygraphNCE_OpError(unittest.TestCase): class TestDygraphNCE_OpError(unittest.TestCase):
......
...@@ -17,7 +17,7 @@ import unittest ...@@ -17,7 +17,7 @@ import unittest
import numpy as np import numpy as np
from op_test import OpTest from op_test import OpTest
from test_softmax_op import stable_softmax from test_softmax_op import stable_softmax
import paddle.fluid as fluid from paddle.fluid.framework import _test_eager_guard
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid import Program, program_guard from paddle.fluid import Program, program_guard
import paddle import paddle
...@@ -206,19 +206,6 @@ class CTCForward: ...@@ -206,19 +206,6 @@ class CTCForward:
return self.loss return self.loss
def python_api(
logits,
label,
logits_length=None,
labels_length=None,
blank=0,
norm_by_times=False,
):
return paddle.fluid.layers.warpctc(
logits, label, blank, norm_by_times, logits_length, labels_length
)
class TestWarpCTCOp(OpTest): class TestWarpCTCOp(OpTest):
def config(self): def config(self):
self.batch_size = 4 self.batch_size = 4
...@@ -317,7 +304,6 @@ class TestWarpCTCOpWithPadding(OpTest): ...@@ -317,7 +304,6 @@ class TestWarpCTCOpWithPadding(OpTest):
def setUp(self): def setUp(self):
self.op_type = "warpctc" self.op_type = "warpctc"
self.python_api = python_api
self.python_out_sig = ["Loss"] self.python_out_sig = ["Loss"]
self.config() self.config()
...@@ -394,7 +380,7 @@ class TestWarpCTCOpWithPadding(OpTest): ...@@ -394,7 +380,7 @@ class TestWarpCTCOpWithPadding(OpTest):
} }
def test_check_output(self): def test_check_output(self):
self.check_output(check_eager=True) self.check_output(check_eager=False)
def test_check_grad(self): def test_check_grad(self):
self.outputs['WarpCTCGrad'] = self.gradient self.outputs['WarpCTCGrad'] = self.gradient
...@@ -439,7 +425,6 @@ class TestWarpCTCOpFp64(OpTest): ...@@ -439,7 +425,6 @@ class TestWarpCTCOpFp64(OpTest):
def setUp(self): def setUp(self):
self.op_type = "warpctc" self.op_type = "warpctc"
self.python_api = python_api
self.python_out_sig = ["Loss"] self.python_out_sig = ["Loss"]
self.config() self.config()
...@@ -516,67 +501,74 @@ class TestWarpCTCOpFp64(OpTest): ...@@ -516,67 +501,74 @@ class TestWarpCTCOpFp64(OpTest):
} }
def test_check_output(self): def test_check_output(self):
self.check_output(check_eager=True) self.check_output(check_eager=False)
def test_check_grad(self): def test_check_grad(self):
self.outputs['WarpCTCGrad'] = self.gradient self.outputs['WarpCTCGrad'] = self.gradient
self.check_grad(["Logits"], "Loss", check_eager=True) self.check_grad(["Logits"], "Loss", check_eager=False)
class TestWarpCTCOpError(unittest.TestCase): class TestWarpCTCOpError(unittest.TestCase):
def test_errors(self): def test_errors(self):
paddle.enable_static()
with program_guard(Program(), Program()): with program_guard(Program(), Program()):
logits = fluid.data( logits = paddle.static.data(
name='logits', shape=[5, 16, 6], dtype='float32' name='logits', shape=[5, 16, 6], dtype='float32'
) )
logits_length = fluid.data( logits_length = paddle.static.data(
name='logits_length', shape=[None], dtype='int64' name='logits_length', shape=[None], dtype='int64'
) )
label = fluid.data(name='label', shape=[16, 3], dtype='int32') label = paddle.static.data(
label_length = fluid.data( name='label', shape=[16, 3], dtype='int32'
)
label_length = paddle.static.data(
name='labels_length', shape=[None], dtype='int64' name='labels_length', shape=[None], dtype='int64'
) )
def test_logits_Variable(): def test_logits_Variable():
logits_data = np.random.rand(5, 16, 6).astype(logits.dtype) logits_data = np.random.rand(5, 16, 6).astype(logits.dtype)
fluid.layers.warpctc( paddle.nn.functional.ctc_loss(
input=logits_data, log_probs=logits_data,
label=label, labels=label,
input_length=logits_length, input_lengths=logits_length,
label_length=label_length, label_lengths=label_length,
reduction='none',
) )
self.assertRaises(TypeError, test_logits_Variable) self.assertRaises(TypeError, test_logits_Variable)
def test_label_Variable(): def test_label_Variable():
label_data = np.random.randint(0, 5, [5, 1]).astype("int32") label_data = np.random.randint(0, 5, [5, 1]).astype("int32")
fluid.layers.warpctc( paddle.nn.functional.ctc_loss(
input=logits, log_probs=logits,
label=label_data, labels=label_data,
input_length=logits_length, input_lengths=logits_length,
label_length=label_length, label_lengths=label_length,
reduction='none',
) )
self.assertRaises(TypeError, test_label_Variable) self.assertRaises(TypeError, test_label_Variable)
def test_logits_len_Variable(): def test_logits_len_Variable():
logits_length_data = np.array([5] * 16).astype("int64") logits_length_data = np.array([5] * 16).astype("int64")
fluid.layers.warpctc( paddle.nn.functional.ctc_loss(
input=logits, log_probs=logits,
label=label, labels=label,
input_length=logits_length_data, input_lengths=logits_length_data,
label_length=label_length, label_lengths=label_length,
reduction='none',
) )
self.assertRaises(TypeError, test_logits_len_Variable) self.assertRaises(TypeError, test_logits_len_Variable)
def test_label_len_Variable(): def test_label_len_Variable():
label_length_data = np.array([3] * 16).astype("int64") label_length_data = np.array([3] * 16).astype("int64")
fluid.layers.warpctc( paddle.nn.functional.ctc_loss(
input=logits, log_probs=logits,
label=label, labels=label,
input_length=logits_length, input_lengths=logits_length,
label_length=label_length_data, label_length=label_length_data,
reduction='none',
) )
self.assertRaises(TypeError, test_label_len_Variable) self.assertRaises(TypeError, test_label_len_Variable)
...@@ -590,7 +582,13 @@ class TestWarpCTCOpError(unittest.TestCase): ...@@ -590,7 +582,13 @@ class TestWarpCTCOpError(unittest.TestCase):
softmax = paddle.to_tensor(logits) softmax = paddle.to_tensor(logits)
labels = paddle.to_tensor(labels) labels = paddle.to_tensor(labels)
fluid.layers.warpctc(input=softmax, label=labels) paddle.nn.functional.ctc_loss(
log_probs=softmax,
labels=labels,
input_lengths=None,
label_lengths=None,
reduction='none',
)
paddle.disable_static() paddle.disable_static()
self.assertRaises(ValueError, test_dygraph_with_lod) self.assertRaises(ValueError, test_dygraph_with_lod)
...@@ -598,74 +596,6 @@ class TestWarpCTCOpError(unittest.TestCase): ...@@ -598,74 +596,6 @@ class TestWarpCTCOpError(unittest.TestCase):
class TestCTCLossAPICase(unittest.TestCase): class TestCTCLossAPICase(unittest.TestCase):
def test_functinal_api(self):
self.batch_size = 4
self.num_classes = CUDA_BLOCK_SIZE + 2
self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
self.blank = self.num_classes - 1
self.norm_by_times = False
logits = np.random.uniform(
0.1,
1.0,
[max(self.logits_length), self.batch_size, self.num_classes],
).astype("float32")
softmax = np.apply_along_axis(stable_softmax, -1, logits)
# labels should not be blank
labels = np.random.randint(
0,
self.num_classes - 1,
[self.batch_size, max(self.labels_length)],
dtype="int32",
)
ctc = CTCForward(
softmax,
self.logits_length,
labels,
self.labels_length,
self.num_classes,
self.batch_size,
self.blank,
self.norm_by_times,
)
loss_np = ctc.forward()
paddle.disable_static()
softmax = paddle.to_tensor(logits)
labels = paddle.to_tensor(labels)
logits_length = paddle.to_tensor(self.logits_length)
labels_length = paddle.to_tensor(self.labels_length)
loss_pd_mean = F.ctc_loss(
softmax,
labels,
logits_length,
labels_length,
blank=self.blank,
reduction='mean',
)
loss_pd_mean = loss_pd_mean.numpy()
loss_pd_sum = F.ctc_loss(
softmax,
labels,
logits_length,
labels_length,
blank=self.blank,
reduction='sum',
)
loss_pd_sum = loss_pd_sum.numpy()
paddle.enable_static()
loss_np = np.squeeze(loss_np, axis=-1)
loss_np_mean = (loss_np / labels_length.numpy()).mean()
loss_np_sum = loss_np.sum()
np.testing.assert_allclose(
loss_pd_mean, loss_np_mean, rtol=1e-05, atol=1
)
np.testing.assert_allclose(loss_pd_sum, loss_np_sum, rtol=1e-05, atol=1)
def test_class_api(self): def test_class_api(self):
self.batch_size = 3 self.batch_size = 3
self.num_classes = 15 self.num_classes = 15
...@@ -715,6 +645,81 @@ class TestCTCLossAPICase(unittest.TestCase): ...@@ -715,6 +645,81 @@ class TestCTCLossAPICase(unittest.TestCase):
np.testing.assert_allclose(loss_pd, loss_np, rtol=1e-05, atol=1) np.testing.assert_allclose(loss_pd, loss_np, rtol=1e-05, atol=1)
def test_eager_ctcloss(self):
def test_functinal_api():
self.batch_size = 4
self.num_classes = CUDA_BLOCK_SIZE + 2
self.logits_length = np.array([4, 1, 3, 3], dtype=np.int64)
self.labels_length = np.array([3, 1, 4, 4], dtype=np.int64)
self.blank = self.num_classes - 1
self.norm_by_times = False
logits = np.random.uniform(
0.1,
1.0,
[max(self.logits_length), self.batch_size, self.num_classes],
).astype("float32")
softmax = np.apply_along_axis(stable_softmax, -1, logits)
# labels should not be blank
labels = np.random.randint(
0,
self.num_classes - 1,
[self.batch_size, max(self.labels_length)],
dtype="int32",
)
ctc = CTCForward(
softmax,
self.logits_length,
labels,
self.labels_length,
self.num_classes,
self.batch_size,
self.blank,
self.norm_by_times,
)
loss_np = ctc.forward()
paddle.disable_static()
softmax = paddle.to_tensor(logits)
labels = paddle.to_tensor(labels)
logits_length = paddle.to_tensor(self.logits_length)
labels_length = paddle.to_tensor(self.labels_length)
loss_pd_mean = F.ctc_loss(
softmax,
labels,
logits_length,
labels_length,
blank=self.blank,
reduction='mean',
)
loss_pd_mean = loss_pd_mean.numpy()
loss_pd_sum = F.ctc_loss(
softmax,
labels,
logits_length,
labels_length,
blank=self.blank,
reduction='sum',
)
loss_pd_sum = loss_pd_sum.numpy()
paddle.enable_static()
loss_np = np.squeeze(loss_np, axis=-1)
loss_np_mean = (loss_np / labels_length.numpy()).mean()
loss_np_sum = loss_np.sum()
np.testing.assert_allclose(
loss_pd_mean, loss_np_mean, rtol=1e-05, atol=1
)
np.testing.assert_allclose(
loss_pd_sum, loss_np_sum, rtol=1e-05, atol=1
)
with _test_eager_guard():
test_functinal_api()
test_functinal_api()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -212,19 +212,6 @@ class CTCForward(object): ...@@ -212,19 +212,6 @@ class CTCForward(object):
return self.loss return self.loss
def python_api(
logits,
label,
logits_length=None,
labels_length=None,
blank=0,
norm_by_times=False,
):
return paddle.fluid.layers.warpctc(
logits, label, blank, norm_by_times, logits_length, labels_length
)
class XPUTestWarpCTCOp(XPUOpTestWrapper): class XPUTestWarpCTCOp(XPUOpTestWrapper):
def __init__(self): def __init__(self):
self.op_name = 'warpctc' self.op_name = 'warpctc'
...@@ -244,7 +231,6 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper): ...@@ -244,7 +231,6 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper):
self.op_type = "warpctc" self.op_type = "warpctc"
self.dtype = self.in_type self.dtype = self.in_type
self.place = paddle.XPUPlace(0) self.place = paddle.XPUPlace(0)
self.python_api = python_api
self.python_out_sig = ["Loss"] self.python_out_sig = ["Loss"]
self.config() self.config()
...@@ -325,7 +311,7 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper): ...@@ -325,7 +311,7 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper):
} }
def test_check_output(self): def test_check_output(self):
self.check_output(check_eager=True) self.check_output(check_eager=False)
def test_check_grad(self): def test_check_grad(self):
self.outputs['WarpCTCGrad'] = self.gradient self.outputs['WarpCTCGrad'] = self.gradient
...@@ -367,44 +353,48 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper): ...@@ -367,44 +353,48 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper):
def test_logits_Variable(): def test_logits_Variable():
logits_data = np.random.rand(5, 16, 6).astype(logits.dtype) logits_data = np.random.rand(5, 16, 6).astype(logits.dtype)
fluid.layers.warpctc( paddle.nn.functional.ctc_loss(
input=logits_data, log_probs=logits_data,
label=label, labels=label,
input_length=logits_length, input_lengths=logits_length,
label_length=label_length, label_lengths=label_length,
reduction='none',
) )
self.assertRaises(TypeError, test_logits_Variable) self.assertRaises(TypeError, test_logits_Variable)
def test_label_Variable(): def test_label_Variable():
label_data = np.random.randint(0, 5, [5, 1]).astype("int32") label_data = np.random.randint(0, 5, [5, 1]).astype("int32")
fluid.layers.warpctc( paddle.nn.functional.ctc_loss(
input=logits, log_probs=logits,
label=label_data, labels=label_data,
input_length=logits_length, input_lengths=logits_length,
label_length=label_length, label_lengths=label_length,
reduction='none',
) )
self.assertRaises(TypeError, test_label_Variable) self.assertRaises(TypeError, test_label_Variable)
def test_logits_len_Variable(): def test_logits_len_Variable():
logits_length_data = np.array([5] * 16).astype("int64") logits_length_data = np.array([5] * 16).astype("int64")
fluid.layers.warpctc( paddle.nn.functional.ctc_loss(
input=logits, log_probs=logits,
label=label, labels=label,
input_length=logits_length_data, input_lengths=logits_length_data,
label_length=label_length, label_lengths=label_length,
reduction='none',
) )
self.assertRaises(TypeError, test_logits_len_Variable) self.assertRaises(TypeError, test_logits_len_Variable)
def test_label_len_Variable(): def test_label_len_Variable():
label_length_data = np.array([3] * 16).astype("int64") label_length_data = np.array([3] * 16).astype("int64")
fluid.layers.warpctc( paddle.nn.functional.ctc_loss(
input=logits, log_probs=logits,
label=label, labels=label,
input_length=logits_length, input_lengths=logits_length,
label_length=label_length_data, label_lengths=label_length_data,
reduction='none',
) )
self.assertRaises(TypeError, test_label_len_Variable) self.assertRaises(TypeError, test_label_len_Variable)
...@@ -423,7 +413,9 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper): ...@@ -423,7 +413,9 @@ class XPUTestWarpCTCOp(XPUOpTestWrapper):
softmax = paddle.to_tensor(logits) softmax = paddle.to_tensor(logits)
labels = paddle.to_tensor(labels) labels = paddle.to_tensor(labels)
fluid.layers.warpctc(input=softmax, label=labels) paddle.nn.functional.ctc_loss(
log_probs=softmax, labels=labels, reduction='none'
)
paddle.disable_static() paddle.disable_static()
self.assertRaises(ValueError, test_dygraph_with_lod) self.assertRaises(ValueError, test_dygraph_with_lod)
......
...@@ -36,7 +36,7 @@ from . import nn # noqa: F401 ...@@ -36,7 +36,7 @@ from . import nn # noqa: F401
from . import asp # noqa: F401 from . import asp # noqa: F401
from . import multiprocessing # noqa: F401 from . import multiprocessing # noqa: F401
from ..fluid.layers.loss import identity_loss from .nn.loss import identity_loss
from ..fluid.incubate import fleet from ..fluid.incubate import fleet
from . import xpu from . import xpu
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.fluid.layer_helper import LayerHelper
from paddle.framework import (
_non_static_mode,
)
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle import _legacy_C_ops
def identity_loss(x, reduction="none"):
r"""Marks a tensor as being part of the loss calculation for IPU.
This operator is used to handle on the (final) loss of a model so that
it is used as the start of backpropagation.
When `reduction` is `none`, return raw `Out`.
When `reduction` is `mean`, return
.. math::
Out = MEAN(Out)
When `reduction` is `sum`, return
.. math::
Out = SUM(Out)
Parameters:
x (Variable): The input tensor. The shapes is [N, *], where N is batch size and `*` means any number of
additional dimensions. It's data type should be float32, float64 on CPU and float16, float32 on IPU.
reduction(str|int, optional): Reduce the loss output. Supported string values are: 'sum', 'mean', 'none'
the corresponding int values are 0, 1, 2 respectively. The default value is "none".
Returns:
Variable: The loss ``Tensor`` with the specified reduction applied.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
loss = paddle.static.data(name="loss", shape=[-1, 1], dtype="float32")
out = paddle.incubate.identity_loss(loss, reduction=1)
"""
if isinstance(reduction, str):
reduction = {"sum": 0, "mean": 1, "none": 2}.get(reduction.lower())
if reduction is None:
raise Exception("Unsupported reduction type.")
if _non_static_mode():
return _legacy_C_ops.identity_loss(x, "reduction", reduction)
check_variable_and_dtype(x, 'x', ['float32', 'float64'], "identity_loss")
attrs = {'reduction': reduction}
helper = LayerHelper('identity_loss', **locals())
dtype = helper.input_dtype(input_param_name='x')
out = helper.create_variable_for_type_inference(dtype)
helper.append_op(
type="identity_loss", inputs={"X": x}, outputs={"Out": out}, attrs=attrs
)
return out
...@@ -37,6 +37,8 @@ from ...fluid.framework import ( ...@@ -37,6 +37,8 @@ from ...fluid.framework import (
__all__ = [] __all__ = []
kIgnoreIndex = -100
def dice_loss(input, label, epsilon=0.00001, name=None): def dice_loss(input, label, epsilon=0.00001, name=None):
r""" r"""
...@@ -1818,7 +1820,70 @@ def ctc_loss( ...@@ -1818,7 +1820,70 @@ def ctc_loss(
""" """
loss_out = fluid.layers.warpctc( def warpctc(
input,
label,
blank=0,
norm_by_times=False,
input_length=None,
label_length=None,
):
if in_dygraph_mode():
if input_length is None or label_length is None:
raise ValueError(
"input_length and label_length must not be None in dygraph mode!"
)
loss_out = _C_ops.warpctc(
input, label, input_length, label_length, blank, norm_by_times
)
return loss_out
if _non_static_mode():
if input_length is None or label_length is None:
raise ValueError(
"input_length and label_length must not be None in dygraph mode!"
)
grad, loss_out = _legacy_C_ops.warpctc(
input,
label,
input_length,
label_length,
'blank',
blank,
'norm_by_times',
norm_by_times,
)
return loss_out
helper = LayerHelper('warpctc', **locals())
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], "warpctc"
)
check_variable_and_dtype(label, 'label', ['int32'], "warpctc")
this_inputs = {'Logits': [input], 'Label': [label]}
if input_length is not None and label_length is not None:
check_variable_and_dtype(
input_length, 'LogitsLength', ['int64'], "warpctc"
)
check_variable_and_dtype(
label_length, 'LabelLength', ['int64'], "warpctc"
)
this_inputs['LogitsLength'] = [input_length]
this_inputs['LabelLength'] = [label_length]
loss_out = helper.create_variable_for_type_inference(dtype=input.dtype)
grad_out = helper.create_variable_for_type_inference(dtype=input.dtype)
helper.append_op(
type='warpctc',
inputs=this_inputs,
outputs={'WarpCTCGrad': [grad_out], 'Loss': [loss_out]},
attrs={
'blank': blank,
'norm_by_times': norm_by_times,
},
)
return loss_out
loss_out = warpctc(
log_probs, labels, blank, norm_by_times, input_lengths, label_lengths log_probs, labels, blank, norm_by_times, input_lengths, label_lengths
) )
......
...@@ -30,7 +30,7 @@ from ...fluid.layers import group_norm # noqa: F401 ...@@ -30,7 +30,7 @@ from ...fluid.layers import group_norm # noqa: F401
from ...fluid.layers import instance_norm # noqa: F401 from ...fluid.layers import instance_norm # noqa: F401
from ...fluid.layers import layer_norm # noqa: F401 from ...fluid.layers import layer_norm # noqa: F401
from ...fluid.layers import multi_box_head # noqa: F401 from ...fluid.layers import multi_box_head # noqa: F401
from ...fluid.layers import nce # noqa: F401 from .loss import nce # noqa: F401
from ...fluid.layers import prelu # noqa: F401 from ...fluid.layers import prelu # noqa: F401
from ...fluid.layers import py_func # noqa: F401 from ...fluid.layers import py_func # noqa: F401
from ...fluid.layers import row_conv # noqa: F401 from ...fluid.layers import row_conv # noqa: F401
......
# -*- coding: utf-8 -*
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ...fluid.data_feeder import check_variable_and_dtype
from paddle.fluid.layers.layer_function_generator import templatedoc
# TODO: define loss functions of neural network
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import NumpyArrayInitializer
import numpy as np
from paddle.fluid.framework import (
static_only,
)
__all__ = []
# FIXME(wuyi): let docstring_checker.py understand @autodoc.
# For now, the comments in c++ use types like Tensor, but in python side
# the type is often "Variable", and arguments may vary.
@static_only
@templatedoc(op_type="nce")
def nce(
input,
label,
num_total_classes,
sample_weight=None,
param_attr=None,
bias_attr=None,
num_neg_samples=None,
name=None,
sampler="uniform",
custom_dist=None,
seed=0,
is_sparse=False,
):
"""
:api_attr: Static Graph
${comment}
Args:
input (Tensor): Input tensor, 2-D tensor with shape [batch_size, dim],
and data type is float32 or float64.
label (Tensor): Input label, 2-D tensor with shape [batch_size, num_true_class],
and data type is int64.
num_total_classes (int):${num_total_classes_comment}.
sample_weight (Tensor|None): A Tensor of shape [batch_size, 1]
storing a weight for each sample. The default weight for each
sample is 1.0.
param_attr (ParamAttr|None): To specify the weight parameter attribute.
Default: None, which means the default weight parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
bias_attr (ParamAttr|None): To specify the bias parameter attribute.
Default: None, which means the default bias parameter property is
used. See usage for details in :ref:`api_fluid_ParamAttr` .
num_neg_samples (int): ${num_neg_samples_comment}.
name(str|None): For detailed information, please refer to
:ref:`api_guide_Name` . Usually name is no need to set and None by default.
sampler (str, optional): The sampler used to sample class from negative classes.
It can be 'uniform', 'log_uniform' or 'custom_dist'.
default: 'uniform'.
custom_dist (nd.array|None): A numpy ndarray with size=num_total_classes.
It is used when sampler is set to 'custom_dist'.
custom_dist[i] is the probability of i-th class to be sampled.
default: None.
seed (int, optional): The seed used in sampler. Default 0, means no random seed.
is_sparse(bool, optional): The flag indicating whether to use sparse update,
the weight@GRAD and bias@GRAD will be changed to SelectedRows. Default False.
Returns:
Tensor: The output nce loss.
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.enable_static()
window_size = 5
words = []
for i in range(window_size):
words.append(paddle.static.data(
name='word_{0}'.format(i), shape=[-1, 1], dtype='int64'))
dict_size = 10000
label_word = int(window_size / 2) + 1
embs = []
for i in range(window_size):
if i == label_word:
continue
emb = paddle.static.nn.embedding(input=words[i], size=[dict_size, 32],
param_attr='embed', is_sparse=True)
embs.append(emb)
embs = paddle.concat(x=embs, axis=1)
loss = paddle.static.nn.nce(input=embs, label=words[label_word],
num_total_classes=dict_size, param_attr='nce.w_0',
bias_attr='nce.b_0')
#or use custom distribution
dist = np.array([0.05,0.5,0.1,0.3,0.05])
loss = paddle.static.nn.nce(input=embs, label=words[label_word],
num_total_classes=5, param_attr='nce.w_1',
bias_attr='nce.b_1',
num_neg_samples=3,
sampler="custom_dist",
custom_dist=dist)
"""
helper = LayerHelper('nce', **locals())
check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'nce')
check_variable_and_dtype(label, 'label', ['int64'], 'nce')
dim = input.shape[1]
num_true_class = label.shape[1]
w = helper.create_parameter(
attr=helper.param_attr,
shape=[num_total_classes, dim],
is_bias=False,
dtype=input.dtype,
)
inputs = {}
if helper.bias_attr:
b = helper.create_parameter(
attr=helper.bias_attr,
shape=[num_total_classes, 1],
is_bias=True,
dtype=input.dtype,
)
inputs['Bias'] = b
cost = helper.create_variable_for_type_inference(dtype=input.dtype)
sample_logits = helper.create_variable_for_type_inference(dtype=input.dtype)
sample_labels = helper.create_variable_for_type_inference(dtype=label.dtype)
inputs['Input'] = input
inputs['Label'] = label
inputs['Weight'] = w
inputs['SampleWeight'] = sample_weight if sample_weight is not None else []
if sampler == "uniform":
sampler = 0
elif sampler == "log_uniform":
sampler = 1
elif sampler == "custom_dist":
assert custom_dist is not None
custom_dist_len = num_total_classes
alias_probs_ = [0] * custom_dist_len
alias_ = [0] * custom_dist_len
bigs = []
littles = []
for i in range(custom_dist_len):
normal_prob = custom_dist[i] * custom_dist_len
if normal_prob - 1.0 > 0:
bigs.append((i, normal_prob))
elif 1.0 - normal_prob > 0:
littles.append((i, normal_prob))
else:
alias_probs_[i] = normal_prob
alias_[i] = -1
while len(bigs) and len(littles):
big = bigs.pop(0)
little = littles.pop(0)
big_idx = big[0]
big_prob = big[1]
alias_probs_[little[0]] = little[1]
alias_[little[0]] = big_idx
big_left = big[1] + little[1] - 1
if big_left - 1.0 > 0:
bigs.append((big_idx, big_left))
elif 1.0 - big_left > 0:
littles.append((big_idx, big_left))
else:
alias_probs_[big_idx] = big_left
alias_[big_idx] = -1
if len(bigs):
big = bigs.pop(0)
alias_probs_[big[0]] = 1.0
alias_[big[0]] = -1
if len(littles):
little = littles.pop(0)
alias_probs_[little[0]] = 1.0
alias_[little[0]] = -1
def _init_by_numpy_array(numpy_array):
ret = helper.create_parameter(
attr=ParamAttr(),
shape=numpy_array.shape,
dtype=numpy_array.dtype,
default_initializer=NumpyArrayInitializer(numpy_array),
)
ret.stop_gradient = True
return ret
inputs['CustomDistProbs'] = _init_by_numpy_array(
np.array(custom_dist).astype('float32')
)
inputs['CustomDistAlias'] = _init_by_numpy_array(
np.array(alias_).astype('int32')
)
inputs['CustomDistAliasProbs'] = _init_by_numpy_array(
np.array(alias_probs_).astype('float32')
)
sampler = 2
else:
raise Exception("Unsupported sampler type.")
if num_neg_samples is None:
num_neg_samples = 10
else:
num_neg_samples = int(num_neg_samples)
remote_prefetch = is_sparse
print(
"With sparse mode, if your models has only small parameter prefetch may cause speed down"
)
attrs = {
'num_total_classes': int(num_total_classes),
'num_neg_samples': num_neg_samples,
'seed': seed,
'sampler': sampler,
'is_sparse': is_sparse,
'remote_prefetch': remote_prefetch,
}
helper.append_op(
type='nce',
inputs=inputs,
outputs={
'Cost': cost,
'SampleLogits': sample_logits,
'SampleLabels': sample_labels,
},
attrs=attrs,
)
return cost / (num_neg_samples + 1)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册