Patch summary (free text placed ahead of the first file header).

1. paddle/operators/nce_op.cc: chains .SetDefault({}) onto the attribute
   whose help text says it is only meant for unit tests.
2. python/paddle/v2/fluid/layers/nn.py: imports autodoc from ..registry,
   appends 'nce' to __all__, and adds the nce() layer function. nce() creates
   a weight and a bias parameter, appends an 'nce' op with outputs Cost,
   SampleLogits and SampleLabels, and returns the Cost variable.
3. python/paddle/v2/fluid/tests/test_layers.py: imports default_main_program
   and decorators, and adds TestBook.test_nce, which feeds concatenated
   embeddings into layers.nce and prints the default main program.

diff --git a/paddle/operators/nce_op.cc b/paddle/operators/nce_op.cc
index 84ba3ead2b52547b989a4541f31ea31ffcce6c63..994ddf717e7a5b883d8071c6a47da0b4b4074f2e 100644
--- a/paddle/operators/nce_op.cc
+++ b/paddle/operators/nce_op.cc
@@ -124,7 +124,8 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
                               "This attribute only be used in unitest. Classes "
                               "in this list wiil be used as negative classes "
                               "for every samples. Under normal conditions, "
-                              "user should avoid setting this attribute.");
+                              "user should avoid setting this attribute.")
+        .SetDefault({});
     AddComment(R"DOC(
 Compute and return the noise-contrastive estimation training loss.
 See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf).
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index b1db16a83ecc917528be1defa781f659342edd77..668f7788db97b9e629d8588a19690933416f428f 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -19,6 +19,7 @@ from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable
 from ..param_attr import ParamAttr
+from ..registry import autodoc
 from tensor import concat
 
 __all__ = [
@@ -28,7 +29,7 @@ __all__ = [
     'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
     'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
     'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
-    'l2_normalize', 'matmul', 'warpctc', 'sequence_reshape'
+    'l2_normalize', 'matmul', 'warpctc', 'sequence_reshape', 'nce'
 ]
 
 
@@ -1971,3 +1972,52 @@ def sequence_reshape(input, new_dim):
         outputs={'Out': [out]},
         attrs={'new_dim': new_dim})
     return out
+
+
+@autodoc
+def nce(input,
+        label,
+        num_total_classes,
+        sample_weight=None,
+        param_attr=None,
+        bias_attr=None,
+        num_neg_samples=None):
+    helper = LayerHelper('nce', **locals())
+    assert isinstance(input, Variable)
+    dim = input.shape[1]
+    assert isinstance(label, Variable)
+    num_true_class = label.shape[1]
+    w = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=[num_total_classes, dim],
+        is_bias=False,
+        dtype=input.dtype)
+    b = helper.create_parameter(
+        attr=helper.bias_attr,
+        shape=[num_total_classes, 1],
+        is_bias=True,
+        dtype=input.dtype)
+    cost = helper.create_tmp_variable(dtype=input.dtype)
+    sample_logits = helper.create_tmp_variable(dtype=input.dtype)
+    sample_labels = helper.create_tmp_variable(dtype=label.dtype)
+
+    attrs = {'num_total_classes': int(num_total_classes)}
+    if num_neg_samples is not None:
+        attrs['num_neg_samples'] = int(num_neg_samples)
+
+    helper.append_op(
+        type='nce',
+        inputs={
+            'Input': input,
+            'Label': label,
+            'Weight': w,
+            'Bias': b,
+            'SampleWeight': sample_weight if sample_weight is not None else []
+        },
+        outputs={
+            'Cost': cost,
+            'SampleLogits': sample_logits,
+            'SampleLabels': sample_labels
+        },
+        attrs=attrs)
+    return cost
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index 709abd6c6a4e0c2aa1b38a135d7424cd6886c966..b14198b231372c6e75434162e3a84be4c9890ece 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -17,8 +17,9 @@ import unittest
 
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.nets as nets
-from paddle.v2.fluid.framework import Program, program_guard
+from paddle.v2.fluid.framework import Program, program_guard, default_main_program
 from paddle.v2.fluid.param_attr import ParamAttr
+import decorators
 
 
 class TestBook(unittest.TestCase):
@@ -225,6 +226,41 @@ class TestBook(unittest.TestCase):
             self.assertIsNotNone(out)
         print(str(program))
 
+    @decorators.prog_scope()
+    def test_nce(self):
+        window_size = 5
+        words = []
+        for i in xrange(window_size):
+            words.append(
+                layers.data(
+                    name='word_{0}'.format(i), shape=[1], dtype='int64'))
+
+        dict_size = 10000
+        label_word = int(window_size / 2) + 1
+
+        embs = []
+        for i in xrange(window_size):
+            if i == label_word:
+                continue
+
+            emb = layers.embedding(
+                input=words[i],
+                size=[dict_size, 32],
+                param_attr='emb.w',
+                is_sparse=True)
+
+            embs.append(emb)
+
+        embs = layers.concat(input=embs, axis=1)
+        loss = layers.nce(input=embs,
+                          label=words[label_word],
+                          num_total_classes=dict_size,
+                          param_attr='nce.w',
+                          bias_attr='nce.b')
+        avg_loss = layers.mean(x=loss)
+        self.assertIsNotNone(avg_loss)
+        print(str(default_main_program()))
+
 
 if __name__ == '__main__':
     unittest.main()