diff --git a/paddle/operators/nce_op.cc b/paddle/operators/nce_op.cc index 84ba3ead2b52547b989a4541f31ea31ffcce6c63..994ddf717e7a5b883d8071c6a47da0b4b4074f2e 100644 --- a/paddle/operators/nce_op.cc +++ b/paddle/operators/nce_op.cc @@ -124,7 +124,8 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker { "This attribute only be used in unitest. Classes " "in this list wiil be used as negative classes " "for every samples. Under normal conditions, " - "user should avoid setting this attribute."); + "user should avoid setting this attribute.") + .SetDefault({}); AddComment(R"DOC( Compute and return the noise-contrastive estimation training loss. See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf). diff --git a/paddle/operators/nce_op.h b/paddle/operators/nce_op.h index e6b496f7896dcb412be8ff096fdccb2f0b682369..86fa13a649ce7fdcaad64e2609ceea2fb4d7e072 100644 --- a/paddle/operators/nce_op.h +++ b/paddle/operators/nce_op.h @@ -197,7 +197,8 @@ class NCEGradKernel : public framework::OpKernel { // get d_x auto d_x = context.Output(framework::GradVarName("Input")); if (d_x != nullptr) { - d_x->mutable_data(context.GetPlace()); + auto* d_x_data = d_x->mutable_data(context.GetPlace()); + std::fill(d_x_data, d_x_data + d_x->numel(), 0.0); auto d_x_matrix = EigenMatrix::From(*d_x); auto w_matrix = EigenMatrix::From(*(context.Input("Weight"))); for (int64_t i = 0; i < sample_labels->numel(); ++i) { diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py index 072119881644c650c3430c70bdab42f8d17df7ba..1fd6ba4b9c4d6e32c2c4115ccda6e9657862b04e 100644 --- a/python/paddle/v2/fluid/layers/nn.py +++ b/python/paddle/v2/fluid/layers/nn.py @@ -19,6 +19,7 @@ from ..layer_helper import LayerHelper from ..initializer import Normal, Constant from ..framework import Variable from ..param_attr import ParamAttr +from layer_function_generator import autodoc from tensor import concat __all__ = [ @@ -57,6 +58,7 @@ __all__ = [ 'warpctc', 'sequence_reshape', 'transpose', + 'nce', ] @@ -2190,6 +2192,61 @@ def sequence_reshape(input, new_dim): return out +@autodoc() +def nce(input, + label, + num_total_classes, + sample_weight=None, + param_attr=None, + bias_attr=None, + num_neg_samples=None): + helper = LayerHelper('nce', **locals()) + assert isinstance(input, Variable) + dim = input.shape[1] + assert isinstance(label, Variable) + num_true_class = label.shape[1] + w = helper.create_parameter( + attr=helper.param_attr, + shape=[num_total_classes, dim], + is_bias=False, + dtype=input.dtype) + b = helper.create_parameter( + attr=helper.bias_attr, + shape=[num_total_classes, 1], + is_bias=True, + dtype=input.dtype) + cost = helper.create_tmp_variable(dtype=input.dtype) + sample_logits = helper.create_tmp_variable(dtype=input.dtype) + sample_labels = helper.create_tmp_variable(dtype=label.dtype) + + if num_neg_samples is None: + num_neg_samples = 10 + else: + num_neg_samples = int(num_neg_samples) + + attrs = { + 'num_total_classes': int(num_total_classes), + 'num_neg_samples': num_neg_samples + } + + helper.append_op( + type='nce', + inputs={ + 'Input': input, + 'Label': label, + 'Weight': w, + 'Bias': b, + 'SampleWeight': sample_weight if sample_weight is not None else [] + }, + outputs={ + 'Cost': cost, + 'SampleLogits': sample_logits, + 'SampleLabels': sample_labels + }, + attrs=attrs) + return cost / (num_neg_samples + 1) + + def transpose(x, perm, name=None): """ **transpose Layer** diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py index 709abd6c6a4e0c2aa1b38a135d7424cd6886c966..b14198b231372c6e75434162e3a84be4c9890ece 100644 --- a/python/paddle/v2/fluid/tests/test_layers.py +++ b/python/paddle/v2/fluid/tests/test_layers.py @@ -17,8 +17,9 @@ import unittest import paddle.v2.fluid.layers as layers import paddle.v2.fluid.nets as nets -from paddle.v2.fluid.framework import Program, program_guard +from paddle.v2.fluid.framework import Program, program_guard, default_main_program from paddle.v2.fluid.param_attr import ParamAttr +import decorators class TestBook(unittest.TestCase): @@ -225,6 +226,41 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(out) print(str(program)) + @decorators.prog_scope() + def test_nce(self): + window_size = 5 + words = [] + for i in xrange(window_size): + words.append( + layers.data( + name='word_{0}'.format(i), shape=[1], dtype='int64')) + + dict_size = 10000 + label_word = int(window_size / 2) + 1 + + embs = [] + for i in xrange(window_size): + if i == label_word: + continue + + emb = layers.embedding( + input=words[i], + size=[dict_size, 32], + param_attr='emb.w', + is_sparse=True) + + embs.append(emb) + + embs = layers.concat(input=embs, axis=1) + loss = layers.nce(input=embs, + label=words[label_word], + num_total_classes=dict_size, + param_attr='nce.w', + bias_attr='nce.b') + avg_loss = layers.mean(x=loss) + self.assertIsNotNone(avg_loss) + print(str(default_main_program())) + if __name__ == '__main__': unittest.main()