diff --git a/paddle/operators/nce_op.cc b/paddle/operators/nce_op.cc
index 84ba3ead2b52547b989a4541f31ea31ffcce6c63..994ddf717e7a5b883d8071c6a47da0b4b4074f2e 100644
--- a/paddle/operators/nce_op.cc
+++ b/paddle/operators/nce_op.cc
@@ -124,7 +124,8 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
                               "This attribute only be used in unitest. Classes "
                               "in this list wiil be used as negative classes "
                               "for every samples. Under normal conditions, "
-                              "user should avoid setting this attribute.");
+                              "user should avoid setting this attribute.")
+        .SetDefault({});
     AddComment(R"DOC(
 Compute and return the noise-contrastive estimation training loss.
 See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf).
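The new `.SetDefault({})` lets callers omit `custom_neg_classes` entirely, which the Python layer added below relies on. For reviewers unfamiliar with the objective: NCE reduces a num_total_classes-way estimation problem to telling each true class apart from k sampled noise classes. A conceptual NumPy sketch of that objective (a minimal illustration, not the op's exact kernel; it assumes a uniform noise distribution q(y) = 1/num_total_classes, and the argument names are hypothetical):

    import numpy as np

    def nce_loss(x, weight, bias, true_class, neg_classes, num_total_classes):
        # h(c) = s(c) / (s(c) + k * q) is the probability that class c was
        # drawn from the data rather than from the noise distribution.
        k = len(neg_classes)
        q = 1.0 / num_total_classes                  # uniform noise probability

        def score(c):                                # unnormalized model score
            return np.exp(np.dot(weight[c], x) + bias[c])

        s_pos = score(true_class)
        loss = -np.log(s_pos / (s_pos + k * q))      # true class: looks like data
        for c in neg_classes:
            s_neg = score(c)
            loss -= np.log(k * q / (s_neg + k * q))  # sampled class: looks like noise
        return loss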
diff --git a/python/paddle/v2/fluid/layers/nn.py b/python/paddle/v2/fluid/layers/nn.py
index b1db16a83ecc917528be1defa781f659342edd77..668f7788db97b9e629d8588a19690933416f428f 100644
--- a/python/paddle/v2/fluid/layers/nn.py
+++ b/python/paddle/v2/fluid/layers/nn.py
@@ -19,6 +19,7 @@ from ..layer_helper import LayerHelper
 from ..initializer import Normal, Constant
 from ..framework import Variable
 from ..param_attr import ParamAttr
+from ..registry import autodoc
 from tensor import concat
 
 __all__ = [
@@ -28,7 +29,7 @@ __all__ = [
     'batch_norm', 'beam_search_decode', 'conv2d_transpose', 'sequence_expand',
     'lstm_unit', 'reduce_sum', 'reduce_mean', 'reduce_max', 'reduce_min',
     'sequence_first_step', 'sequence_last_step', 'dropout', 'split',
-    'l2_normalize', 'matmul', 'warpctc', 'sequence_reshape'
+    'l2_normalize', 'matmul', 'warpctc', 'sequence_reshape', 'nce'
 ]
 
 
@@ -1971,3 +1972,52 @@ def sequence_reshape(input, new_dim):
         outputs={'Out': [out]},
         attrs={'new_dim': new_dim})
     return out
+
+
+@autodoc
+def nce(input,
+        label,
+        num_total_classes,
+        sample_weight=None,
+        param_attr=None,
+        bias_attr=None,
+        num_neg_samples=None):
+    helper = LayerHelper('nce', **locals())
+    assert isinstance(input, Variable)
+    dim = input.shape[1]
+    assert isinstance(label, Variable)
+    num_true_class = label.shape[1]
+    w = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=[num_total_classes, dim],
+        is_bias=False,
+        dtype=input.dtype)
+    b = helper.create_parameter(
+        attr=helper.bias_attr,
+        shape=[num_total_classes, 1],
+        is_bias=True,
+        dtype=input.dtype)
+    cost = helper.create_tmp_variable(dtype=input.dtype)
+    sample_logits = helper.create_tmp_variable(dtype=input.dtype)
+    sample_labels = helper.create_tmp_variable(dtype=label.dtype)
+
+    attrs = {'num_total_classes': int(num_total_classes)}
+    if num_neg_samples is not None:
+        attrs['num_neg_samples'] = int(num_neg_samples)
+
+    helper.append_op(
+        type='nce',
+        inputs={
+            'Input': input,
+            'Label': label,
+            'Weight': w,
+            'Bias': b,
+            'SampleWeight': sample_weight if sample_weight is not None else []
+        },
+        outputs={
+            'Cost': cost,
+            'SampleLogits': sample_logits,
+            'SampleLabels': sample_labels
+        },
+        attrs=attrs)
+    return cost
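A minimal usage sketch of the new layer, mirroring the unit test below (the data names and the embedding width of 32 are illustrative, not required):

    import paddle.v2.fluid.layers as layers

    dict_size = 10000
    # Two context words predict one label word; all ids are int64.
    w0 = layers.data(name='w0', shape=[1], dtype='int64')
    w1 = layers.data(name='w1', shape=[1], dtype='int64')
    label = layers.data(name='label', shape=[1], dtype='int64')

    emb0 = layers.embedding(input=w0, size=[dict_size, 32], param_attr='emb.w')
    emb1 = layers.embedding(input=w1, size=[dict_size, 32], param_attr='emb.w')
    embs = layers.concat(input=[emb0, emb1], axis=1)

    # 'Cost' is the per-example NCE loss; reduce it before optimizing.
    loss = layers.nce(input=embs, label=label, num_total_classes=dict_size)
    avg_loss = layers.mean(x=loss)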
diff --git a/python/paddle/v2/fluid/tests/test_layers.py b/python/paddle/v2/fluid/tests/test_layers.py
index 709abd6c6a4e0c2aa1b38a135d7424cd6886c966..b14198b231372c6e75434162e3a84be4c9890ece 100644
--- a/python/paddle/v2/fluid/tests/test_layers.py
+++ b/python/paddle/v2/fluid/tests/test_layers.py
@@ -17,8 +17,9 @@ import unittest
 
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.nets as nets
-from paddle.v2.fluid.framework import Program, program_guard
+from paddle.v2.fluid.framework import Program, program_guard, default_main_program
 from paddle.v2.fluid.param_attr import ParamAttr
+import decorators
 
 
 class TestBook(unittest.TestCase):
@@ -225,6 +226,41 @@ class TestBook(unittest.TestCase):
             self.assertIsNotNone(out)
         print(str(program))
 
+    @decorators.prog_scope()
+    def test_nce(self):
+        window_size = 5
+        words = []
+        for i in xrange(window_size):
+            words.append(
+                layers.data(
+                    name='word_{0}'.format(i), shape=[1], dtype='int64'))
+
+        dict_size = 10000
+        label_word = window_size // 2  # predict the window's center word
+
+        embs = []
+        for i in xrange(window_size):
+            if i == label_word:
+                continue
+
+            emb = layers.embedding(
+                input=words[i],
+                size=[dict_size, 32],
+                param_attr='emb.w',
+                is_sparse=True)
+
+            embs.append(emb)
+
+        embs = layers.concat(input=embs, axis=1)
+        loss = layers.nce(input=embs,
+                          label=words[label_word],
+                          num_total_classes=dict_size,
+                          param_attr='nce.w',
+                          bias_attr='nce.b')
+        avg_loss = layers.mean(x=loss)
+        self.assertIsNotNone(avg_loss)
+        print(str(default_main_program()))
+
 
 if __name__ == '__main__':
     unittest.main()
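The new case follows the existing TestBook pattern: it only builds the program and prints it, without running a forward or backward pass. Since the file ends in unittest.main(), the case can be run on its own with `python test_layers.py TestBook.test_nce` from the tests directory.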