diff --git a/doc/fluid/api/layers.rst b/doc/fluid/api/layers.rst
index d443c49657b92583e527035f49e74462cf41487d..ecbd8191ccf5aa6046e7875fe8afa2ed0105e4a0 100644
--- a/doc/fluid/api/layers.rst
+++ b/doc/fluid/api/layers.rst
@@ -1768,3 +1768,11 @@ reverse
 ..  autofunction:: paddle.fluid.layers.reverse
     :noindex:
 
+.. _api_fluid_layers_rank_loss:
+
+rank_loss
+---------
+
+..  autofunction:: paddle.fluid.layers.rank_loss
+    :noindex:
+
diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py
index 2e57c9f8943e0d8efd487866534730fed0880a05..df6becabd166599df9f9963f704e372262104b2d 100644
--- a/python/paddle/fluid/layers/io.py
+++ b/python/paddle/fluid/layers/io.py
@@ -456,52 +456,124 @@ def py_reader(capacity,
               name=None,
               use_double_buffer=True):
     """
-    Create a reader and blocking queue for data feeding in Python
+    Create a Python reader for data feeding in Python
 
-    This layer returns a Reader Variable and a BlockingQueue.
-    The BlockingQueue provides `push()` method to push a `LoDTensorArray` 
-    object into the queue in Python side. In C++ side, the Reader 
-    Variable would invoke `pop()` method of the queue to retrieve the 
-    feeding data. The process of feeding data in Python side and fetching 
-    data in C++ side can run in parallel. The BlockingQueue should be closed 
-    using `close()` method when unused.
+    This layer returns a Reader Variable.
+    The Reader provides :code:`decorate_paddle_reader()` and
+    :code:`decorate_tensor_provider()` to set a Python generator as the data
+    source in Python side. When :code:`Executor::Run()` is invoked in C++
+    side, the data from the generator would be read automatically. Unlike
+    :code:`DataFeeder.feed()`, the data reading process and
+    :code:`Executor::Run()` process can run in parallel using
+    :code:`py_reader`. The :code:`start()` method of the Reader should be
+    called when each pass begins, while the :code:`reset()` method should be
+    called when the pass ends and :code:`fluid.core.EOFException` is raised.
+    Note that :code:`Program.clone()` method cannot clone :code:`py_reader`.
 
     Args:
-       use_double_buffer(bool): Whether use double buffer or not.
-       capacity(int): The maximum capacity of the BlockingQueue.
+       capacity(int): The buffer capacity maintained by :code:`py_reader`.
        shapes(list|tuple): List of tuples which declaring data shapes.
        dtypes(list|tuple): List of strs which declaring data type.
        lod_levels(list|tuple): List of ints which declaring data lod_level.
        name(basestring): The prefix Python queue name and Reader name. None will
             be generated automatically.
+       use_double_buffer(bool): Whether to use double buffer or not.
 
     Returns:
-       tuple(Variable, BlockingQueue):
-       A Reader Variable from which we can get feeding data.
-
-       A BlockingQueue object for data feeding.
+       Variable: A Reader from which we can get feeding data.
 
     Examples:
 
-        .. code-block:: python
+        1. The basic usage of :code:`py_reader` is as follows:
 
-            reader, queue = fluid.layers.py_reader(
-                                             capacity=10,
-                                             shapes=[[-1,3,224,224], [-1,1]],
-                                             dtypes=['float32', 'int64'])
-            # Via the reader, we can use 'read_file' layer to get data:
-            image, label = fluid.layers.read_file(reader)
-
-            # Via the blocking queue, we can feed data using threads
-            def feed_data(queue, feed_images, feed_labels):
-                for feed_image, feed_label in zip(feed_images, feed_labels):
-                    data = core.LoDTensorArray()
-                    data.append(feed_image)
-                    data.append(feed_label)
-                    queue.push(data)
-
-            thread = threading.Thread(target=feed_data, args=(queue, feed_images, feed_labels))
-            thread.start()
+        >>> import paddle.v2
+        >>> import paddle.fluid as fluid
+        >>> import paddle.dataset.mnist as mnist
+        >>>
+        >>> reader = fluid.layers.py_reader(capacity=64,
+        >>>                                 shapes=[(-1,3,224,224), (-1,1)],
+        >>>                                 dtypes=['float32', 'int64'])
+        >>> reader.decorate_paddle_reader(
+        >>>     paddle.v2.reader.shuffle(paddle.batch(mnist.train(), 512), buf_size=8192))
+        >>>
+        >>> img, label = fluid.layers.read_file(reader)
+        >>> loss = network(img, label) # some network definition
+        >>>
+        >>> fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
+        >>>
+        >>> exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
+        >>> for epoch_id in range(10):
+        >>>     reader.start()
+        >>>     try:
+        >>>         while True:
+        >>>             exe.run(fetch_list=[loss.name])
+        >>>     except fluid.core.EOFException:
+        >>>         reader.reset()
+
+        2. When training and testing are both performed, two different
+        :code:`py_reader` should be created with different names, e.g.:
+
+        >>> import paddle.v2
+        >>> import paddle.fluid as fluid
+        >>> import paddle.dataset.mnist as mnist
+        >>>
+        >>> def network(reader):
+        >>>     img, label = fluid.layers.read_file(reader)
+        >>>     # Here, we omitted the network definition
+        >>>     return loss
+        >>>
+        >>> train_reader = fluid.layers.py_reader(capacity=64,
+        >>>                                       shapes=[(-1,3,224,224), (-1,1)],
+        >>>                                       dtypes=['float32', 'int64'],
+        >>>                                       name='train_reader')
+        >>> train_reader.decorate_paddle_reader(
+        >>>     paddle.v2.reader.shuffle(paddle.batch(mnist.train(), 512), buf_size=8192))
+        >>>
+        >>> test_reader = fluid.layers.py_reader(capacity=32,
+        >>>                                      shapes=[(-1,3,224,224), (-1,1)],
+        >>>                                      dtypes=['float32', 'int64'],
+        >>>                                      name='test_reader')
+        >>> test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512))
+        >>>
+        >>> # Create train_main_prog and train_startup_prog
+        >>> train_main_prog = fluid.Program()
+        >>> train_startup_prog = fluid.Program()
+        >>> with fluid.program_guard(train_main_prog, train_startup_prog):
+        >>>     # Use fluid.unique_name.guard() to share parameters with test program
+        >>>     with fluid.unique_name.guard():
+        >>>         train_loss = network(train_reader) # some network definition
+        >>>         adam = fluid.optimizer.Adam(learning_rate=0.01)
+        >>>         adam.minimize(train_loss)
+        >>>
+        >>> # Create test_main_prog and test_startup_prog
+        >>> test_main_prog = fluid.Program()
+        >>> test_startup_prog = fluid.Program()
+        >>> with fluid.program_guard(test_main_prog, test_startup_prog):
+        >>>     # Use fluid.unique_name.guard() to share parameters with train program
+        >>>     with fluid.unique_name.guard():
+        >>>         test_loss = network(test_reader)
+        >>>
+        >>> fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog)
+        >>> fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog)
+        >>>
+        >>> train_exe = fluid.ParallelExecutor(use_cuda=True,
+        >>>                 loss_name=train_loss.name, main_program=train_main_prog)
+        >>> test_exe = fluid.ParallelExecutor(use_cuda=True,
+        >>>                 loss_name=test_loss.name, main_program=test_main_prog)
+        >>> for epoch_id in range(10):
+        >>>     train_reader.start()
+        >>>     try:
+        >>>         while True:
+        >>>             train_exe.run(fetch_list=[train_loss.name])
+        >>>     except fluid.core.EOFException:
+        >>>         train_reader.reset()
+        >>>
+        >>>     test_reader.start()
+        >>>     try:
+        >>>         while True:
+        >>>             test_exe.run(fetch_list=[test_loss.name])
+        >>>     except fluid.core.EOFException:
+        >>>         test_reader.reset()
     """
     dtypes = [convert_np_dtype_to_dtype_(dt) for dt in dtypes]
     shape_concat = []
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 4df806216aa3ecf03e81a491a1ffbb4cc47d7b50..5d7f1eadd93a82dc2bdb88c5f5c80e437df4e29f 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -110,6 +110,7 @@ __all__ = [
     'relu',
     'log',
     'crop',
+    'rank_loss',
 ]
 
 
@@ -5282,3 +5283,74 @@ def crop(x, shape=None, offsets=None, name=None):
         outputs={'Out': out},
         attrs=None if len(attrs) == 0 else attrs)
     return out
+
+
+def rank_loss(label, left, right, name=None):
+    r"""
+    **Rank loss layer for RankNet**
+
+    RankNet(http://icml.cc/2015/wp-content/uploads/2015/06/icml_ranking.pdf)
+    is a pairwise ranking model with a training sample consisting of a pair
+    of documents, A and B. Label P indicates whether A is ranked higher than B
+    or not:
+
+    P = {0, 1} or {0, 0.5, 1}, where 0.5 means that there is no information
+    about the rank of the input pair.
+
+    Rank loss layer takes three inputs: left (o_i), right (o_j) and
+    label (P_{i,j}). The inputs respectively represent RankNet's output scores
+    for documents A and B and the value of label P. The following equation
+    computes rank loss C_{i,j} from the inputs:
+
+    $$
+      C_{i,j} = -\tilde{P_{ij}} * o_{i,j} + \log(1 + e^{o_{i,j}}) \\
+      o_{i,j} =  o_i - o_j  \\
+      \tilde{P_{i,j}} = \left \{0, 0.5, 1 \right \} \ or \ \left \{0, 1 \right \}
+    $$
+
+    Rank loss layer takes batch inputs with size batch_size (batch_size >= 1).
+
+    Args:
+        label (Variable): Indicates whether A ranked higher than B or not.
+        left (Variable): RankNet's output score for doc A.
+        right (Variable): RankNet's output score for doc B.
+        name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically.
+
+    Returns:
+        Variable: The value of rank loss.
+
+    Raises:
+        ValueError: Any of label, left, and right is not a variable.
+
+    Examples:
+
+        .. code-block:: python
+
+            label = fluid.layers.data(name="label", shape=[4, 1], dtype="float32")
+            left = fluid.layers.data(name="left", shape=[4, 1], dtype="float32")
+            right = fluid.layers.data(name="right", shape=[4, 1], dtype="float32")
+            out = fluid.layers.rank_loss(label, left, right)
+
+    """
+    # The docstring is a raw string so that the LaTeX commands above (\tilde,
+    # \right, ...) are not interpreted as tab / carriage-return escapes.
+    helper = LayerHelper('rank_loss', **locals())
+
+    # Reject non-Variable inputs early with a message naming the argument.
+    for arg_name, arg in (("label", label), ("left", left), ("right", right)):
+        if not isinstance(arg, Variable):
+            raise ValueError("The %s should be a Variable" % arg_name)
+
+    # The output variable is always created as float32, regardless of the
+    # dtype of the inputs.
+    out = helper.create_tmp_variable("float32")
+
+    # Emit the rank_loss op wiring the three inputs to the new output.
+    helper.append_op(
+        type='rank_loss',
+        inputs={"Label": label,
+                "Left": left,
+                "Right": right},
+        outputs={'Out': out})
+    return out
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index 6b1f206ea2f5a6226cfdb01c70a8ce4646ae4788..ab2ab24f354c1fbdc8b5221061db56a8d8a48689 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -443,6 +443,28 @@ class TestBook(unittest.TestCase):
             self.assertIsNotNone(ids)
         print(str(program))
 
+    def test_rank_loss(self):
+        # Smoke test: layers.rank_loss can be appended to a fresh program.
+        prog = Program()
+        with program_guard(prog):
+            # The three inputs are declared identically apart from their
+            # names: shape [16, 1], dtype float32, append_batch_size
+            # turned off.
+            label = layers.data(
+                name='label', append_batch_size=False, shape=[16, 1],
+                dtype="float32")
+            left = layers.data(
+                name='left', append_batch_size=False, shape=[16, 1],
+                dtype="float32")
+            right = layers.data(
+                name='right', append_batch_size=False, shape=[16, 1],
+                dtype="float32")
+            loss = layers.rank_loss(label, left, right, name="rank_loss")
+            # Only verifies that the layer returns something non-None.
+            self.assertIsNotNone(loss)
+        # Print the textual form of the program that was built.
+        print(str(prog))
+
 
 if __name__ == '__main__':
     unittest.main()