diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc
index 461e5bd2d325d38a8bdb9dc29a577b8e58bd877a..f32b8896d41295fb31a3b8751f002a19cefb737a 100644
--- a/paddle/fluid/operators/lookup_table_op.cc
+++ b/paddle/fluid/operators/lookup_table_op.cc
@@ -34,8 +34,9 @@ class LookupTableOp : public framework::OperatorWithKernel {
     auto ids_dims = ctx->GetInputDim("Ids");
     auto ids_var_type = ctx->GetInputsVarType("Ids").front();
 
-    // ids_var_types also can be LOD_TENSOR_ARRAY, it's used as concat_rows.
-    // Maybe near future we will add concat_rows op.
+    // lookup_table and concat_rows use the same InferShape; for lookup_table,
+    // ids_var_type should be LoDTensor, while for concat_rows it should be
+    // SelectedRows.
     if (ids_var_type == framework::proto::VarType::LOD_TENSOR) {
       PADDLE_ENFORCE_EQ(ids_dims.size(), 2);
       PADDLE_ENFORCE_EQ(ids_dims[1], 1);
@@ -90,6 +91,44 @@ or not. And the output only shares the LoD information with input Ids.
   }
 };
 
+class ConcatRowsOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  ConcatRowsOpMaker(OpProto* proto, OpAttrChecker* op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("W",
+             "(Tensor) The input tensor of the concat_rows operator. "
+             "The rank of this tensor is 2.");
+    AddInput(
+        "Ids",
+        "(SelectedRows) The rows of Ids contain the indices to be looked up "
+        "in W.");
+    AddOutput("Out",
+              "(SelectedRows or Tensor) The result of concatenating, which "
+              "has the same data type as W.");
+    AddAttr<bool>("is_sparse",
+                  "(boolean, default true) This attribute has no effect here; "
+                  "it is only used by `Lookup Table Operator`.")
+        .SetDefault(true);
+    AddAttr<int64_t>("padding_idx",
+                     "(int64, default -1) "
+                     "If the value is -1, it has no effect on the lookup. "
+                     "Otherwise the given value indicates padding the output "
+                     "with zeros whenever lookup encounters it in Ids.")
+        .SetDefault(-1);
+
+    AddComment(R"DOC(
+ConcatRows Operator.
+
+This operator performs lookups on W (a dense tensor) according to the rows
+contained by Ids (a sparse tensor), then concatenates the results into a
+sparse or dense tensor.
+
+The type of the Ids input is SelectedRows.
+
+)DOC");
+  }
+};
+
 class LookupTableOpGradDescMaker
     : public framework::DefaultGradOpDescMaker<true> {
   using ::paddle::framework::DefaultGradOpDescMaker<
@@ -150,3 +189,8 @@ REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel<float>,
                        ops::LookupTableKernel<double>);
 REGISTER_OP_CPU_KERNEL(lookup_table_grad, ops::LookupTableGradKernel<float>,
                        ops::LookupTableGradKernel<double>);
+
+// concat_rows is used by regularization and does not have a gradient operator.
+REGISTER_OPERATOR(concat_rows, ops::LookupTableOp, ops::ConcatRowsOpMaker);
+REGISTER_OP_CPU_KERNEL(concat_rows, ops::LookupTableKernel<float>,
+                       ops::LookupTableKernel<double>);
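Semantics note: as the DOC above describes, concat_rows simply gathers the rows of the dense table `W` named by the row-index list carried in the `SelectedRows` input `Ids`, stacking them in order. A minimal numpy sketch of that intended behavior (numpy stands in for the C++ kernels here; all names are illustrative):

```python
import numpy as np

# A small dense table W (4 rows x 3 cols) and the row indices that a
# SelectedRows "Ids" input would carry, including a repeated row.
W = np.arange(12, dtype=np.float32).reshape(4, 3)
rows = [0, 2, 2, 3]

# concat_rows gathers W's rows in the given order, so the output has
# shape (len(rows), W.shape[1]).
out = W[rows]
assert out.shape == (4, 3)
assert (out[1] == out[2]).all()  # row 2 was looked up twice
```

Duplicated indices are allowed, as in the sketch; each occurrence produces its own output row.
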
diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu
index 125e0f94415827127b0264241f2b4870ff409e06..b880d86cf663a94140edd3278185407f0ec5d34a 100644
--- a/paddle/fluid/operators/lookup_table_op.cu
+++ b/paddle/fluid/operators/lookup_table_op.cu
@@ -79,20 +79,17 @@ class LookupTableCUDAKernel : public framework::OpKernel<T> {
 
     int64_t* ids;
     int64_t K;
-    framework::Tensor* output_t;
+    auto* output_t = context.Output<Tensor>("Out");  // float tensor
 
-    // ids_var_types also can be LOD_TENSOR_ARRAY, it's used as concat_rows.
-    // Maybe near future we will add concat_rows op.
-    if (ids_var->IsType<framework::LoDTensor>()) {
+    // lookup_table and concat_rows use the same kernel; for lookup_table,
+    // ids_var_type should be LoDTensor, while for concat_rows, ids_var_type
+    // and out_var_type should be SelectedRows.
+    if (ids_var->IsType<LoDTensor>()) {
       auto* ids_t = context.Input<LoDTensor>("Ids");
-      output_t = context.Output<LoDTensor>("Out");  // float tensor
       ids = const_cast<int64_t*>(ids_t->data<int64_t>());
       K = ids_t->numel();
-    } else if (ids_var->IsType<framework::SelectedRows>()) {
-      auto* ids_t = context.Input<framework::SelectedRows>("Ids");
-      output_t = const_cast<framework::Tensor*>(
-          &(context.Output<framework::SelectedRows>("Out")
-                ->value()));  // float tensor
+    } else if (ids_var->IsType<SelectedRows>()) {
+      auto* ids_t = context.Input<SelectedRows>("Ids");
       ids = const_cast<int64_t*>(ids_t->rows().CUDAData(context.GetPlace()));
       K = ids_t->rows().size();
       output_t->Resize({K, table_t->dims()[1]});
@@ -194,3 +191,6 @@ REGISTER_OP_CUDA_KERNEL(lookup_table, ops::LookupTableCUDAKernel<float>,
 REGISTER_OP_CUDA_KERNEL(lookup_table_grad,
                         ops::LookupTableGradCUDAKernel<float>,
                         ops::LookupTableGradCUDAKernel<double>);
+
+REGISTER_OP_CUDA_KERNEL(concat_rows, ops::LookupTableCUDAKernel<float>,
+                        ops::LookupTableCUDAKernel<double>);
diff --git a/paddle/fluid/operators/lookup_table_op.h b/paddle/fluid/operators/lookup_table_op.h
index b2439c68371a8949922855604fe92c11a74c6ec9..32a0085e06fbe51845cf449605341604dacdc7d2 100644
--- a/paddle/fluid/operators/lookup_table_op.h
+++ b/paddle/fluid/operators/lookup_table_op.h
@@ -35,19 +35,16 @@ class LookupTableKernel : public framework::OpKernel<T> {
 
     int64_t* ids;
     int64_t ids_numel;
-    Tensor* output_t;
-
-    // ids_var_types also can be LOD_TENSOR_ARRAY, it's used as concat_rows.
-    // Maybe near future we will add concat_rows op.
+    auto* output_t = context.Output<Tensor>("Out");
+    // lookup_table and concat_rows use the same kernel; for lookup_table,
+    // ids_var_type should be LoDTensor, while for concat_rows, ids_var_type
+    // and out_var_type should be SelectedRows.
     if (ids_var->IsType<LoDTensor>()) {
       auto* ids_t = context.Input<LoDTensor>("Ids");
-      output_t = context.Output<LoDTensor>("Out");
       ids = const_cast<int64_t*>(ids_t->data<int64_t>());
       ids_numel = ids_t->numel();
     } else if (ids_var->IsType<SelectedRows>()) {
       auto* ids_t = context.Input<SelectedRows>("Ids");
-      output_t =
-          const_cast<Tensor*>(&(context.Output<SelectedRows>("Out")->value()));
       ids = const_cast<int64_t*>(ids_t->rows().data());
       ids_numel = ids_t->rows().size();
       output_t->Resize({ids_numel, table_t->dims()[1]});
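Both the CUDA and CPU kernels above share the `padding_idx` contract documented in `ConcatRowsOpMaker`. A hedged numpy sketch of that documented contract (the helper name `lookup_rows` is made up for illustration and is not a Paddle API; whether the SelectedRows path exercises it follows from the shared kernel):

```python
import numpy as np

def lookup_rows(W, ids, padding_idx=-1):
    # Gather rows of W; per the attribute doc, a row whose id equals
    # padding_idx is filled with zeros instead of being looked up,
    # and padding_idx == -1 disables this behavior.
    out = W[np.asarray(ids)]  # fancy indexing already returns a copy
    if padding_idx != -1:
        out[np.asarray(ids) == padding_idx] = 0.0
    return out

W = np.arange(6, dtype=np.float32).reshape(3, 2)
print(lookup_rows(W, [0, 2, 1], padding_idx=1))  # last row prints as zeros
```
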
diff --git a/python/paddle/fluid/tests/unittests/test_concat_rows_op.py b/python/paddle/fluid/tests/unittests/test_concat_rows_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..6dd25c2e02756fada9e2e092bbdb1e207a8a6354
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_concat_rows_op.py
@@ -0,0 +1,76 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+import paddle.fluid.core as core
+from paddle.fluid.op import Operator
+from op_test import OpTest
+
+
+class TestConcatRowsOp(OpTest):
+    def check_with_place(self, place):
+        scope = core.Scope()
+
+        # create and initialize the Ids Variable
+        height = 10
+        rows = [0, 4, 4, 7]
+        row_numel = 12
+
+        ids_selected_rows = scope.var('Ids').get_selected_rows()
+        ids_selected_rows.set_height(height)
+        ids_selected_rows.set_rows(rows)
+        np_array = np.ones((len(rows), row_numel)).astype("float32")
+        ids_tensor = ids_selected_rows.get_tensor()
+        ids_tensor.set(np_array, place)
+
+        # create and initialize the W Variable
+        W = scope.var('W').get_tensor()
+        W_array = np.full((height, row_numel), 1.0).astype("float32")
+        for i in range(height):
+            W_array[i] *= i
+        W.set(W_array, place)
+
+        Out = scope.var('Out').get_selected_rows()
+        Out_array = np.full((len(rows), row_numel), -1.0).astype("float32")
+        Out.set_height(height)
+        Out.set_rows(rows)
+        Out_tensor = Out.get_tensor()
+        Out_tensor.set(Out_array, place)
+
+        # create and run the concat_rows operator
+        concat_rows_op = Operator(
+            "concat_rows",
+            W='W',
+            Ids='Ids',
+            Out='Out',
+            attrs={'is_sparse': True})
+        concat_rows_op.run(scope, place)
+
+        # get and compare the result
+        result_array = np.array(Out_tensor)
+
+        for idx, row in enumerate(rows):
+            assert (row == result_array[idx]).all()
+
+    def test_concat_rows(self):
+        places = [core.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(core.CUDAPlace(0))
+        for place in places:
+            self.check_with_place(place)
+
+
+if __name__ == "__main__":
+    unittest.main()
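The final assertion in `check_with_place` works because row `i` of `W_array` is filled entirely with the value `i`, so the looked-up row `idx` must be filled with `rows[idx]`. The expected values can be reproduced standalone (a sketch mirroring the test's setup, not part of the test file):

```python
import numpy as np

height, row_numel = 10, 12
rows = [0, 4, 4, 7]

# W_array[i] == i everywhere, exactly as the test builds it.
W_array = np.full((height, row_numel), 1.0).astype("float32")
for i in range(height):
    W_array[i] *= i

# Gathering rows [0, 4, 4, 7] must therefore yield rows filled with
# 0, 4, 4 and 7 respectively, which is what the test asserts.
expected = W_array[rows]
for idx, row in enumerate(rows):
    assert (row == expected[idx]).all()
```
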