diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc
index 6a66297cb843ead1a507a6867c1c562224861cbf..6599e183efcfb617d13c145c6eceb5cdb552adf9 100644
--- a/paddle/fluid/operators/lod_reset_op.cc
+++ b/paddle/fluid/operators/lod_reset_op.cc
@@ -22,17 +22,16 @@ class LoDResetOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext *ctx) const override {
-    // input check
     PADDLE_ENFORCE(ctx->HasInput("X"),
                    "Input(X) of LoDResetOp should not be null.");
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of LoDResetOp should not be null.");
-    // If target LoD is not set form Input(), then it must be set from Attr().
-    if (!ctx->HasInput("TargetLoD")) {
+
+    if (!ctx->HasInput("Y")) {
       auto level0 = ctx->Attrs().Get<std::vector<int>>("target_lod");
-      PADDLE_ENFORCE(level0.size() > 1,
-                     "Target LoD is not found, should be set to be a valid one "
-                     "through Input() or Attr().");
+      PADDLE_ENFORCE_GT(level0.size(), 1,
+                        "If Input(Y) is not provided, the target lod should be "
+                        "specified by attribute `target_lod`.");
     }
     ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
   }
@@ -50,36 +49,42 @@ class LoDResetOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   LoDResetOpMaker(OpProto *proto, OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("X", "(LoDTensor) The input tensor of lod_reset operator.");
-    AddInput("TargetLoD",
-             "(Tensor, optional) The target level 0 LoD from Input().")
+    AddInput("X",
+             "(Tensor, LoDTensor) Input variable of LoDResetOp which "
+             "could be a Tensor or LoDTensor, where the data of output "
+             "variable inherits from.");
+    AddInput("Y",
+             "(Tensor, LoDTensor, optional) If provided, lod of Input(Y) would "
+             "be considered as the target lod first, otherwise data of "
+             "Input(Y) would be considered as the target lod.")
         .AsDispensable();
-    AddOutput("Out", "(LoDTensor) The output tensor of lod_reset operator.");
+    AddOutput("Out",
+              "(LoDTensor) Output variable of LoDResetOp which should be a "
+              "LoDTensor.");
     AddAttr<std::vector<int>>("target_lod",
                               "The target level 0 LoD from Attr().")
         .SetDefault(std::vector<int>{});
     AddComment(R"DOC(LoDReset operator
 
-Reset LoD of Input(X) into a new one specified by Input(TargetLoD) or
-Attr(target_lod), or set LoD for Input(X) if it doesn't have one.
-Currently the lod_reset operator only supports the reset of level 0 LoD.
-At least one of Input(TargetLoD) and Attr(target_lod) must be set,
-and if both of them are set, Input(TargetLoD) will be chosen as the
-target LoD.
+Set LoD of `X` to a new one specified by `Y` or attribute `target_lod`. When `Y`
+provided, `Y.lod` would be considered as target LoD first, otherwise `Y.data`
+would be considered as target LoD. If `Y` is not provided, target LoD should be
+specified by attribute `target_lod`. If target LoD is specified by `Y.data` or
+`target_lod`, only one level LoD is supported.
 
 An example:
-Given a float LoDTensor X with shape (6, 1), its transpose form represents
-
-    [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
 
-with LoD = [[0, 2, 5, 6]] and the three (transposed) sequences look like
+Given a 1-level LoDTensor input(X)
+    X.lod =  [[ 0,      2,                   5     6 ]]
+    X.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+    X.dims = [6, 1]
 
-    [1.0, 2.0], [3.0, 4.0, 5.0], [6.0].
+target_lod: [0, 4, 6]
 
-If target LoD = [0, 4, 6], the lod_reset operator will reset the LoD and
-the sequences that the LoDTensor Output(Out) contains becomes:
-
-    [1.0, 2.0, 3.0, 4.0], [5.0, 6.0].
+then we get an 1-level LoDTensor
+    Out.lod =  [[ 0,                   4,            6 ]]
+    Out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+    Out.dims = [6, 1]
 
 )DOC");
   }
@@ -90,10 +95,16 @@ class LoDResetGradOp : public framework::OperatorWithKernel {
   using framework::OperatorWithKernel::OperatorWithKernel;
 
   void InferShape(framework::InferShapeContext *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) shouldn't be null.");
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of LoDResetGradOp should not be null.");
     PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
-                   "Input(Out@GRAD) shouldn't be null.");
-    ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
+                   "Input(Out@Grad) of LoDResetGradOp should not be null.");
+
+    auto x_grad_name = framework::GradVarName("X");
+    if (ctx->HasOutput(x_grad_name)) {
+      ctx->SetOutputDim(x_grad_name, ctx->GetInputDim("X"));
+      ctx->ShareLoD("X", /*->*/ x_grad_name);
+    }
   }
 
  protected:
@@ -111,9 +122,13 @@ class LoDResetGradOp : public framework::OperatorWithKernel {
 namespace ops = paddle::operators;
 REGISTER_OP(lod_reset, ops::LoDResetOp, ops::LoDResetOpMaker, lod_reset_grad,
             ops::LoDResetGradOp);
-REGISTER_OP_CPU_KERNEL(lod_reset,
-                       ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
-                       ops::LoDResetKernel<paddle::platform::CPUPlace, double>);
+REGISTER_OP_CPU_KERNEL(
+    lod_reset, ops::LoDResetKernel<paddle::platform::CPUPlace, float>,
+    ops::LoDResetKernel<paddle::platform::CPUPlace, double>,
+    ops::LoDResetKernel<paddle::platform::CPUPlace, int>,
+    ops::LoDResetKernel<paddle::platform::CPUPlace, int64_t>);
 REGISTER_OP_CPU_KERNEL(
     lod_reset_grad, ops::LoDResetGradKernel<paddle::platform::CPUPlace, float>,
-    ops::LoDResetGradKernel<paddle::platform::CPUPlace, double>);
+    ops::LoDResetGradKernel<paddle::platform::CPUPlace, double>,
+    ops::LoDResetGradKernel<paddle::platform::CPUPlace, int>,
+    ops::LoDResetGradKernel<paddle::platform::CPUPlace, int64_t>);
diff --git a/paddle/fluid/operators/lod_reset_op.cu b/paddle/fluid/operators/lod_reset_op.cu
index b0e87a851a77a1cc98d419a63d4d9e5e1b9dd163..888d4c12eb4e3f4fd94d8dd4178c59acd0abb23b 100644
--- a/paddle/fluid/operators/lod_reset_op.cu
+++ b/paddle/fluid/operators/lod_reset_op.cu
@@ -18,8 +18,12 @@ namespace ops = paddle::operators;
 
 REGISTER_OP_CUDA_KERNEL(
     lod_reset, ops::LoDResetKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::LoDResetKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::LoDResetKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::LoDResetKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::LoDResetKernel<paddle::platform::CUDADeviceContext, int64_t>);
 REGISTER_OP_CUDA_KERNEL(
     lod_reset_grad,
     ops::LoDResetGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::LoDResetGradKernel<paddle::platform::CUDADeviceContext, double>);
+    ops::LoDResetGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::LoDResetGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::LoDResetGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
diff --git a/paddle/fluid/operators/lod_reset_op.h b/paddle/fluid/operators/lod_reset_op.h
index 8186d4f8262101edc723af390eee1aec4fa6f3a5..99f01c2a255ade81421c2bba95ff3d38ced6f87c 100644
--- a/paddle/fluid/operators/lod_reset_op.h
+++ b/paddle/fluid/operators/lod_reset_op.h
@@ -26,35 +26,46 @@ class LoDResetKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const {
     auto* out = ctx.Output<framework::LoDTensor>("Out");
     auto* in = ctx.Input<framework::LoDTensor>("X");
-    auto* lod_t = ctx.Input<framework::Tensor>("TargetLoD");
+    auto* lod_t = ctx.Input<framework::LoDTensor>("Y");
+
+    out->ShareDataWith(*in);
 
     std::vector<int> level0;
     if (lod_t) {
-      auto* lod = lod_t->data<int>();
-      if (platform::is_gpu_place(ctx.GetPlace())) {
-        framework::Tensor lod_cpu;
-        framework::TensorCopy(*lod_t, platform::CPUPlace(),
-                              ctx.device_context(), &lod_cpu);
-        lod = lod_cpu.data<int>();
+      if (lod_t->lod().size() > 0) {
+        auto y_lod = lod_t->lod();
+        auto last_level = y_lod[y_lod.size() - 1];
+        PADDLE_ENFORCE_EQ(last_level.back(), in->dims()[0],
+                          "Last value of `Y`'s last level LoD should be equal "
+                          "to the first dimension of `X`");
+        out->set_lod(y_lod);
+        return;  // early return, since lod already set
+      } else {
+        auto* lod = lod_t->data<int>();
+        if (platform::is_gpu_place(ctx.GetPlace())) {
+          framework::Tensor lod_cpu;
+          framework::TensorCopy(*lod_t, platform::CPUPlace(),
+                                ctx.device_context(), &lod_cpu);
+          lod = lod_cpu.data<int>();
+        }
+        level0 = std::vector<int>(lod, lod + lod_t->numel());
       }
-      level0 = std::vector<int>(lod, lod + lod_t->numel());
     } else {
       level0 = ctx.Attr<std::vector<int>>("target_lod");
     }
 
-    PADDLE_ENFORCE(level0.size() > 1UL,
-                   "The size of target LoD should be greater than 1.");
-    PADDLE_ENFORCE(level0[0] == 0,
-                   "Target LoD should be a vector starting from 0.");
-    PADDLE_ENFORCE(level0.back() == in->dims()[0],
-                   "Target LoD should be a vector end with the "
-                   "first dimension of Input(X).");
+    PADDLE_ENFORCE_GT(level0.size(), 1UL,
+                      "Size of target LoD should be greater than 1.");
+    PADDLE_ENFORCE_EQ(level0[0], 0,
+                      "Target LoD should be a vector starting from 0.");
+    PADDLE_ENFORCE_EQ(level0.back(), in->dims()[0],
+                      "Target LoD should be a vector end with the "
+                      "first dimension of Input(X).");
     for (size_t i = 0; i < level0.size() - 1; ++i) {
       PADDLE_ENFORCE(level0[i + 1] > level0[i],
                      "Target LoD should be an ascending vector.");
     }
 
-    out->ShareDataWith(*in);
     // cast level0 to size_t
     std::vector<size_t> ulevel0(level0.size(), 0);
     std::transform(level0.begin(), level0.end(), ulevel0.begin(),
diff --git a/python/paddle/fluid/tests/unittests/test_lod_reset_op.py b/python/paddle/fluid/tests/unittests/test_lod_reset_op.py
index 3bf8230f8748dd87ec3c85b0cbd78df2e695a96b..6b6d4c824aeae319dacf224408ce96a0d9c5bb35 100644
--- a/python/paddle/fluid/tests/unittests/test_lod_reset_op.py
+++ b/python/paddle/fluid/tests/unittests/test_lod_reset_op.py
@@ -42,7 +42,7 @@ class TestLodResetOpByInput(OpTest):
         target_lod_0 = [0, 4, 7, 10]
         self.inputs = {
             'X': (x, lod),
-            'TargetLoD': np.array([target_lod_0]).astype('int32')
+            'Y': np.array([target_lod_0]).astype('int32')
         }
         self.outputs = {'Out': (x, [target_lod_0])}
 
@@ -50,7 +50,7 @@ class TestLodResetOpByInput(OpTest):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(["X"], "Out", no_grad_set=set("TargetLoD"))
+        self.check_grad(["X"], "Out", no_grad_set=set("Y"))
 
 
 class TestLodResetOpBoth(OpTest):
@@ -62,7 +62,7 @@ class TestLodResetOpBoth(OpTest):
         target_lod_0_in = [0, 4, 7, 10]
         self.inputs = {
             'X': (x, lod),
-            'TargetLoD': np.array(target_lod_0_in).astype('int32')
+            'Y': np.array(target_lod_0_in).astype('int32')
         }
         self.attrs = {'target_lod': target_lod_0_attr}
         self.outputs = {'Out': (x, [target_lod_0_in])}
@@ -71,7 +71,24 @@ class TestLodResetOpBoth(OpTest):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(["X"], "Out", no_grad_set=set("TargetLoD"))
+        self.check_grad(["X"], "Out", no_grad_set=set("Y"))
+
+
+class TestLodResetOpYIsLoDTensor(OpTest):
+    def setUp(self):
+        self.op_type = "lod_reset"
+        x = np.random.random((10, 20)).astype("float32")
+        lod = [[0, 3, 5, 10]]
+        y = np.random.random((10, 10)).astype("float32")
+        target_lod_0 = [[0, 4, 7, 10]]
+        self.inputs = {'X': (x, lod), 'Y': (y, target_lod_0)}
+        self.outputs = {'Out': (x, target_lod_0)}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad(self):
+        self.check_grad(["X"], "Out", no_grad_set=set("Y"))
 
 
 if __name__ == '__main__':