Commit bf3ff5b0 authored by qiaolongfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into update-api-reference-1

...@@ -173,21 +173,6 @@ def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
    return avg_cost, feeding_list
def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    lod_t = core.LoDTensor()
    lod_t.set(flattened_data, place)
    lod_t.set_lod([lod])
    return lod_t, lod[-1]
def lodtensor_to_ndarray(lod_tensor):
    dims = lod_tensor.get_dims()
    ndarray = np.zeros(shape=dims).astype('float32')
...
...@@ -125,18 +125,3 @@ def get_model(args):
        batch_size=args.batch_size)
    return loss, inference_program, adam, train_reader, test_reader, batch_acc
def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = numpy.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res
#!/bin/bash
python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst
for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
do
...
...@@ -33,6 +33,13 @@ Xavier
:members:
:noindex:
Bilinear
--------
.. autoclass:: paddle.fluid.initializer.Bilinear
:members:
:noindex:
force_init_on_cpu
-----------------
...@@ -73,3 +80,10 @@ XavierInitializer
:members:
:noindex:
BilinearInitializer
-------------------
.. autoclass:: paddle.fluid.initializer.BilinearInitializer
:members:
:noindex:
...@@ -59,3 +59,39 @@ get_inference_program
.. autofunction:: paddle.fluid.io.get_inference_program
:noindex:
save_checkpoint
---------------
.. autofunction:: paddle.fluid.io.save_checkpoint
:noindex:
load_checkpoint
---------------
.. autofunction:: paddle.fluid.io.load_checkpoint
:noindex:
clean_checkpoint
----------------
.. autofunction:: paddle.fluid.io.clean_checkpoint
:noindex:
load_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.load_persist_vars_without_grad
:noindex:
save_persist_vars_without_grad
------------------------------
.. autofunction:: paddle.fluid.io.save_persist_vars_without_grad
:noindex:
get_latest_checkpoint_serial
----------------------------
.. autofunction:: paddle.fluid.io.get_latest_checkpoint_serial
:noindex:
...@@ -181,6 +181,12 @@ Print
.. autofunction:: paddle.fluid.layers.Print
:noindex:
is_empty
--------
.. autofunction:: paddle.fluid.layers.is_empty
:noindex:
device
======
...@@ -219,6 +225,12 @@ Send
.. autofunction:: paddle.fluid.layers.Send
:noindex:
Recv
----
.. autofunction:: paddle.fluid.layers.Recv
:noindex:
open_recordio_file
------------------
...@@ -255,6 +267,25 @@ double_buffer
.. autofunction:: paddle.fluid.layers.double_buffer
:noindex:
random_data_generator
---------------------
.. autofunction:: paddle.fluid.layers.random_data_generator
:noindex:
Preprocessor
------------
.. autoclass:: paddle.fluid.layers.Preprocessor
:members:
:noindex:
load
----
.. autofunction:: paddle.fluid.layers.load
:noindex:
nn
==
...@@ -399,10 +430,9 @@ conv2d_transpose
conv3d_transpose
----------------
.. autofunction:: paddle.fluid.layers.conv3d_transpose
:noindex:
sequence_expand
---------------
...@@ -613,6 +643,48 @@ roi_pool
.. autofunction:: paddle.fluid.layers.roi_pool
:noindex:
dice_loss
---------
.. autofunction:: paddle.fluid.layers.dice_loss
:noindex:
image_resize
------------
.. autofunction:: paddle.fluid.layers.image_resize
:noindex:
image_resize_short
------------------
.. autofunction:: paddle.fluid.layers.image_resize_short
:noindex:
resize_bilinear
---------------
.. autofunction:: paddle.fluid.layers.resize_bilinear
:noindex:
gather
------
.. autofunction:: paddle.fluid.layers.gather
:noindex:
random_crop
-----------
.. autofunction:: paddle.fluid.layers.random_crop
:noindex:
mean_iou
--------
.. autofunction:: paddle.fluid.layers.mean_iou
:noindex:
ops
===
...@@ -718,12 +790,6 @@ logical_not
.. autofunction:: paddle.fluid.layers.logical_not
:noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
uniform_random_batch_size_like
------------------------------
...@@ -742,12 +808,6 @@ gaussian_random_batch_size_like
.. autofunction:: paddle.fluid.layers.gaussian_random_batch_size_like
:noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
scatter
-------
...@@ -760,6 +820,30 @@ sum
.. autofunction:: paddle.fluid.layers.sum
:noindex:
slice
-----
.. autofunction:: paddle.fluid.layers.slice
:noindex:
polygon_box_transform
---------------------
.. autofunction:: paddle.fluid.layers.polygon_box_transform
:noindex:
shape
-----
.. autofunction:: paddle.fluid.layers.shape
:noindex:
maxout
------
.. autofunction:: paddle.fluid.layers.maxout
:noindex:
sigmoid
-------
...@@ -916,18 +1000,6 @@ stanh
.. autofunction:: paddle.fluid.layers.stanh
:noindex:
hard_shrink
-----------
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
hard_sigmoid
------------
...@@ -940,6 +1012,30 @@ swish
.. autofunction:: paddle.fluid.layers.swish
:noindex:
uniform_random
--------------
.. autofunction:: paddle.fluid.layers.uniform_random
:noindex:
hard_shrink
-----------
.. autofunction:: paddle.fluid.layers.hard_shrink
:noindex:
cumsum
------
.. autofunction:: paddle.fluid.layers.cumsum
:noindex:
thresholded_relu
----------------
.. autofunction:: paddle.fluid.layers.thresholded_relu
:noindex:
tensor
======
...@@ -997,6 +1093,18 @@ fill_constant
.. autofunction:: paddle.fluid.layers.fill_constant
:noindex:
argmin
------
.. autofunction:: paddle.fluid.layers.argmin
:noindex:
argmax
------
.. autofunction:: paddle.fluid.layers.argmax
:noindex:
ones
----
...@@ -1012,6 +1120,12 @@ zeros
detection
=========
prior_box
---------
.. autofunction:: paddle.fluid.layers.prior_box
:noindex:
multi_box_head
--------------
...@@ -1099,3 +1213,18 @@ noam_decay
.. autofunction:: paddle.fluid.layers.noam_decay
:noindex:
metric
======
accuracy
--------
.. autofunction:: paddle.fluid.layers.accuracy
:noindex:
auc
---
.. autofunction:: paddle.fluid.layers.auc
:noindex:
...@@ -89,6 +89,13 @@ DecayedAdagradOptimizer
:members:
:noindex:
RMSPropOptimizer
----------------
.. autoclass:: paddle.fluid.optimizer.RMSPropOptimizer
:members:
:noindex:
Adadelta
--------
...
...@@ -23,3 +23,15 @@ profiler
.. autofunction:: paddle.fluid.profiler.profiler
:noindex:
start_profiler
--------------
.. autofunction:: paddle.fluid.profiler.start_profiler
:noindex:
stop_profiler
-------------
.. autofunction:: paddle.fluid.profiler.stop_profiler
:noindex:
...@@ -406,6 +406,9 @@ void Executor::EnableMKLDNN(const ProgramDesc& program) {
}
}
}
#else
LOG(WARNING)
<< "'MKLDNN' is not supported, please re-compile with the WITH_MKLDNN option";
#endif
}
...
...@@ -410,5 +410,38 @@ void LoDTensor::MergeLoDTensor(
}
}
LoD ConvertToLengthBasedLoD(const LoD &offset_lod) {
LoD length_lod;
length_lod.reserve(offset_lod.size());
for (size_t lvl = 0; lvl < offset_lod.size(); ++lvl) {
std::vector<size_t> level;
if (offset_lod[lvl].size() > 0) {
level.reserve(offset_lod[lvl].size() - 1);
}
for (size_t idx = 0; idx < offset_lod[lvl].size() - 1; ++idx) {
level.push_back(offset_lod[lvl][idx + 1] - offset_lod[lvl][idx]);
}
length_lod.push_back(level);
}
return length_lod;
}
LoD ConvertToOffsetBasedLoD(const LoD &length_lod) {
LoD offset_lod;
offset_lod.reserve(length_lod.size());
for (size_t lvl = 0; lvl < length_lod.size(); ++lvl) {
std::vector<size_t> level;
level.reserve(length_lod[lvl].size() + 1);
size_t tmp = 0;
level.push_back(tmp);
for (size_t idx = 0; idx < length_lod[lvl].size(); ++idx) {
tmp += length_lod[lvl][idx];
level.push_back(tmp);
}
offset_lod.push_back(level);
}
return offset_lod;
}
} // namespace framework
} // namespace paddle
...@@ -226,5 +226,19 @@ extern void WriteToRecordIO(recordio::Writer* writer,
extern std::vector<LoDTensor> ReadFromRecordIO(
recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
/*
* Convert between length-based LoD and offset-based LoD.
* The implementation of LoDTensor class use offset-based LoD.
* However, we want to expose the more user-friendly length-based
* LoD to the Python side instead.
*
* Example:
* If offset_lod = [[0, 2, 3],[0, 3, 5, 9]]
* then length_lod = [[2, 1], [3, 2, 4]]
*/
LoD ConvertToLengthBasedLoD(const LoD& offset_lod);
LoD ConvertToOffsetBasedLoD(const LoD& length_lod);
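A small Python sketch of the same round trip may make the comment above concrete; the helper names below (`to_length_based`, `to_offset_based`) are illustrative only and are not part of the Paddle code base.

.. code-block:: python

    def to_length_based(offset_lod):
        # [[0, 2, 3], [0, 3, 5, 9]] -> [[2, 1], [3, 2, 4]]
        return [[lvl[i + 1] - lvl[i] for i in range(len(lvl) - 1)]
                for lvl in offset_lod]

    def to_offset_based(length_lod):
        # [[2, 1], [3, 2, 4]] -> [[0, 2, 3], [0, 3, 5, 9]]
        offset_lod = []
        for lvl in length_lod:
            level, total = [0], 0
            for length in lvl:
                total += length
                level.append(total)
            offset_lod.append(level)
        return offset_lod

    assert to_length_based([[0, 2, 3], [0, 3, 5, 9]]) == [[2, 1], [3, 2, 4]]
    assert to_offset_based([[2, 1], [3, 2, 4]]) == [[0, 2, 3], [0, 3, 5, 9]]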
} // namespace framework
} // namespace paddle
...@@ -228,6 +228,38 @@ TEST(LoD, CheckAbsLoD) {
ASSERT_FALSE(CheckAbsLoD(abs_lod0));
}
TEST(LoD, ConvertToLengthBasedLoD) {
LoD offset_lod;
offset_lod.push_back(std::vector<size_t>({0, 2}));
offset_lod.push_back(std::vector<size_t>({0, 1, 3}));
offset_lod.push_back(std::vector<size_t>({0, 2, 4, 5}));
LoD length_lod = ConvertToLengthBasedLoD(offset_lod);
LoD expected;
expected.push_back(std::vector<size_t>({2}));
expected.push_back(std::vector<size_t>({1, 2}));
expected.push_back(std::vector<size_t>({2, 2, 1}));
EXPECT_EQ(length_lod, expected);
}
TEST(LoD, ConvertToOffsetBasedLoD) {
LoD length_lod;
length_lod.push_back(std::vector<size_t>({2}));
length_lod.push_back(std::vector<size_t>({1, 2}));
length_lod.push_back(std::vector<size_t>({2, 2, 1}));
LoD offset_lod = ConvertToOffsetBasedLoD(length_lod);
LoD expected;
expected.push_back(std::vector<size_t>({0, 2}));
expected.push_back(std::vector<size_t>({0, 1, 3}));
expected.push_back(std::vector<size_t>({0, 2, 4, 5}));
EXPECT_EQ(offset_lod, expected);
}
template <typename T>
static void TestRecordIO() {
LoDTensor tensor;
...
...@@ -43,48 +43,29 @@ Scope& Scope::NewScope() const {
}
Variable* Scope::Var(const std::string& name) {
// acquire the lock when new var under this scope
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
auto* v = FindVarLocally(name); return VarInternal(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
} }
Variable* Scope::Var(std::string* name) { Variable* Scope::Var(std::string* name) {
auto var_name = string::Sprintf("%p.%d", this, vars_.size()); std::unique_lock<std::mutex> lock(mutex_);
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
if (name != nullptr) { if (name != nullptr) {
*name = var_name; *name = new_name;
} }
return Var(var_name); return VarInternal(new_name);
} }
Variable* Scope::FindVar(const std::string& name) const { Variable* Scope::FindVar(const std::string& name) const {
// acquire the lock when find var
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
return FindVarInternal(name); return FindVarInternal(name);
} }
Variable* Scope::FindVarInternal(const std::string& name) const {
auto var = FindVarLocally(name);
if (var != nullptr) {
return var;
}
return (parent_ == nullptr) ? nullptr : parent_->FindVarInternal(name);
}
const Scope* Scope::FindScope(const Variable* var) const { const Scope* Scope::FindScope(const Variable* var) const {
for (auto& kv : vars_) { std::unique_lock<std::mutex> lock(mutex_);
if (kv.second.get() == var) { return FindScopeInternal(var);
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
} }
void Scope::DropKids() {
std::unique_lock<std::mutex> lock(mutex_);
for (Scope* s : kids_) delete s;
...@@ -92,6 +73,7 @@ void Scope::DropKids() {
}
std::vector<std::string> Scope::LocalVarNames() const {
std::unique_lock<std::mutex> lock(mutex_);
std::vector<std::string> known_vars;
known_vars.reserve(this->vars_.size());
for (auto& p : vars_) {
...@@ -127,6 +109,39 @@ void Scope::EraseVars(const std::vector<std::string>& var_names) {
void Scope::Rename(const std::string& origin_name,
const std::string& new_name) const {
std::unique_lock<std::mutex> lock(mutex_);
RenameInternal(origin_name, new_name);
}
std::string Scope::Rename(const std::string& origin_name) const {
std::unique_lock<std::mutex> lock(mutex_);
auto new_name = string::Sprintf("%p.%d", this, vars_.size());
RenameInternal(origin_name, new_name);
return new_name;
}
Variable* Scope::VarInternal(const std::string& name) {
auto* v = FindVarLocally(name);
if (v != nullptr) return v;
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
}
const Scope* Scope::FindScopeInternal(const Variable* var) const {
for (auto& kv : vars_) {
if (kv.second.get() == var) {
return this;
}
}
return (parent_ == nullptr) ? nullptr : parent_->FindScope(var);
}
void Scope::RenameInternal(const std::string& origin_name,
const std::string& new_name) const {
auto origin_it = vars_.find(origin_name);
PADDLE_ENFORCE(origin_it != vars_.end(),
"Cannot find original variable with name %s", origin_name);
...@@ -137,10 +152,12 @@ void Scope::Rename(const std::string& origin_name,
vars_.erase(origin_it);
}
std::string Scope::Rename(const std::string& origin_name) const { Variable* Scope::FindVarInternal(const std::string& name) const {
auto var_name = string::Sprintf("%p.%d", this, vars_.size()); auto var = FindVarLocally(name);
Rename(origin_name, var_name); if (var != nullptr) {
return var_name; return var;
}
return (parent_ == nullptr) ? nullptr : parent_->FindVar(name);
} }
Variable* Scope::FindVarLocally(const std::string& name) const {
...
...@@ -88,12 +88,20 @@ class Scope {
// Call Scope::NewScope for a sub-scope.
explicit Scope(Scope const* parent) : parent_(parent) {}
// Called by Var.
Variable* VarInternal(const std::string& name);
// Called by FindScope.
const Scope* FindScopeInternal(const Variable* var) const;
// Called by Rename.
void RenameInternal(const std::string& origin_name,
const std::string& new_name) const;
// Called by FindVar recursively.
// Caller doesn't own the returned Variable.
Variable* FindVarInternal(const std::string& name) const;
// Called by FindVarInternal and Var.
// Caller doesn't own the returned Variable.
Variable* FindVarLocally(const std::string& name) const;
// Scope in `kids_` are owned by this class.
...
...@@ -29,6 +29,7 @@ DEFINE_string(data_file, "", "File of input index data."); ...@@ -29,6 +29,7 @@ DEFINE_string(data_file, "", "File of input index data.");
DEFINE_int32(repeat, 100, "Running the inference program repeat times"); DEFINE_int32(repeat, 100, "Running the inference program repeat times");
DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
DEFINE_int32(num_threads, 1, "Number of threads should be used"); DEFINE_int32(num_threads, 1, "Number of threads should be used");
DECLARE_bool(use_mkldnn);
inline double GetCurrentMs() { inline double GetCurrentMs() {
struct timeval time; struct timeval time;
...@@ -103,9 +104,9 @@ void ThreadRunInfer( ...@@ -103,9 +104,9 @@ void ThreadRunInfer(
const int tid, paddle::framework::Scope* scope, const int tid, paddle::framework::Scope* scope,
const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) { const std::vector<std::vector<const paddle::framework::LoDTensor*>>& jobs) {
// maybe framework:ProgramDesc is not thread-safe // maybe framework:ProgramDesc is not thread-safe
paddle::platform::CPUPlace place;
paddle::framework::Executor executor(place);
auto& sub_scope = scope->NewScope(); auto& sub_scope = scope->NewScope();
auto place = paddle::platform::CPUPlace();
auto executor = paddle::framework::Executor(place);
auto inference_program = auto inference_program =
paddle::inference::Load(&executor, scope, FLAGS_model_path); paddle::inference::Load(&executor, scope, FLAGS_model_path);
...@@ -182,8 +183,8 @@ TEST(inference, nlp) { ...@@ -182,8 +183,8 @@ TEST(inference, nlp) {
stop_ms = GetCurrentMs(); stop_ms = GetCurrentMs();
} else { } else {
// 1. Define place, executor, scope // 1. Define place, executor, scope
auto place = paddle::platform::CPUPlace(); paddle::platform::CPUPlace place;
auto executor = paddle::framework::Executor(place); paddle::framework::Executor executor(place);
// 2. Initialize the inference_program and load parameters // 2. Initialize the inference_program and load parameters
std::unique_ptr<paddle::framework::ProgramDesc> inference_program; std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
......
...@@ -19,18 +19,18 @@ limitations under the License. */ ...@@ -19,18 +19,18 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ #define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \ class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \ : public ::paddle::framework::OpProtoAndCheckerMaker { \
public: \ public: \
void Make() override { \ void Make() override { \
AddInput("X", "Input of " #OP_NAME " operator"); \ AddInput("X", "Input of " #OP_NAME " operator"); \
AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \ AddOutput("Out", "Output of " #OP_NAME " operator").Reuse("X"); \
AddAttr<bool>("use_mkldnn", \ AddAttr<bool>("use_mkldnn", \
"(bool, default false) Only used in mkldnn kernel") \ "(default false) Only used in mkldnn kernel") \
.SetDefault(false); \ .SetDefault(false); \
AddComment(OP_COMMENT); \ AddComment(OP_COMMENT); \
} \ } \
} }
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \ #define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \
...@@ -112,7 +112,7 @@ $$out = \frac{1}{1 + e^{-x}}$$ ...@@ -112,7 +112,7 @@ $$out = \frac{1}{1 + e^{-x}}$$
__attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC( __attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC(
Logsigmoid Activation Operator Logsigmoid Activation Operator
$$out = \log \frac{1}{1 + e^{-x}}$$ $$out = \\log \\frac{1}{1 + e^{-x}}$$
)DOC"; )DOC";
...@@ -196,7 +196,7 @@ $out = [x]$ ...@@ -196,7 +196,7 @@ $out = [x]$
__attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC( __attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC(
Reciprocal Activation Operator. Reciprocal Activation Operator.
$$out = \frac{1}{x}$$ $$out = \\frac{1}{x}$$
)DOC"; )DOC";
......
...@@ -52,7 +52,7 @@ static std::vector<int> GetOffsets(const framework::ExecutionContext& ctx) { ...@@ -52,7 +52,7 @@ static std::vector<int> GetOffsets(const framework::ExecutionContext& ctx) {
} else { } else {
res = ctx.Attr<std::vector<int>>("offsets"); res = ctx.Attr<std::vector<int>>("offsets");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
rank, res.size(), rank, static_cast<int>(res.size()),
"Offsets size should be equal to dimension size of input tensor."); "Offsets size should be equal to dimension size of input tensor.");
} }
return res; return res;
......
...@@ -106,23 +106,36 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
"and M represents the number of decoded boxes.");
AddComment(R"DOC(
Bounding Box Coder Operator.
Bounding Box Coder.
Encode/Decode the target bounding box with the priorbox information. Encode/Decode the target bounding box with the priorbox information.
The Encoding schema described below: The Encoding schema described below:
ox = (tx - px) / pw / pxv
oy = (ty - py) / ph / pyv ox = (tx - px) / pw / pxv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv oy = (ty - py) / ph / pyv
ow = log(abs(tw / pw)) / pwv
oh = log(abs(th / ph)) / phv
The Decoding schema described below: The Decoding schema described below:
ox = (pw * pxv * tx * + px) - tw / 2
oy = (ph * pyv * ty * + py) - th / 2 ox = (pw * pxv * tx * + px) - tw / 2
ow = exp(pwv * tw) * pw + tw / 2
oh = exp(phv * th) * ph + th / 2 oy = (ph * pyv * ty * + py) - th / 2
where tx, ty, tw, th denote the target box's center coordinates, width and
height respectively. Similarly, px, py, pw, ph denote the priorbox's(anchor) ow = exp(pwv * tw) * pw + tw / 2
center coordinates, width and height. pxv, pyv, pwv, phv denote the variance
of the priorbox and ox, oy, ow, oh denote the encoded/decoded coordinates, oh = exp(phv * th) * ph + th / 2
width and height.
where `tx`, `ty`, `tw`, `th` denote the target box's center coordinates, width
and height respectively. Similarly, `px`, `py`, `pw`, `ph` denote the
priorbox's (anchor) center coordinates, width and height. `pxv`, `pyv`, `pwv`,
`phv` denote the variance of the priorbox and `ox`, `oy`, `ow`, `oh` denote the
encoded/decoded coordinates, width and height.
)DOC");
}
};
......
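As a quick illustration of the encoding schema documented above, the snippet below applies the four formulas to a single prior box/target box pair with NumPy. It is a standalone sketch in the comment's notation, not the operator implementation, and the numeric values are made up.

.. code-block:: python

    import numpy as np

    # prior box: center (px, py), size (pw, ph), variances (pxv, pyv, pwv, phv)
    px, py, pw, ph = 0.5, 0.5, 0.2, 0.4
    pxv, pyv, pwv, phv = 0.1, 0.1, 0.2, 0.2
    # target box: center (tx, ty), size (tw, th)
    tx, ty, tw, th = 0.55, 0.45, 0.25, 0.35

    # encoding schema from the operator comment
    ox = (tx - px) / pw / pxv
    oy = (ty - py) / ph / pyv
    ow = np.log(abs(tw / pw)) / pwv
    oh = np.log(abs(th / ph)) / phv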
...@@ -15,7 +15,7 @@ limitations under the License. */ ...@@ -15,7 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise_mul_op.h" #include "paddle/fluid/operators/elementwise_mul_op.h"
#include "paddle/fluid/operators/elementwise_op.h" #include "paddle/fluid/operators/elementwise_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\odot\\ Y"); REGISTER_ELEMWISE_OP(elementwise_mul, "Mul", "Out = X \\\\odot Y");
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
elementwise_mul, elementwise_mul,
ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>, ops::ElementwiseMulKernel<paddle::platform::CPUDeviceContext, float>,
......
...@@ -36,11 +36,12 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { ...@@ -36,11 +36,12 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
void Apply() override { void Apply() override {
AddAttr<float>("mean", AddAttr<float>("mean",
"(float, default 0.0) " "(float, default 0.0) "
"mean of random tensor.") "The mean (or center) of the gaussian distribution.")
.SetDefault(.0f); .SetDefault(.0f);
AddAttr<float>("std", AddAttr<float>("std",
"(float, default 1.0) " "(float, default 1.0) "
"std of random tensor.") "The standard deviation (std, or spread) of the "
"gaussian distribution.")
.SetDefault(1.0f); .SetDefault(1.0f);
AddAttr<int>("seed", AddAttr<int>("seed",
"(int, default 0) " "(int, default 0) "
...@@ -55,9 +56,11 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { ...@@ -55,9 +56,11 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
.SetDefault(framework::proto::VarType::FP32); .SetDefault(framework::proto::VarType::FP32);
AddComment(R"DOC( AddComment(R"DOC(
GaussianRandom Operator.
Used to initialize tensors with gaussian random generator. Used to initialize tensors with gaussian random generator.
The default mean of the distribution is 0, and the default standard
deviation (std) of the distribution is 1. Users can set mean and std
via the input arguments.
)DOC");
}
};
...
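For orientation, a possible call to the corresponding Python layer is sketched below. The layer is listed in the API docs in this change, but the exact Python signature is an assumption here rather than something shown in this diff; only the `mean`/`std` attribute names come from the operator definition above.

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    # dim 0 of `shape` is assumed to be replaced by the batch size of `input`
    noise = fluid.layers.gaussian_random_batch_size_like(
        input=x, shape=[-1, 13], mean=0.0, std=1.0)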
...@@ -128,8 +128,10 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -128,8 +128,10 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
"user should avoid setting this attribute.") "user should avoid setting this attribute.")
.SetDefault({}); .SetDefault({});
AddComment(R"DOC( AddComment(R"DOC(
Compute and return the noise-contrastive estimation training loss. Compute and return the noise-contrastive estimation training loss. See
See [Noise-contrastive estimation: A new estimation principle for unnormalized statistical models](http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf). `Noise-contrastive estimation: A new estimation principle for unnormalized
statistical models
<http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`_.
By default this operator uses a uniform distribution for sampling. By default this operator uses a uniform distribution for sampling.
)DOC"); )DOC");
} }
......
...@@ -144,28 +144,74 @@ PYBIND11_PLUGIN(core) { ...@@ -144,28 +144,74 @@ PYBIND11_PLUGIN(core) {
py::class_<LoDTensor, Tensor>(m, "LoDTensor") py::class_<LoDTensor, Tensor>(m, "LoDTensor")
.def_buffer( .def_buffer(
[](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); }) [](Tensor &self) -> py::buffer_info { return CastToPyBuffer(self); })
.def( .def("__init__",
"__init__", [](LoDTensor &instance, const std::vector<std::vector<size_t>>
[](LoDTensor &instance, const std::vector<std::vector<size_t>> &lod) { &recursive_sequence_lengths) {
LoD new_lod; LoD new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(recursive_sequence_lengths.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(recursive_sequence_lengths.begin(),
new (&instance) LoDTensor(new_lod); recursive_sequence_lengths.end(),
}) std::back_inserter(new_lod));
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
PADDLE_ENFORCE(
CheckLoD(new_offset_lod, -1),
"the provided recursive_sequence_lengths info is invalid");
new (&instance) LoDTensor(new_offset_lod);
})
.def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); }) .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
.def("set_lod", .def("set_lod",
[](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) { [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
// the input lod is offset-based level-of-detail info
LOG(WARNING)
<< "set_lod is deprecated and will be removed by 9.2018, "
"please switch to set_recursive_sequence_lengths.";
LoD new_lod; LoD new_lod;
new_lod.reserve(lod.size()); new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
PADDLE_ENFORCE(CheckLoD(new_lod, vectorize(self.dims()).front()),
"the provided lod info is invalid");
self.set_lod(new_lod); self.set_lod(new_lod);
}) })
.def("lod", [](LoDTensor &self) -> std::vector<std::vector<size_t>> { .def("set_recursive_sequence_lengths",
auto lod = self.lod(); [](LoDTensor &self, const std::vector<std::vector<size_t>>
std::vector<std::vector<size_t>> new_lod; &recursive_sequence_lengths) {
new_lod.reserve(lod.size()); // the input recursive_sequence_lengths is length-based
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); // level-of-detail info
return new_lod; LoD new_lod;
new_lod.reserve(recursive_sequence_lengths.size());
std::copy(recursive_sequence_lengths.begin(),
recursive_sequence_lengths.end(),
std::back_inserter(new_lod));
LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod);
PADDLE_ENFORCE(
CheckLoD(new_offset_lod, vectorize(self.dims()).front()),
"the provided recursive_sequence_lengths info is invalid");
self.set_lod(new_offset_lod);
})
.def("lod",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the offset-based lod info
LOG(WARNING) << "lod is deprecated and will be removed by 9.2018, "
"please switch to recursive_sequence_lengths.";
LoD lod = self.lod();
std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod;
})
.def("recursive_sequence_lengths",
[](LoDTensor &self) -> std::vector<std::vector<size_t>> {
// output the length-based lod info
LoD lod = ConvertToLengthBasedLoD(self.lod());
std::vector<std::vector<size_t>> new_lod;
new_lod.reserve(lod.size());
std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
return new_lod;
})
.def("has_valid_recursive_sequence_lengths", [](LoDTensor &self) -> bool {
// Check that the lod info is valid and match the outermost
// dimension of the LoDTensor data
return CheckLoD(self.lod(), vectorize(self.dims()).front());
}); });
py::class_<SelectedRows>(m, "SelectedRows")
...
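Taken together, the new bindings let Python code describe a LoDTensor with length-based LoD directly. A minimal usage sketch (assuming a CPU place; the data values are made up):

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    place = fluid.CPUPlace()
    t = fluid.core.LoDTensor()
    # three sequences of lengths 2, 1 and 3 -> six rows of data in total
    t.set(np.arange(6).reshape([6, 1]).astype('int64'), place)
    t.set_recursive_sequence_lengths([[2, 1, 3]])
    print(t.recursive_sequence_lengths())            # [[2, 1, 3]]
    print(t.lod())                                    # offset-based view: [[0, 2, 3, 6]]
    print(t.has_valid_recursive_sequence_lengths())   # True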
...@@ -31,6 +31,7 @@ int main(int argc, char** argv) { ...@@ -31,6 +31,7 @@ int main(int argc, char** argv) {
strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
#else #else
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn")); new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
new_argv.push_back(strdup("--undefok=use_mkldnn"));
#endif #endif
int new_argc = static_cast<int>(new_argv.size()); int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data(); char** new_argv_address = new_argv.data();
......
...@@ -47,7 +47,7 @@ class DataToLoDTensorConverter(object): ...@@ -47,7 +47,7 @@ class DataToLoDTensorConverter(object):
self.lod = [] self.lod = []
for i in six.range(lod_level): for i in six.range(lod_level):
self.lod.append([0]) self.lod.append([])
def feed(self, data): def feed(self, data):
self._feed_impl_(data, self.lod, self.lod_level) self._feed_impl_(data, self.lod, self.lod_level)
...@@ -56,8 +56,7 @@ class DataToLoDTensorConverter(object): ...@@ -56,8 +56,7 @@ class DataToLoDTensorConverter(object):
if lod_level == 0: if lod_level == 0:
self.data.append(data) self.data.append(data)
else: else:
cur_lod_len = len(data) lod[0].append(len(data))
lod[0].append(lod[0][-1] + cur_lod_len)
for each_data in data: for each_data in data:
self._feed_impl_(each_data, lod[1:], lod_level - 1) self._feed_impl_(each_data, lod[1:], lod_level - 1)
...@@ -66,7 +65,7 @@ class DataToLoDTensorConverter(object): ...@@ -66,7 +65,7 @@ class DataToLoDTensorConverter(object):
t = core.LoDTensor() t = core.LoDTensor()
t.set(arr, self.place) t.set(arr, self.place)
if self.lod_level > 0: if self.lod_level > 0:
t.set_lod(self.lod) t.set_recursive_sequence_lengths(self.lod)
return t
...
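With this change the converter records sequence lengths instead of running offsets, which is exactly what `set_recursive_sequence_lengths` above expects. A tiny sketch of the new bookkeeping for one batch of level-1 sequences (the names below are illustrative, not part of the fluid API):

.. code-block:: python

    batch = [[1, 2], [3], [4, 5, 6]]   # three sequences
    lod = [[]]                          # was [[0]] before this change
    for seq in batch:
        lod[0].append(len(seq))         # lengths, not accumulated offsets
    # lod == [[2, 1, 3]]; previously it would have been [[0, 2, 3, 6]]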
...@@ -15,11 +15,13 @@ ...@@ -15,11 +15,13 @@
import framework import framework
import numpy as np import numpy as np
import contextlib import contextlib
from framework import convert_np_dtype_to_dtype_
from core import VarDesc
__all__ = [ __all__ = [
'Constant', 'Uniform', 'Normal', 'Xavier', 'force_init_on_cpu', 'Constant', 'Uniform', 'Normal', 'Xavier', 'Bilinear', 'force_init_on_cpu',
'init_on_cpu', 'ConstantInitializer', 'UniformInitializer', 'init_on_cpu', 'ConstantInitializer', 'UniformInitializer',
'NormalInitializer', 'XavierInitializer' 'NormalInitializer', 'XavierInitializer', 'BilinearInitializer'
] ]
_force_init_on_cpu_ = False _force_init_on_cpu_ = False
...@@ -422,6 +424,101 @@ class MSRAInitializer(Initializer): ...@@ -422,6 +424,101 @@ class MSRAInitializer(Initializer):
return op return op
class BilinearInitializer(Initializer):
"""Implements the bilinear initializer.
This initializer can be used in transposed convolution operator to
act as upsampling. Users can upsample a feature map with shape of
(B, C, H, W) by any integer factor. The usage is:
>>> factor = 2
>>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
>>> initializer=Bilinear())
>>> conv_up = fluid.layers.conv2d_transpose(
>>> input,
>>> num_filters=C,
>>> output_size=None,
>>> filter_size=2 * factor - factor % 2,
>>> padding=ceil((factor - 1) / 2.),
>>> stride=factor,
>>> groups=C,
>>> param_attr=w_attr,
>>> bias_attr=False)
Where, `num_filters=C` and `groups=C` means this is channel-wise transposed
convolution. The filter shape will be (C, 1, K, K) where K is `filter_size`.
This initializer will set a (K, K) interpolation kernel for every channel
of the filter identically. The resulting shape of the output feature map
will be (B, C, factor * H, factor * W). Note that the learning rate and the
weight decay are set to 0 in order to keep coefficient values of bilinear
interpolation unchanged during training.
"""
def __init__(self):
"""Constructor for BilinearInitializer.
"""
super(BilinearInitializer, self).__init__()
def __call__(self, var, block):
"""Add bilinear initialization ops for a variable
Args:
var (Variable): Variable that needs to be initialized.
block (Block): The block in which initialization ops should
be added.
Returns:
the initialization op
Raises:
ValueError: If type of `var` and `block` is not right.
If the shape of `var` size is not 4 and
var.shape[2] != var.shape[3].
"""
if not isinstance(var, framework.Variable):
raise ValueError("var must be framework.Variable.")
if not isinstance(block, framework.Block):
raise ValueError("block must be framework.Block.")
shape = var.shape
if len(shape) != 4:
raise ValueError("the length of shape must be 4.")
if shape[2] != shape[3]:
raise ValueError("shape[2] must be equal to shape[3].")
weight = np.zeros(np.prod(var.shape), dtype='float32')
size = shape[3]
# factor
f = np.ceil(size / 2.)
# center
c = (2 * f - 1 - f % 2) / (2. * f)
for i in range(np.prod(shape)):
x = i % size
y = (i / size) % size
weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
weight = np.reshape(weight, shape)
if var.dtype == VarDesc.VarType.FP32:
value_name = "fp32_values"
values = [float(v) for v in weight.flat]
else:
raise ValueError("Unsupported dtype %s" % var.dtype)
if np.prod(shape) > 1024 * 1024:
raise ValueError("The size of input is too big. ")
op = block.append_op(
type='assign_value',
outputs={'Out': [var]},
attrs={
'dtype': var.dtype,
'shape': list(shape),
value_name: values
})
var.op = op
return op
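# A worked example of the kernel the formulas above produce (illustrative
# only, not used by this class): with factor = 2, the filter size is
# K = 2 * factor - factor % 2 = 4, f = ceil(K / 2.) = 2.0 and
# c = (2 * f - 1 - f % 2) / (2. * f) = 0.75, so the per-axis weights are
# [0.25, 0.75, 0.75, 0.25] and every channel gets their outer product as
# its (4, 4) bilinear interpolation kernel:
#
#   w1d = np.array([1 - abs(x / 2.0 - 0.75) for x in range(4)])
#   kernel = np.outer(w1d, w1d)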
# We shorten the class name, since users will use the initializer with the package
# name. The sample code:
#
...@@ -436,3 +533,4 @@ Uniform = UniformInitializer
Normal = NormalInitializer
Xavier = XavierInitializer
MSRA = MSRAInitializer
Bilinear = BilinearInitializer
...@@ -237,9 +237,56 @@ class BlockGuard(object): ...@@ -237,9 +237,56 @@ class BlockGuard(object):
class ParallelDo(object): class ParallelDo(object):
""" """
ParallelDo class. ParallelDo is used to represent multi-thread data parallel processing.
ParallelDo class is used to create a ParallelDo. Its vanilla implementation can be shown as the following (:math:`|` means
single thread and :math:`||||` means multiple threads)
.. code-block:: text
In the forward pass
| Split input onto different devices
| Copy parameter onto different devices
|||| Compute forward pass in parallel
| Merge output from different devices
In the backward pass
| Split output@grad onto different devices
|||| Compute backward pass in parallel
| accumulate param@grad from different devices to the first device
| Merge input@grad from different devices
| Copy param@grad to the place of parallel_do_op
Examples:
.. code-block:: python
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# ParallelDo version & Single-thread version
if thread_num > 1:
places = fluid.layers.get_places(thread_num)
pd = fluid.layers.ParallelDo(places)
with pd.do():
images = pd.read_input(images)
label = pd.read_input(label)
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
pd.write_output(avg_cost)
avg_cost = pd()
avg_cost = fluid.layers.mean(avg_cost)
else:
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
.. warning::
It will be soon deprecated, please use ParallelExecutor instead.
""" """
def __init__(self, places, use_nccl=False, name=None): def __init__(self, places, use_nccl=False, name=None):
...@@ -610,6 +657,29 @@ class WhileGuard(BlockGuard): ...@@ -610,6 +657,29 @@ class WhileGuard(BlockGuard):
class While(object): class While(object):
"""
while loop control flow.
Args:
cond (Variable): condition used to compare.
name (str): The name of this layer.
Examples:
.. code-block:: python
d0 = layers.data("d0", shape=[10], dtype='float32')
data_array = layers.array_write(x=d0, i=i)
array_len = layers.fill_constant(shape=[1],dtype='int64', value=3)
cond = layers.less_than(x=i, y=array_len)
while_op = layers.While(cond=cond)
with while_op.block():
d = layers.array_read(array=data_array, i=i)
i = layers.increment(x=i, in_place=True)
layers.array_write(result, i=i, array=d)
layers.less_than(x=i, y=array_len, cond=cond)
"""
BEFORE_WHILE_BLOCK = 0 BEFORE_WHILE_BLOCK = 0
IN_WHILE_BLOCK = 1 IN_WHILE_BLOCK = 1
AFTER_WHILE_BLOCK = 2 AFTER_WHILE_BLOCK = 2
...@@ -679,8 +749,8 @@ def lod_rank_table(x, level=0): ...@@ -679,8 +749,8 @@ def lod_rank_table(x, level=0):
.. code-block:: text .. code-block:: text
x is a LoDTensor: x is a LoDTensor:
x.lod = [[0, 2, 3], x.lod = [[2, 1],
[0, 5, 6, 7]] [5, 1, 1]]
x.data = [a, b, c, d, e, f, g] x.data = [a, b, c, d, e, f, g]
1. set level to 0: 1. set level to 0:
......
...@@ -97,7 +97,9 @@ def detection_output(loc, ...@@ -97,7 +97,9 @@ def detection_output(loc,
nms_eta(float): The parameter for adaptive NMS. nms_eta(float): The parameter for adaptive NMS.
Returns: Returns:
Variable: The detection outputs is a LoDTensor with shape [No, 6]. Variable:
The detection outputs is a LoDTensor with shape [No, 6].
Each row has six values: [label, confidence, xmin, ymin, xmax, ymax]. Each row has six values: [label, confidence, xmin, ymin, xmax, ymax].
`No` is the total number of detections in this mini-batch. For each `No` is the total number of detections in this mini-batch. For each
instance, the offsets in first dimension are called LoD, the offset instance, the offsets in first dimension are called LoD, the offset
...@@ -110,15 +112,15 @@ def detection_output(loc, ...@@ -110,15 +112,15 @@ def detection_output(loc,
Examples: Examples:
.. code-block:: python .. code-block:: python
pb = layers.data(name='prior_box', shape=[10, 4], pb = layers.data(name='prior_box', shape=[10, 4],
append_batch_size=False, dtype='float32') append_batch_size=False, dtype='float32')
pbv = layers.data(name='prior_box_var', shape=[10, 4], pbv = layers.data(name='prior_box_var', shape=[10, 4],
append_batch_size=False, dtype='float32') append_batch_size=False, dtype='float32')
loc = layers.data(name='target_box', shape=[2, 21, 4], loc = layers.data(name='target_box', shape=[2, 21, 4],
append_batch_size=False, dtype='float32') append_batch_size=False, dtype='float32')
scores = layers.data(name='scores', shape=[2, 21, 10], scores = layers.data(name='scores', shape=[2, 21, 10],
append_batch_size=False, dtype='float32') append_batch_size=False, dtype='float32')
nmsed_outs = fluid.layers.detection_output(scores=scores, nmsed_outs = fluid.layers.detection_output(scores=scores,
loc=loc, loc=loc,
prior_box=pb, prior_box=pb,
prior_box_var=pbv) prior_box_var=pbv)
...@@ -296,8 +298,6 @@ def target_assign(input, ...@@ -296,8 +298,6 @@ def target_assign(input,
mismatch_value=None, mismatch_value=None,
name=None): name=None):
""" """
**Target assigner operator**
This operator can be, for given the target bounding boxes or labels, This operator can be, for given the target bounding boxes or labels,
to assign classification and regression targets to each prediction as well as to assign classification and regression targets to each prediction as well as
weights to prediction. The weights is used to specify which prediction would weights to prediction. The weights is used to specify which prediction would
...@@ -311,20 +311,24 @@ def target_assign(input, ...@@ -311,20 +311,24 @@ def target_assign(input,
1. Assigning all outputs based on `match_indices`:
If id = match_indices[i][j] > 0, .. code-block:: text
If id = match_indices[i][j] > 0,
out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K] out[i][j][0 : K] = X[lod[i] + id][j % P][0 : K]
out_weight[i][j] = 1. out_weight[i][j] = 1.
Otherwise, Otherwise,
out[j][j][0 : K] = {mismatch_value, mismatch_value, ...} out[j][j][0 : K] = {mismatch_value, mismatch_value, ...}
out_weight[i][j] = 0. out_weight[i][j] = 0.
2. Assigning out_weight based on `neg_indices` if `neg_indices` is provided: 2. Assigning out_weight based on `neg_indices` if `neg_indices` is provided:
Assumed that the row offset for each instance in `neg_indices` is called neg_lod, Assumed that the row offset for each instance in `neg_indices` is called neg_lod,
for i-th instance and each `id` of neg_indices in this instance: for i-th instance and each `id` of neg_indices in this instance:
.. code-block:: text
out[i][id][0 : K] = {mismatch_value, mismatch_value, ...} out[i][id][0 : K] = {mismatch_value, mismatch_value, ...}
out_weight[i][id] = 1.0 out_weight[i][id] = 1.0
...@@ -341,10 +345,23 @@ def target_assign(input, ...@@ -341,10 +345,23 @@ def target_assign(input,
mismatch_value (float32): Fill this value to the mismatched location. mismatch_value (float32): Fill this value to the mismatched location.
Returns: Returns:
out (Variable): The output is a 3D Tensor with shape [N, P, K], tuple:
N and P is the same as they are in `neg_indices`, K is the
same as it in input of X. If `match_indices[i][j]`. A tuple(out, out_weight) is returned. out is a 3D Tensor with
out_weight (Variable): The weight for output with the shape of [N, P, 1]. shape [N, P, K], N and P is the same as they are in
`neg_indices`, K is the same as it in input of X. If
`match_indices[i][j]`. out_weight is the weight for output with
the shape of [N, P, 1].
Examples:
.. code-block:: python
matched_indices, matched_dist = fluid.layers.bipartite_match(iou)
gt = layers.data(
name='gt', shape=[1, 1], dtype='int32', lod_level=1)
trg, trg_weight = layers.target_assign(
gt, matched_indices, mismatch_value=0)
""" """
helper = LayerHelper('target_assign', **locals()) helper = LayerHelper('target_assign', **locals())
out = helper.create_tmp_variable(dtype=input.dtype) out = helper.create_tmp_variable(dtype=input.dtype)
......
...@@ -378,16 +378,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True): ...@@ -378,16 +378,16 @@ def random_data_generator(low, high, shapes, lod_levels, for_parallel=True):
Variable: A Reader Variable from which we can get random data. Variable: A Reader Variable from which we can get random data.
Examples: Examples:
.. code-block:: python
reader = fluid.layers.io.random_data_generator( .. code-block:: python
low=0.0,
high=1.0,
shapes=[(3,224,224), (1)],
lod_levels=[0, 0])
# Via the reader, we can use 'read_file' layer to get data: reader = fluid.layers.random_data_generator(
image, label = fluid.layers.io.read_file(reader) low=0.0,
high=1.0,
shapes=[[3,224,224], [1]],
lod_levels=[0, 0])
# Via the reader, we can use 'read_file' layer to get data:
image, label = fluid.layers.read_file(reader)
""" """
dtypes = [core.VarDesc.VarType.FP32] * len(shapes) dtypes = [core.VarDesc.VarType.FP32] * len(shapes)
shape_concat = [] shape_concat = []
......
...@@ -25,10 +25,11 @@ import nn ...@@ -25,10 +25,11 @@ import nn
import ops import ops
import tensor import tensor
from ..initializer import init_on_cpu from ..initializer import init_on_cpu
from ..framework import default_main_program, Parameter
__all__ = [ __all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay' 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS'
] ]
...@@ -212,15 +213,27 @@ def polynomial_decay(learning_rate, ...@@ -212,15 +213,27 @@ def polynomial_decay(learning_rate,
def piecewise_decay(boundaries, values): def piecewise_decay(boundaries, values):
"""Applies piecewise decay to the initial learning rate. """Applies piecewise decay to the initial learning rate.
>>> boundaries = [10000, 20000] The algorithm can be described as the code below.
>>> values = [1.0, 0.5, 0.1]
>>> .. code-block:: python
>>> if step < 10000:
>>> learning_rate = 1.0 boundaries = [10000, 20000]
>>> elif 10000 <= step < 20000: values = [1.0, 0.5, 0.1]
>>> learning_rate = 0.5 if step < 10000:
>>> else: learning_rate = 1.0
>>> learning_rate = 0.1 elif 10000 <= step < 20000:
learning_rate = 0.5
else:
learning_rate = 0.1
Args:
boundaries: A list of steps numbers.
values: A list of learning rate values that will be picked during
different step boundaries.
Returns:
The decayed learning rate.
""" """
if len(values) - len(boundaries) != 1: if len(values) - len(boundaries) != 1:
...@@ -252,3 +265,41 @@ def piecewise_decay(boundaries, values): ...@@ -252,3 +265,41 @@ def piecewise_decay(boundaries, values):
tensor.assign(last_value_var, lr) tensor.assign(last_value_var, lr)
return lr return lr
def append_LARS(params_grads, learning_rate, weight_decay):
"""Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
each layer.
```python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
```
Args:
learning_rate: A learning rate Variable. This
is the global learning rate for LARS.
weight_decay: A Python `float` number.
Returns:
The decayed learning rate
"""
def _balanced_weight(param_norm, grad_norm):
if weight_decay == 1.0:
return grad_norm + param_norm
else:
return grad_norm + weight_decay * param_norm
for param, grad in params_grads:
param_lr = param.optimize_attr['learning_rate']
param_norm = ops.sqrt(nn.reduce_sum(input=ops.square(param)))
grad_norm = ops.sqrt(nn.reduce_sum(input=ops.square(grad)))
if type(param_lr) == float and param_lr == 1.0:
decayed_lr = learning_rate * param_norm \
/ _balanced_weight(param_norm, grad_norm)
else:
decayed_lr = learning_rate * param_lr * param_norm \
/ _balanced_weight(param_norm, grad_norm)
# set back param local learning rate
param.optimize_attr['learning_rate'] = decayed_lr
...@@ -53,6 +53,43 @@ def accuracy(input, label, k=1, correct=None, total=None): ...@@ -53,6 +53,43 @@ def accuracy(input, label, k=1, correct=None, total=None):
def auc(input, label, curve='ROC', num_thresholds=200): def auc(input, label, curve='ROC', num_thresholds=200):
"""
**Area Under the Curve (AUC) Layer**
This implementation computes the AUC according to forward output and label.
It is used very widely in binary classification evaluation.
Note: If input label contains values other than 0 and 1, it will be cast
to `bool`. Find the relevant definitions `here <https://en.wikipedia.org\
/wiki/Receiver_operating_characteristic#Area_under_the_curve>`_.
There are two types of possible curves:
1. ROC: Receiver operating characteristic;
2. PR: Precision Recall
Args:
input(Variable): A floating-point 2D Variable, values are in the range
[0, 1]. Each row is sorted in descending order. This
input should be the output of topk. Typically, this
Variable indicates the probability of each label.
label(Variable): A 2D int Variable indicating the label of the training
data. The height is batch size and width is always 1.
curve(str): Curve type, can be 'ROC' or 'PR'. Default 'ROC'.
num_thresholds(int): The number of thresholds to use when discretizing
the roc curve. Default 200.
Returns:
Variable: A scalar representing the current AUC.
Examples:
.. code-block:: python
# network is a binary classification model and label the ground truth
prediction = network(image, is_infer=True)
auc_out=fluid.layers.auc(input=prediction, label=label)
"""
warnings.warn( warnings.warn(
"This interface not recommended, fluid.layers.auc compute the auc at every minibatch, \ "This interface not recommended, fluid.layers.auc compute the auc at every minibatch, \
but can not aggregate them and get the pass AUC, because pass \ but can not aggregate them and get the pass AUC, because pass \
......
...@@ -364,8 +364,7 @@ def dynamic_lstm(input, ...@@ -364,8 +364,7 @@ def dynamic_lstm(input,
cell_activation(str): The activation for cell output. Choices = ["sigmoid", cell_activation(str): The activation for cell output. Choices = ["sigmoid",
"tanh", "relu", "identity"], default "tanh". "tanh", "relu", "identity"], default "tanh".
candidate_activation(str): The activation for candidate hidden state. candidate_activation(str): The activation for candidate hidden state.
Choices = ["sigmoid", "tanh", Choices = ["sigmoid", "tanh", "relu", "identity"],
"relu", "identity"],
default "tanh". default "tanh".
dtype(str): Data type. Choices = ["float32", "float64"], default "float32". dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
...@@ -540,27 +539,31 @@ def dynamic_lstmp(input, ...@@ -540,27 +539,31 @@ def dynamic_lstmp(input,
cell_activation(str): The activation for cell output. Choices = ["sigmoid", cell_activation(str): The activation for cell output. Choices = ["sigmoid",
"tanh", "relu", "identity"], default "tanh". "tanh", "relu", "identity"], default "tanh".
candidate_activation(str): The activation for candidate hidden state. candidate_activation(str): The activation for candidate hidden state.
Choices = ["sigmoid", "tanh", Choices = ["sigmoid", "tanh", "relu", "identity"],
"relu", "identity"],
default "tanh". default "tanh".
proj_activation(str): The activation for projection output. proj_activation(str): The activation for projection output.
Choices = ["sigmoid", "tanh", Choices = ["sigmoid", "tanh", "relu", "identity"],
"relu", "identity"],
default "tanh". default "tanh".
dtype(str): Data type. Choices = ["float32", "float64"], default "float32". dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically. will be named automatically.
Returns: Returns:
tuple: The projection of hidden state, and cell state of LSTMP. The \ tuple: A tuple of two output variable: the projection of hidden state, \
shape of projection is (T x P), for the cell state which is \ and cell state of LSTMP. The shape of projection is (T x P), \
(T x D), and both LoD is the same with the `input`. for the cell state which is (T x D), and both LoD is the same \
with the `input`.
Examples: Examples:
.. code-block:: python .. code-block:: python
dict_dim, emb_dim = 128, 64
data = fluid.layers.data(name='sequence', shape=[1],
dtype='int32', lod_level=1)
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim, proj_dim = 512, 256 hidden_dim, proj_dim = 512, 256
fc_out = fluid.layers.fc(input=input_seq, size=hidden_dim * 4, fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4,
act=None, bias_attr=None) act=None, bias_attr=None)
proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out, proj_out, _ = fluid.layers.dynamic_lstmp(input=fc_out,
size=hidden_dim * 4, size=hidden_dim * 4,
...@@ -626,10 +629,10 @@ def dynamic_gru(input, ...@@ -626,10 +629,10 @@ def dynamic_gru(input,
candidate_activation='tanh', candidate_activation='tanh',
h_0=None): h_0=None):
""" """
**Dynamic GRU Layer** **Gated Recurrent Unit (GRU) Layer**
Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
Sequence Modeling <https://arxiv.org/abs/1412.3555>`_ Sequence Modeling <https://arxiv.org/abs/1412.3555>`_ .
The formula is as follows: The formula is as follows:
...@@ -676,17 +679,25 @@ def dynamic_gru(input, ...@@ -676,17 +679,25 @@ def dynamic_gru(input,
Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid". Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
candidate_activation(str): The activation for candidate hidden state. candidate_activation(str): The activation for candidate hidden state.
Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".
h_0 (Variable): The hidden output of the first time step. h_0 (Variable): This is initial hidden state. If not set, default is
zero. This is a tensor with shape (N x D), where N is the number of
total time steps of input mini-batch feature and D is the hidden
size.
Returns: Returns:
Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \ Variable: The hidden state of GRU. The shape is :math:`(T \\times D)`, \
and lod is the same with the input. and sequence length is the same with the input.
Examples: Examples:
.. code-block:: python .. code-block:: python
dict_dim, emb_dim = 128, 64
data = fluid.layers.data(name='sequence', shape=[1],
dtype='int32', lod_level=1)
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim = 512 hidden_dim = 512
x = fluid.layers.fc(input=data, size=hidden_dim * 3) x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim) hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
""" """
...@@ -924,13 +935,13 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None): ...@@ -924,13 +935,13 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
Drop or keep each element of `x` independently. Dropout is a regularization Drop or keep each element of `x` independently. Dropout is a regularization
technique for reducing overfitting by preventing neuron co-adaptation during technique for reducing overfitting by preventing neuron co-adaptation during
training. The dropout operator randomly set (according to the given dropout training. The dropout operator randomly sets (according to the given dropout
probability) the outputs of some units to zero, while the others remain probability) the outputs of some units to zero, while the others remain
unchanged. unchanged.
Args: Args:
x (Variable): The input tensor. x (Variable): The input tensor variable.
dropout_prob (float): Probability of setting units to zero. dropout_prob (float): Probability of setting units to zero.
is_test (bool): A flag indicating whether it is in test phase or not. is_test (bool): A flag indicating whether it is in test phase or not.
seed (int): A Python integer used to create random seeds. If this seed (int): A Python integer used to create random seeds. If this
parameter is set to None, a random seed is used. parameter is set to None, a random seed is used.
...@@ -940,13 +951,14 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None): ...@@ -940,13 +951,14 @@ def dropout(x, dropout_prob, is_test=False, seed=None, name=None):
will be named automatically. will be named automatically.
Returns: Returns:
Variable: A tensor variable. Variable: A tensor variable with the same shape as `x`.
Examples: Examples:
.. code-block:: python .. code-block:: python
x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32") x = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
dropped = fluid.layers.dropout(input=x, dropout_rate=0.5) dropped = fluid.layers.dropout(x, dropout_prob=0.5)
""" """
helper = LayerHelper('dropout', **locals()) helper = LayerHelper('dropout', **locals())
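For readers who want to check the training-time semantics described above against a reference, a minimal numpy sketch is given below. It is an illustration only, not the Fluid kernel, and inference-time rescaling conventions are deliberately left out.

.. code-block:: python

    import numpy as np

    def dropout_train_ref(x, dropout_prob, seed=None):
        # Zero each unit independently with probability dropout_prob and
        # leave the remaining units unchanged, as described above.
        rng = np.random.RandomState(seed)
        mask = (rng.uniform(size=x.shape) >= dropout_prob).astype(x.dtype)
        return x * mask

    x = np.random.rand(4, 8).astype('float32')
    print(dropout_train_ref(x, dropout_prob=0.5, seed=1))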
...@@ -1198,6 +1210,41 @@ def sequence_conv(input, ...@@ -1198,6 +1210,41 @@ def sequence_conv(input,
def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True): def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
"""
This function computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1. Thus, the shape of
input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N`
is the sum of the length of all sequences.
For i-th sequence in a mini-batch:
.. math::
Out(X[lod[i]:lod[i+1]], :) = \\frac{\exp(X[lod[i]:lod[i+1], :])}{\sum(\exp(X[lod[i]:lod[i+1], :]))}
For example, for a mini-batch of 3 sequences with variable-length,
each containing 2, 3, 2 time-steps, the lod of which is [0, 2, 5, 7],
then softmax will be computed among :math:`X[0:2, :]`, :math:`X[2:5, :]`,
:math:`X[5:7, :]`, and :math:`N` turns out to be 7.
Args:
input (Variable): The input variable which is a LoDTensor.
bias_attr (ParamAttr|None): attributes for bias
param_attr (ParamAttr|None): attributes for parameter
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn \
library is installed. Default: True
Returns:
Variable: output of sequence_softmax
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[7, 1],
dtype='float32', lod_level=1)
x_sequence_softmax = fluid.layers.sequence_softmax(input=x)
"""
helper = LayerHelper('sequence_softmax', **locals()) helper = LayerHelper('sequence_softmax', **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
softmax_out = helper.create_tmp_variable(dtype) softmax_out = helper.create_tmp_variable(dtype)
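The per-sequence softmax above can be mirrored in plain numpy; the sketch below uses the length-based LoD [2, 3, 2] from the text and is an illustration rather than the actual kernel.

.. code-block:: python

    import numpy as np

    def sequence_softmax_ref(x, seq_lens):
        # x: flat data of shape [N, 1]; seq_lens: length-based LoD, e.g. [2, 3, 2]
        out = np.zeros_like(x, dtype='float32')
        offset = 0
        for length in seq_lens:
            seg = x[offset:offset + length, 0]
            e = np.exp(seg - seg.max())          # numerically stable softmax
            out[offset:offset + length, 0] = e / e.sum()
            offset += length
        return out

    x = np.array([[1.], [3.], [2.], [4.], [6.], [5.], [1.]], dtype='float32')
    print(sequence_softmax_ref(x, [2, 3, 2]))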
...@@ -1574,13 +1621,13 @@ def sequence_pool(input, pool_type): ...@@ -1574,13 +1621,13 @@ def sequence_pool(input, pool_type):
.. code-block:: text .. code-block:: text
x is a 1-level LoDTensor: x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]] x.lod = [[2, 3, 2]]
x.data = [1, 3, 2, 4, 6, 5, 1] x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1] x.dims = [7, 1]
then output is a Tensor: then output is a Tensor:
out.dim = [3, 1] out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0] with condition len(x.lod[-1]) == out.dims[0]
for different pool_type: for different pool_type:
average: out.data = [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2 average: out.data = [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
...@@ -1639,13 +1686,13 @@ def sequence_first_step(input): ...@@ -1639,13 +1686,13 @@ def sequence_first_step(input):
.. code-block:: text .. code-block:: text
x is a 1-level LoDTensor: x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]] x.lod = [[2, 3, 2]]
x.data = [1, 3, 2, 4, 6, 5, 1] x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1] x.dims = [7, 1]
then output is a Tensor: then output is a Tensor:
out.dim = [3, 1] out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0] with condition len(x.lod[-1]) == out.dims[0]
out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1) out.data = [1, 2, 5], where 1=first(1,3), 2=first(2,4,6), 5=first(5,1)
Args: Args:
...@@ -1672,13 +1719,13 @@ def sequence_last_step(input): ...@@ -1672,13 +1719,13 @@ def sequence_last_step(input):
.. code-block:: text .. code-block:: text
x is a 1-level LoDTensor: x is a 1-level LoDTensor:
x.lod = [[0, 2, 5, 7]] x.lod = [[2, 3, 2]]
x.data = [1, 3, 2, 4, 6, 5, 1] x.data = [1, 3, 2, 4, 6, 5, 1]
x.dims = [7, 1] x.dims = [7, 1]
then output is a Tensor: then output is a Tensor:
out.dim = [3, 1] out.dim = [3, 1]
with condition len(x.lod[-1]) - 1 == out.dims[0] with condition len(x.lod[-1]) == out.dims[0]
out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1) out.data = [3, 6, 1], where 3=last(1,3), 6=last(2,4,6), 1=last(5,1)
Args: Args:
...@@ -2451,18 +2498,18 @@ def sequence_expand(x, y, ref_level=-1, name=None): ...@@ -2451,18 +2498,18 @@ def sequence_expand(x, y, ref_level=-1, name=None):
* Case 1 * Case 1
x is a LoDTensor: x is a LoDTensor:
x.lod = [[0, 2, 4]] x.lod = [[2, 2]]
x.data = [[a], [b], [c], [d]] x.data = [[a], [b], [c], [d]]
x.dims = [4, 1] x.dims = [4, 1]
y is a LoDTensor: y is a LoDTensor:
y.lod = [[0, 2, 4], y.lod = [[2, 2],
[0, 3, 6, 7, 8]] [3, 3, 1, 1]]
ref_level: 0 ref_level: 0
then output is a 1-level LoDTensor: then output is a 1-level LoDTensor:
out.lod = [[0, 2, 4, 6, 8]] out.lod = [[2, 2, 2, 2]]
out.data = [[a], [b], [a], [b], [c], [d], [c], [d]] out.data = [[a], [b], [a], [b], [c], [d], [c], [d]]
out.dims = [8, 1] out.dims = [8, 1]
...@@ -2472,7 +2519,7 @@ def sequence_expand(x, y, ref_level=-1, name=None): ...@@ -2472,7 +2519,7 @@ def sequence_expand(x, y, ref_level=-1, name=None):
x.dims = [3, 1] x.dims = [3, 1]
y is a LoDTensor: y is a LoDTensor:
y.lod = [[0, 2, 2, 5]] y.lod = [[2, 0, 3]]
ref_level: -1 ref_level: -1
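Case 1 above can be reproduced with a small Python sketch of the expansion rule: the i-th sequence of x is repeated as many times as the length of the i-th sequence of y at ref_level. This is an illustration only, not the operator kernel.

.. code-block:: python

    def sequence_expand_ref(x_data, x_lens, y_ref_lens):
        # x_lens: length-based LoD of x; y_ref_lens: lengths of y at ref_level
        out, out_lens, offset = [], [], 0
        for x_len, repeat in zip(x_lens, y_ref_lens):
            seq = x_data[offset:offset + x_len]
            for _ in range(repeat):
                out.extend(seq)
                out_lens.append(x_len)
            offset += x_len
        return out, out_lens

    data, lens = sequence_expand_ref(['a', 'b', 'c', 'd'], [2, 2], [2, 2])
    # data -> ['a', 'b', 'a', 'b', 'c', 'd', 'c', 'd'], lens -> [2, 2, 2, 2]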
...@@ -2731,23 +2778,24 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None): ...@@ -2731,23 +2778,24 @@ def reduce_sum(input, dim=None, keep_dim=False, name=None):
def reduce_mean(input, dim=None, keep_dim=False, name=None): def reduce_mean(input, dim=None, keep_dim=False, name=None):
""" """
Computes the mean of tensor elements over the given dimension. Computes the mean of the input tensor's elements along the given dimension.
Args: Args:
input (Variable): The input variable which is a Tensor or LoDTensor. input (Variable): The input variable which is a Tensor or LoDTensor.
dim (list|int|None): The dimensions along which the mean is computed. If dim (list|int|None): The dimension along which the mean is computed. If
:attr:`None`, compute the mean over all elements of :attr:`input` `None`, compute the mean over all elements of :attr:`input`
and return a Tensor variable with a single element, otherwise and return a variable with a single element, otherwise it
must be in the range :math:`[-rank(input), rank(input))`. If must be in the range :math:`[-rank(input), rank(input))`. If
:math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. :math:`dim[i] < 0`, the dimension to reduce is
:math:`rank(input) + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true. than the :attr:`input` unless :attr:`keep_dim` is true.
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set `None`, the layer
will be named automatically. will be named automatically.
Returns: Returns:
Variable: The reduced Tensor variable. Variable: The reduced mean Variable.
Examples: Examples:
.. code-block:: python .. code-block:: python
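The body of the example above lies outside this hunk; as a stand-in, the reduction semantics can be checked against numpy. This is a hedged sketch, not the original example.

.. code-block:: python

    import numpy as np

    x = np.random.rand(2, 3, 4).astype('float32')
    np.mean(x, axis=1)   # corresponds to reduce_mean(input, dim=[1]); shape (2, 4)
    np.mean(x)           # corresponds to dim=None: a single element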
...@@ -3020,32 +3068,33 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): ...@@ -3020,32 +3068,33 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes norm. For a 1-D tensor (`dim` is fixed to 0), this layer computes
.. math:: .. math::
y = \frac{x}{ \sqrt{\sum {x^2} + epsilon }}
y = \\frac{x}{ \sqrt{\sum {x^2} + epsilon }}
For `x` with more dimensions, this layer independently normalizes each 1-D For `x` with more dimensions, this layer independently normalizes each 1-D
slice along dimension `axis`. slice along dimension `axis`.
Args: Args:
x(Variable|list): The input tensor to l2_normalize layer. x(Variable|list): The input tensor to l2_normalize layer.
axis(int): The axis on which to apply normalization. If `axis < 0`, axis(int): The axis on which to apply normalization. If `axis < 0`, \
the dimension to normalize is rank(X) + axis. -1 is the the dimension to normalize is rank(X) + axis. -1 is the
last dimension. last dimension.
epsilon(float): The epsilon value is used to avoid division by zero, epsilon(float): The epsilon value is used to avoid division by zero, \
the default value is 1e-12. the default value is 1e-12.
name(str|None): A name for this layer(optional). If set None, the layer name(str|None): A name for this layer(optional). If set None, the layer \
will be named automatically. will be named automatically.
Returns: Returns:
Variable: The output tensor variable. Variable: The output tensor variable, which has the same shape as `x`.
Examples: Examples:
.. code-block:: python .. code-block:: python
data = fluid.layers.data(name="data", data = fluid.layers.data(name="data",
shape=(3, 17, 13), shape=(3, 17, 13),
dtype="float32") dtype="float32")
normed = fluid.layers.l2_normalize(x=data, axis=1) normed = fluid.layers.l2_normalize(x=data, axis=1)
""" """
if len(x.shape) == 1: if len(x.shape) == 1:
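A numpy sketch of the normalization along `axis`, matching the formula above; it is illustrative only, and the epsilon value is taken from the signature.

.. code-block:: python

    import numpy as np

    x = np.random.rand(3, 17, 13).astype('float32')
    epsilon = 1e-12
    norm = np.sqrt(np.sum(np.square(x), axis=1, keepdims=True) + epsilon)
    y = x / norm              # same shape as x, unit L2 norm along axis 1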
...@@ -3324,7 +3373,7 @@ def ctc_greedy_decoder(input, blank, name=None): ...@@ -3324,7 +3373,7 @@ def ctc_greedy_decoder(input, blank, name=None):
[0.2, 0.2, 0.1, 0.5], [0.2, 0.2, 0.1, 0.5],
[0.5, 0.1, 0.3, 0.1]] [0.5, 0.1, 0.3, 0.1]]
input.lod = [[0, 4, 8]] input.lod = [[4, 4]]
Then: Then:
...@@ -3332,7 +3381,7 @@ def ctc_greedy_decoder(input, blank, name=None): ...@@ -3332,7 +3381,7 @@ def ctc_greedy_decoder(input, blank, name=None):
[1], [1],
[3]] [3]]
output.lod = [[0, 2, 3]] output.lod = [[2, 1]]
Args: Args:
...@@ -3349,7 +3398,7 @@ def ctc_greedy_decoder(input, blank, name=None): ...@@ -3349,7 +3398,7 @@ def ctc_greedy_decoder(input, blank, name=None):
Returns: Returns:
Variable: CTC greedy decode result. If all the sequences in result were Variable: CTC greedy decode result. If all the sequences in result were
empty, the result LoDTensor will be [-1] with LoD [[0]] and dims [1, 1]. empty, the result LoDTensor will be [-1] with LoD [[]] and dims [1, 1].
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -3439,7 +3488,7 @@ def sequence_reshape(input, new_dim): ...@@ -3439,7 +3488,7 @@ def sequence_reshape(input, new_dim):
.. code-block:: text .. code-block:: text
x is a LoDTensor: x is a LoDTensor:
x.lod = [[0, 2, 6]] x.lod = [[2, 4]]
x.data = [[1, 2], [3, 4], x.data = [[1, 2], [3, 4],
[5, 6], [7, 8], [9, 10], [11, 12]] [5, 6], [7, 8], [9, 10], [11, 12]]
x.dims = [6, 2] x.dims = [6, 2]
...@@ -3447,7 +3496,7 @@ def sequence_reshape(input, new_dim): ...@@ -3447,7 +3496,7 @@ def sequence_reshape(input, new_dim):
set new_dim = 4 set new_dim = 4
then out is a LoDTensor: then out is a LoDTensor:
out.lod = [[0, 1, 3]] out.lod = [[1, 2]]
out.data = [[1, 2, 3, 4], out.data = [[1, 2, 3, 4],
[5, 6, 7, 8], [9, 10, 11, 12]] [5, 6, 7, 8], [9, 10, 11, 12]]
out.dims = [3, 4] out.dims = [3, 4]
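The LoD arithmetic in the example can be sketched in numpy: the data is simply re-laid-out row-wise, and each sequence's length scales by old_dim / new_dim. This is an illustration, not the kernel.

.. code-block:: python

    import numpy as np

    def sequence_reshape_ref(x, seq_lens, new_dim):
        old_dim = x.shape[1]
        out = x.reshape([-1, new_dim])
        # each sequence keeps its own data; only its row count changes
        new_lens = [l * old_dim // new_dim for l in seq_lens]
        return out, new_lens

    x = np.arange(1, 13).reshape([6, 2])
    out, lens = sequence_reshape_ref(x, [2, 4], 4)
    # out.shape -> (3, 4), lens -> [1, 2]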
...@@ -3499,13 +3548,41 @@ def nce(input, ...@@ -3499,13 +3548,41 @@ def nce(input,
input (Variable): input variable. input (Variable): input variable.
label (Variable): label. label (Variable): label.
num_total_classes (int):${num_total_classes_comment} num_total_classes (int):${num_total_classes_comment}
sample_weight (int): ${sample_weight_comment} sample_weight (Variable|None): A Variable of shape [batch_size, 1]
storing a weight for each sample. The default weight for each
sample is 1.0.
param_attr (ParamAttr|None): attributes for parameter param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias bias_attr (ParamAttr|None): attributes for bias
num_neg_samples (int): ${num_neg_samples_comment} num_neg_samples (int): ${num_neg_samples_comment}
Returns: Returns:
Variable: output of nce layer. Variable: The output nce loss.
Examples:
.. code-block:: python
window_size = 5
words = []
for i in xrange(window_size):
words.append(layers.data(
name='word_{0}'.format(i), shape=[1], dtype='int64'))
dict_size = 10000
label_word = int(window_size / 2) + 1
embs = []
for i in xrange(window_size):
if i == label_word:
continue
emb = layers.embedding(input=words[i], size=[dict_size, 32],
param_attr='emb.w', is_sparse=True)
embs.append(emb)
embs = layers.concat(input=embs, axis=1)
loss = layers.nce(input=embs, label=words[label_word],
num_total_classes=dict_size, param_attr='nce.w',
bias_attr='nce.b')
""" """
helper = LayerHelper('nce', **locals()) helper = LayerHelper('nce', **locals())
assert isinstance(input, Variable) assert isinstance(input, Variable)
...@@ -3690,7 +3767,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None): ...@@ -3690,7 +3767,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
output.dims = {8, 9} output.dims = {8, 9}
output.lod = [[0, 4, 8]] output.lod = [[4, 4]]
The simple usage is: The simple usage is:
...@@ -3863,31 +3940,30 @@ def softmax_with_cross_entropy(logits, label, soft_label=False): ...@@ -3863,31 +3940,30 @@ def softmax_with_cross_entropy(logits, label, soft_label=False):
def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
""" """
**Smooth L1 Loss Operator. ** This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`.
It takes the first dimension of :attr:`x` and :attr:`y` as batch size.
This operator computes the smooth L1 loss for X and Y.
The operator takes the first dimension of X and Y as batch size.
For each instance, it computes the smooth L1 loss element by element first For each instance, it computes the smooth L1 loss element by element first
and then sums all the losses. So the shape of Out is [batch_size, 1]. and then sums all the losses. So the shape of the output Variable is
[batch_size, 1].
Args: Args:
x (Variable): A tensor with rank at least 2. The input value of smooth x (Variable): A tensor with rank at least 2. The input value of smooth
L1 loss op with shape [batch_size, dim1, ..., dimN]. L1 loss op with shape [batch_size, dim1, ..., dimN].
y (Variable): A tensor with rank at least 2. The target value of smooth y (Variable): A tensor with rank at least 2. The target value of smooth
L1 loss op with same shape as x. L1 loss op with same shape as :attr:`x`.
inside_weight (Variable|None): A tensor with rank at least 2. This inside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with x. If provided, input is optional and should have same shape with :attr:`x`. If
the result of (x - y) will be multiplied by this tensor element by provided, the result of (:attr:`x` - :attr:`y`) will be multiplied
element. by this tensor element by element.
outside_weight (Variable|None): A tensor with rank at least 2. This outside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with x. If provided, input is optional and should have same shape with :attr:`x`. If
the out smooth L1 loss will be multiplied by this tensor element provided, the out smooth L1 loss will be multiplied by this tensor
by element. element by element.
sigma (float|None): Hyper parameter of smooth L1 loss op. A float scalar sigma (float|None): Hyper parameter of smooth L1 loss layer. A float
with default value 1.0. scalar with default value 1.0.
Returns: Returns:
Variable: A tensor with rank be 2. The output smooth L1 loss with Variable: The output smooth L1 loss with shape [batch_size, 1].
shape [batch_size, 1].
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -3898,6 +3974,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -3898,6 +3974,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
fc = fluid.layers.fc(input=data, size=100) fc = fluid.layers.fc(input=data, size=100)
out = fluid.layers.smooth_l1(x=fc, y=label) out = fluid.layers.smooth_l1(x=fc, y=label)
""" """
helper = LayerHelper('smooth_l1_loss', **locals()) helper = LayerHelper('smooth_l1_loss', **locals())
diff = helper.create_tmp_variable(dtype=x.dtype) diff = helper.create_tmp_variable(dtype=x.dtype)
loss = helper.create_tmp_variable(dtype=x.dtype) loss = helper.create_tmp_variable(dtype=x.dtype)
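For reference, the element-wise loss that is summed per instance is the usual Fast R-CNN smooth L1 form; the numpy sketch below assumes that form and is not lifted from the kernel.

.. code-block:: python

    import numpy as np

    def smooth_l1_ref(x, y, sigma=1.0):
        d = x - y
        sigma2 = sigma * sigma
        abs_d = np.abs(d)
        elem = np.where(abs_d < 1.0 / sigma2,
                        0.5 * sigma2 * d * d,
                        abs_d - 0.5 / sigma2)
        # sum the element-wise losses per instance -> shape [batch_size, 1]
        return elem.reshape([x.shape[0], -1]).sum(axis=1, keepdims=True)

    x = np.random.rand(4, 10).astype('float32')
    y = np.random.rand(4, 10).astype('float32')
    print(smooth_l1_ref(x, y).shape)      # (4, 1)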
...@@ -3917,32 +3994,20 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None): ...@@ -3917,32 +3994,20 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
def one_hot(input, depth): def one_hot(input, depth):
""" """
One Hot Operator. This operator creates the one-hot representations for input This layer creates the one-hot representations for input indices.
index values. The following example will help to explain the function of this
operator.
Args: Args:
input(variable): A Tensor/LodTensor of indices, last dimension must be 1. input(Variable): Input indices, last dimension must be 1.
depth(scalar): an integer defining the depth of the one hot dimension. depth(scalar): An integer defining the depth of the one-hot dimension.
Returns: Returns:
The one-hot tensor or LodTensor, same as input. Variable: The one-hot representations of input.
Examples: Examples:
.. code-block:: python .. code-block:: python
X is a LoDTensor: label = layers.data(name="label", shape=[1], dtype="float32")
X.lod = [[0, 1, 4]] one_hot_label = layers.one_hot(input=label, depth=10)
X.shape = [4, 1]
X.data = [[1], [1], [3], [0]]
set depth = 4
Out is a LoDTensor:
Out.lod = [[0, 1, 4]]
Out.shape = [4, 4]
Out.data = [[0., 1., 0., 0.],
[0., 1., 0., 0.],
[0., 0., 0., 1.],
[1., 0., 0., 0.]]
""" """
helper = LayerHelper("one_hot", **locals()) helper = LayerHelper("one_hot", **locals())
one_hot_out = helper.create_tmp_variable(dtype='float32') one_hot_out = helper.create_tmp_variable(dtype='float32')
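The numeric illustration that the old docstring carried can still be reproduced with a short numpy sketch (illustrative only; indices of shape [N, 1] with depth 4).

.. code-block:: python

    import numpy as np

    def one_hot_ref(indices, depth):
        # indices: int array of shape [N, 1] -> float array of shape [N, depth]
        out = np.zeros((indices.shape[0], depth), dtype='float32')
        out[np.arange(indices.shape[0]), indices[:, 0]] = 1.0
        return out

    print(one_hot_ref(np.array([[1], [1], [3], [0]]), depth=4))
    # [[0. 1. 0. 0.]
    #  [0. 1. 0. 0.]
    #  [0. 0. 0. 1.]
    #  [1. 0. 0. 0.]]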
...@@ -4086,73 +4151,74 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None): ...@@ -4086,73 +4151,74 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
def lod_reset(x, y=None, target_lod=None): def lod_reset(x, y=None, target_lod=None):
""" """
LoD Reset Operator. Set LoD of **x** to a new one specified by **y** or Set LoD of :attr:`x` to a new one specified by :attr:`y` or
**target_lod**. When **y** provided, **y.lod** would be considered as target :attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be
LoD first, otherwise **y.data** would be considered as target LoD. If **y** considered as target LoD first, otherwise :attr:`y.data` would be
is not provided, target LoD should be specified by **target_lod**. considered as target LoD. If :attr:`y` is not provided, target LoD should
If target LoD is specified by **Y.data** or **target_lod**, only one level be specified by :attr:`target_lod`. If target LoD is specified by
LoD is supported. :attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported.
.. code-block:: text .. code-block:: text
* Example 1: * Example 1:
Given a 1-level LoDTensor x: Given a 1-level LoDTensor x:
x.lod = [[ 0, 2, 5 6 ]] x.lod = [[ 2, 3, 1 ]]
x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
x.dims = [6, 1] x.dims = [6, 1]
target_lod: [0, 4, 6] target_lod: [4, 2]
then we get a 1-level LoDTensor: then we get a 1-level LoDTensor:
out.lod = [[ 0, 4, 6 ]] out.lod = [[4, 2]]
out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
out.dims = [6, 1] out.dims = [6, 1]
* Example 2: * Example 2:
Given a 1-level LoDTensor x: Given a 1-level LoDTensor x:
x.lod = [[ 0, 2, 5 6 ]] x.lod = [[2, 3, 1]]
x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
x.dims = [6, 1] x.dims = [6, 1]
y is a Tensor: y is a Tensor:
y.data = [[0, 2, 6]] y.data = [[2, 4]]
y.dims = [1, 3] y.dims = [1, 2]
then we get a 1-level LoDTensor: then we get a 1-level LoDTensor:
out.lod = [[ 0, 2, 6 ]] out.lod = [[2, 4]]
out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
out.dims = [6, 1] out.dims = [6, 1]
* Example 3: * Example 3:
Given a 1-level LoDTensor x: Given a 1-level LoDTensor x:
x.lod = [[ 0, 2, 5 6 ]] x.lod = [[2, 3, 1]]
x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
x.dims = [6, 1] x.dims = [6, 1]
y is a 2-level LoDTensor: y is a 2-level LoDTensor:
y.lod = [[0, 2, 4], [0, 2, 5, 6]] y.lod = [[2, 2], [2, 2, 1, 1]]
y.data = [[1.1], [2.1], [3.1], [4.1], [5.1], [6.1]] y.data = [[1.1], [2.1], [3.1], [4.1], [5.1], [6.1]]
y.dims = [6, 1] y.dims = [6, 1]
then we get a 2-level LoDTensor: then we get a 2-level LoDTensor:
out.lod = [[0, 2, 4], [0, 2, 5, 6]] out.lod = [[2, 2], [2, 2, 1, 1]]
out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]] out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
out.dims = [6, 1] out.dims = [6, 1]
Args: Args:
x (Variable): Input variable which could be a Tensor or LodTensor. x (Variable): Input variable which could be a Tensor or LodTensor.
y (Variable|None): If provided, output's LoD would be derived from y. y (Variable|None): If provided, output's LoD would be derived
from :attr:`y`.
target_lod (list|tuple|None): One level LoD which should be considered target_lod (list|tuple|None): One level LoD which should be considered
as target LoD when y not provided. as target LoD when :attr:`y` not provided.
Returns: Returns:
Variable: Output variable with LoD specified by this operator. Variable: Output variable with LoD specified by this layer.
Raises: Raises:
ValueError: If y and target_lod are both None. ValueError: If :attr:`y` and :attr:`target_lod` are both None.
Examples: Examples:
.. code-block:: python .. code-block:: python
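The example body falls outside this hunk; as a stand-in, here is a minimal usage sketch of the layer with a length-based target LoD (names are illustrative, not the original example).

.. code-block:: python

    x = fluid.layers.data(name='x', shape=[1], dtype='float32', lod_level=1)
    # re-partition the same rows into two sequences of lengths 4 and 2
    out = fluid.layers.lod_reset(x=x, target_lod=[4, 2])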
...@@ -4659,10 +4725,6 @@ def random_crop(x, shape, seed=None): ...@@ -4659,10 +4725,6 @@ def random_crop(x, shape, seed=None):
""" """
${comment} ${comment}
Examples:
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
Args: Args:
x(${x_type}): ${x_comment} x(${x_type}): ${x_comment}
shape(${shape_type}): ${shape_comment} shape(${shape_type}): ${shape_comment}
...@@ -4671,7 +4733,10 @@ def random_crop(x, shape, seed=None): ...@@ -4671,7 +4733,10 @@ def random_crop(x, shape, seed=None):
Returns: Returns:
${out_comment} ${out_comment}
Examples:
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
""" """
helper = LayerHelper("random_crop", **locals()) helper = LayerHelper("random_crop", **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
......
...@@ -129,8 +129,21 @@ def create_global_var(shape, ...@@ -129,8 +129,21 @@ def create_global_var(shape,
def cast(x, dtype): def cast(x, dtype):
""" """
This function takes in the input with input_dtype This layer takes in the Variable :attr:`x` with :attr:`x.dtype` and casts
and casts it to the output_dtype as the output. it to the output with :attr:`dtype`.
Args:
x (Variable): The input Variable for casting.
dtype(np.dtype|core.VarDesc.VarType|str): Data type of the output Variable.
Returns:
Variable: The output Variable after casting.
Examples:
.. code-block:: python
data = fluid.layers.data(name='x', shape=[13], dtype='float32')
result = fluid.layers.cast(x=data, dtype='float64')
""" """
helper = LayerHelper('cast', **locals()) helper = LayerHelper('cast', **locals())
out = helper.create_tmp_variable(dtype=dtype) out = helper.create_tmp_variable(dtype=dtype)
...@@ -515,11 +528,27 @@ def save_combine(x, file_path, overwrite=True): ...@@ -515,11 +528,27 @@ def save_combine(x, file_path, overwrite=True):
Saves a list of variables into a single file. Saves a list of variables into a single file.
Args: Args:
x(list): A list of Tensor/LoDTensor to be saved together in a single file. x(list): A list of Tensor/LoDTensor variables to be saved together in
a single file.
file_path(str): The file path where variables will be saved. file_path(str): The file path where variables will be saved.
overwrite(bool): Whether or not to overwrite the given file if it already overwrite(bool): Whether or not to overwrite the given file if it already
exists. If set to 'False' and the file exists, a runtime exists. If set to 'False' and the file exists, a runtime
error will be thrown. error will be thrown.
Returns:
There is no return value.
Examples:
.. code-block:: python
v1 = fluid.layers.data(name="data_1",
shape=(4, 6),
dtype="float32")
v2 = fluid.layers.data(name="data_2",
shape=(6, 8, 4),
dtype="float32")
fluid.layers.save_combine([v1, v2], file_path="output")
""" """
helper = LayerHelper("save_combine", **locals()) helper = LayerHelper("save_combine", **locals())
helper.append_op( helper.append_op(
......
...@@ -18,80 +18,6 @@ import numpy as np ...@@ -18,80 +18,6 @@ import numpy as np
__all__ = ['create_lod_tensor', 'create_random_int_lodtensor'] __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
def _validate_lod(lod, tensor_height=-1):
"""Check whether the input length-based lod info is valid.
There are several things to check:
1. lod should be a list of lists. Empty list is fine.
2. The length of each sublist (a lod level) should be at least one.
3. Each element in each lod level should be an integer greater than 0.
4. The sum of one lod level should be equal to the length of the next lod level.
5. The sum of the last lod level should be equal to the tensor height.
Bypass this check if user does not provide tensor_height as input.
Args:
lod: the length-based lod info, e.g., [[2, 3], [2, 1, 2, 3, 4]].
tensor_height: the outermost dimension of the tensor with which the input
lod is associated with.
Returns:
A boolean indicating whether the input lod is valid or not.
"""
assert isinstance(lod, list), "lod should be a list"
# Empty lod is fine
if len(lod) == 0:
return True
lod_sum = []
for level in lod:
assert isinstance(level, list), "each item in lod should be a list"
# Each level of lod should have at least one length info
if len(level) < 1:
return False
level_sum = 0
for lod_len in level:
# Each length in a level should be > 0
if lod_len <= 0:
return False
level_sum += lod_len
lod_sum.append(level_sum)
for idx, val in enumerate(lod_sum[:-1]):
# Each level's sum should be equal to
# the number of items in the next level
if val != len(lod[idx + 1]):
return False
if tensor_height == -1:
return True
else:
# Last level's sum should be equal to the tensor height
return lod_sum[-1] == tensor_height
def _convert_lod(lod):
"""Convert a length-based lod to a offset-based lod.
If the length-based lod is [[2, 3], [2, 1, 2, 3, 4]],
then the offset-based lod is [[0, 2, 5], [0, 2, 3, 5, 8, 12]].
Args:
lod: a length-based lod info.
Returns:
A list of lists as the offset-based lod converted to from the input lod.
"""
new_lod = []
for level in lod:
cur_len = 0
new_level = [cur_len]
for lod_len in level:
cur_len += lod_len
new_level.append(cur_len)
new_lod.append(new_level)
return new_lod
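Most of the remaining hunks in this commit rewrite offset-based LoD (e.g. [[0, 2, 5]]) into the length-based form (e.g. [[2, 3]]) that the new recursive_sequence_lengths API expects; the mapping is just adjacent differences, sketched below (the helper name is illustrative).

.. code-block:: python

    def offsets_to_lengths(offset_lod):
        # inverse of the removed _convert_lod: [[0, 2, 5]] -> [[2, 3]]
        return [[level[i + 1] - level[i] for i in range(len(level) - 1)]
                for level in offset_lod]

    print(offsets_to_lengths([[0, 2, 5], [0, 2, 3, 5, 8, 12]]))
    # [[2, 3], [2, 1, 2, 3, 4]]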
def create_lod_tensor(data, lod, place): def create_lod_tensor(data, lod, place):
"""Create a lod tensor from a numpy array, a list, or an existing lod tensor. """Create a lod tensor from a numpy array, a list, or an existing lod tensor.
...@@ -139,11 +65,11 @@ def create_lod_tensor(data, lod, place): ...@@ -139,11 +65,11 @@ def create_lod_tensor(data, lod, place):
flattened_data = flattened_data.reshape([len(flattened_data), 1]) flattened_data = flattened_data.reshape([len(flattened_data), 1])
return create_lod_tensor(flattened_data, lod, place) return create_lod_tensor(flattened_data, lod, place)
elif isinstance(data, np.ndarray): elif isinstance(data, np.ndarray):
assert _validate_lod(lod,
data.shape[0]), "the provided lod info is invalid"
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(data, place) tensor.set(data, place)
tensor.set_lod(_convert_lod(lod)) tensor.set_recursive_sequence_lengths(lod)
assert tensor.has_valid_recursive_sequence_lengths(
), "the provided lod info is invalid"
return tensor return tensor
else: else:
raise TypeError( raise TypeError(
...@@ -181,9 +107,8 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high): ...@@ -181,9 +107,8 @@ def create_random_int_lodtensor(lod, base_shape, place, low, high):
A fluid LoDTensor object with tensor data and lod info. A fluid LoDTensor object with tensor data and lod info.
""" """
assert isinstance(base_shape, list), "base_shape should be a list" assert isinstance(base_shape, list), "base_shape should be a list"
converted_lod = _convert_lod(lod)
# append the total number of basic elements to the front of its shape # append the total number of basic elements to the front of its shape
overall_shape = [converted_lod[-1][-1]] + base_shape overall_shape = [sum(lod[-1])] + base_shape
# the range of integer data elements is [low, high] # the range of integer data elements is [low, high]
data = np.random.random_integers(low, high, overall_shape).astype("int64") data = np.random.random_integers(low, high, overall_shape).astype("int64")
return create_lod_tensor(data, lod, place) return create_lod_tensor(data, lod, place)
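With the length-based API, a lod tensor can be built and inspected as in the sketch below; it is drawn from the calls exercised in the tests further down, with a CPU place assumed.

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.lod_tensor import create_lod_tensor

    data = np.random.random([10, 1]).astype('float32')
    lod = [[2, 1], [3, 3, 4]]                  # two LoD levels, length-based
    t = create_lod_tensor(data, lod, fluid.CPUPlace())
    print(t.recursive_sequence_lengths())      # [[2, 1], [3, 3, 4]]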
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import re import re
from collections import defaultdict from collections import defaultdict
from paddle.fluid.framework import Program from paddle.fluid.framework import Program, Variable
import framework import framework
import layers import layers
from backward import append_backward from backward import append_backward
...@@ -41,7 +41,10 @@ class Optimizer(object): ...@@ -41,7 +41,10 @@ class Optimizer(object):
but need to use one of its implementations. but need to use one of its implementations.
""" """
def __init__(self, learning_rate, regularization=None): def __init__(self,
learning_rate,
regularization=None,
LARS_weight_decay=0.0):
if not isinstance(learning_rate, float) and \ if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, framework.Variable): not isinstance(learning_rate, framework.Variable):
raise TypeError("learning rate should be float or Variable") raise TypeError("learning rate should be float or Variable")
...@@ -61,6 +64,7 @@ class Optimizer(object): ...@@ -61,6 +64,7 @@ class Optimizer(object):
# {accum_name : { paramter_name : accumulator_for_parameter, ...}, ...} # {accum_name : { paramter_name : accumulator_for_parameter, ...}, ...}
self._accumulators = defaultdict(lambda: dict()) self._accumulators = defaultdict(lambda: dict())
self.helper = None self.helper = None
self._LARS_weight_decay = LARS_weight_decay
def _create_global_learning_rate(self): def _create_global_learning_rate(self):
lr = self.global_learning_rate() lr = self.global_learning_rate()
...@@ -100,10 +104,15 @@ class Optimizer(object): ...@@ -100,10 +104,15 @@ class Optimizer(object):
# create learning rate variable for every parameter # create learning rate variable for every parameter
param = param_and_grad[0] param = param_and_grad[0]
param_lr = param.optimize_attr['learning_rate'] param_lr = param.optimize_attr['learning_rate']
if param_lr == 1.0: if type(param_lr) == Variable:
return self.global_learning_rate() # param learning rate has been updated (LARS)
print("returns updated param lr ", param_lr)
return param_lr
else: else:
return self.global_learning_rate() * param_lr if param_lr == 1.0:
return self.global_learning_rate()
else:
return self.global_learning_rate() * param_lr
def _create_accumulators(self, block, parameters): def _create_accumulators(self, block, parameters):
"""Create all accumulators needed by the parameters """Create all accumulators needed by the parameters
...@@ -210,6 +219,10 @@ class Optimizer(object): ...@@ -210,6 +219,10 @@ class Optimizer(object):
self._create_accumulators(loss.block, self._create_accumulators(loss.block,
[p[0] for p in parameters_and_grads]) [p[0] for p in parameters_and_grads])
self._create_global_learning_rate() self._create_global_learning_rate()
if self._LARS_weight_decay > 0.0:
layers.append_LARS(parameters_and_grads,
self.global_learning_rate(),
self._LARS_weight_decay)
optimize_ops = [] optimize_ops = []
for param_and_grad in parameters_and_grads: for param_and_grad in parameters_and_grads:
......
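The new LARS_weight_decay argument (used with Adam in the next hunk) enables layer-wise adaptive rate scaling. The per-parameter scaling rule it is meant to apply is sketched below under the standard LARS formulation (You et al., 2017); this is an assumption about what append_LARS computes, not a copy of it.

.. code-block:: python

    import numpy as np

    def lars_local_lr(global_lr, param, grad, lars_weight_decay):
        # scale the global rate by ||w|| / (||g|| + wd * ||w||) per parameter
        p_norm = np.linalg.norm(param)
        g_norm = np.linalg.norm(grad)
        return global_lr * p_norm / (g_norm + lars_weight_decay * p_norm)

    w = np.random.rand(100).astype('float32')
    g = np.random.rand(100).astype('float32')
    print(lars_local_lr(0.001, w, g, lars_weight_decay=0.3))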
...@@ -94,7 +94,7 @@ def train(nn_type, ...@@ -94,7 +94,7 @@ def train(nn_type,
test_program = fluid.default_main_program().clone(for_test=True) test_program = fluid.default_main_program().clone(for_test=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.001) optimizer = fluid.optimizer.Adam(learning_rate=0.001, LARS_weight_decay=0.3)
optimizer.minimize(avg_loss) optimizer.minimize(avg_loss)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
......
...@@ -22,12 +22,11 @@ class TestDataFeeder(unittest.TestCase): ...@@ -22,12 +22,11 @@ class TestDataFeeder(unittest.TestCase):
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([img, label], fluid.CPUPlace()) feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])]) result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
print(result)
self.assertEqual(result['image'].shape(), [2, 1, 28, 28]) self.assertEqual(result['image'].shape(), [2, 1, 28, 28])
self.assertEqual(result['label'].shape(), [2, 1]) self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['image'].lod(), []) self.assertEqual(result['image'].recursive_sequence_lengths(), [])
self.assertEqual(result['label'].lod(), []) self.assertEqual(result['label'].recursive_sequence_lengths(), [])
def test_lod_level_1_converter(self): def test_lod_level_1_converter(self):
# lod_level = 1 # lod_level = 1
...@@ -42,12 +41,12 @@ class TestDataFeeder(unittest.TestCase): ...@@ -42,12 +41,12 @@ class TestDataFeeder(unittest.TestCase):
# label = [1] * len(data) # label = [1] * len(data)
result = feeder.feed( result = feeder.feed(
[([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])]) [([1, 2, 3], [1]), ([4, 5], [1]), ([6, 7, 8, 9], [1])])
print(result)
self.assertEqual(result['sentences'].shape(), [9, 1]) self.assertEqual(result['sentences'].shape(), [9, 1])
self.assertEqual(result['label'].shape(), [3, 1]) self.assertEqual(result['label'].shape(), [3, 1])
self.assertEqual(result['sentences'].lod(), [[0, 3, 5, 9]]) self.assertEqual(result['sentences'].recursive_sequence_lengths(),
self.assertEqual(result['label'].lod(), []) [[3, 2, 4]])
self.assertEqual(result['label'].recursive_sequence_lengths(), [])
def test_lod_level_2_converter(self): def test_lod_level_2_converter(self):
# lod_level = 2 # lod_level = 2
...@@ -62,12 +61,12 @@ class TestDataFeeder(unittest.TestCase): ...@@ -62,12 +61,12 @@ class TestDataFeeder(unittest.TestCase):
# label = [1] * len(data) # label = [1] * len(data)
result = feeder.feed( result = feeder.feed(
[([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])]) [([[1, 2, 3], [4, 5]], [1]), ([[6, 7, 8, 9]], [1])])
print(result)
self.assertEqual(result['paragraphs'].shape(), [9, 1]) self.assertEqual(result['paragraphs'].shape(), [9, 1])
self.assertEqual(result['label'].shape(), [2, 1]) self.assertEqual(result['label'].shape(), [2, 1])
self.assertEqual(result['paragraphs'].lod(), [[0, 2, 3], [0, 3, 5, 9]]) self.assertEqual(result['paragraphs'].recursive_sequence_lengths(),
self.assertEqual(result['label'].lod(), []) [[2, 1], [3, 2, 4]])
self.assertEqual(result['label'].recursive_sequence_lengths(), [])
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -13,44 +13,41 @@ ...@@ -13,44 +13,41 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor, _validate_lod, _convert_lod from paddle.fluid.lod_tensor import create_lod_tensor, create_random_int_lodtensor
import numpy import numpy as np
import unittest import unittest
class TestLoDTensor(unittest.TestCase): class TestLoDTensor(unittest.TestCase):
def test_validate_lod(self): def test_pybind_lod(self):
lod = (1, 2, 1) tensor = fluid.LoDTensor()
self.assertRaises(AssertionError, _validate_lod, lod, -1)
lod = [[1, 2], (2, 3)]
self.assertRaises(AssertionError, _validate_lod, lod, -1)
lod = [1, 2, 3]
self.assertRaises(AssertionError, _validate_lod, lod, -1)
lod = [] lod = []
self.assertTrue(_validate_lod(lod, -1)) tensor.set_recursive_sequence_lengths(lod)
lod = [[], [1], [3]] lod = [[], [1], [3]]
self.assertFalse(_validate_lod(lod, -1)) self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod)
lod = [[0], [-1], [3]] lod = [[0], [2], [3]]
self.assertFalse(_validate_lod(lod, -1)) self.assertRaises(Exception, tensor.set_recursive_sequence_lengths, lod)
# Each level's sum should be equal to the number of items in the next level
# Moreover, last level's sum should be equal to the tensor height
lod = [[2, 3], [1, 3, 1, 2, 1]]
self.assertTrue(_validate_lod(lod, tensor_height=8))
lod = [[1, 3], [2, 1, 3]]
self.assertFalse(_validate_lod(lod, tensor_height=6))
lod = [[1, 3], [2, 1, 3, 4]]
self.assertFalse(_validate_lod(lod, tensor_height=5))
def test_convert_lod(self):
lod = [[1, 2, 3]] lod = [[1, 2, 3]]
converted_lod = [[0, 1, 3, 6]] tensor.set_recursive_sequence_lengths(lod)
self.assertEqual(_convert_lod(lod), converted_lod) self.assertEqual(tensor.recursive_sequence_lengths(), lod)
tensor.set(np.random.random([6, 1]), fluid.CPUPlace())
self.assertTrue(tensor.has_valid_recursive_sequence_lengths())
tensor.set(np.random.random([9, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
# Each level's sum should be equal to the number of items in the next level
# Moreover, last level's sum should be equal to the tensor height
lod = [[2, 3], [1, 3, 1, 2, 2]]
tensor.set_recursive_sequence_lengths(lod)
self.assertEqual(tensor.recursive_sequence_lengths(), lod)
tensor.set(np.random.random([8, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
lod = [[2, 3], [1, 3, 1, 2, 1]] lod = [[2, 3], [1, 3, 1, 2, 1]]
converted_lod = [[0, 2, 5], [0, 1, 4, 5, 7, 8]] tensor.set_recursive_sequence_lengths(lod)
self.assertEqual(_convert_lod(lod), converted_lod) self.assertTrue(tensor.has_valid_recursive_sequence_lengths())
tensor.set(np.random.random([9, 1]), fluid.CPUPlace())
self.assertFalse(tensor.has_valid_recursive_sequence_lengths())
def test_create_lod_tensor(self): def test_create_lod_tensor(self):
# Create LoDTensor from a list # Create LoDTensor from a list
...@@ -60,19 +57,19 @@ class TestLoDTensor(unittest.TestCase): ...@@ -60,19 +57,19 @@ class TestLoDTensor(unittest.TestCase):
self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod, self.assertRaises(AssertionError, create_lod_tensor, data, wrong_lod,
fluid.CPUPlace()) fluid.CPUPlace())
tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace()) tensor = create_lod_tensor(data, correct_lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 3, 5]]) self.assertEqual(tensor.recursive_sequence_lengths(), correct_lod)
# Create LoDTensor from numpy array # Create LoDTensor from numpy array
data = numpy.random.random([10, 1]) data = np.random.random([10, 1])
lod = [[2, 1], [3, 3, 4]] lod = [[2, 1], [3, 3, 4]]
tensor = create_lod_tensor(data, lod, fluid.CPUPlace()) tensor = create_lod_tensor(data, lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]]) self.assertEqual(tensor.recursive_sequence_lengths(), lod)
# Create LoDTensor from another LoDTensor, they are different instances # Create LoDTensor from another LoDTensor, they are different instances
new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]] new_lod = [[2, 2, 1], [1, 2, 2, 3, 2]]
new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace()) new_tensor = create_lod_tensor(tensor, new_lod, fluid.CPUPlace())
self.assertEqual(tensor.lod(), [[0, 2, 3], [0, 3, 6, 10]]) self.assertEqual(tensor.recursive_sequence_lengths(), lod)
self.assertEqual(new_tensor.lod(), [[0, 2, 4, 5], [0, 1, 3, 5, 8, 10]]) self.assertEqual(new_tensor.recursive_sequence_lengths(), new_lod)
def test_create_random_int_lodtensor(self): def test_create_random_int_lodtensor(self):
# The shape of a word, commonly used in speech and NLP problem, is [1] # The shape of a word, commonly used in speech and NLP problem, is [1]
...@@ -83,7 +80,7 @@ class TestLoDTensor(unittest.TestCase): ...@@ -83,7 +80,7 @@ class TestLoDTensor(unittest.TestCase):
high = dict_size - 1 high = dict_size - 1
tensor = create_random_int_lodtensor(lod, shape, tensor = create_random_int_lodtensor(lod, shape,
fluid.CPUPlace(), low, high) fluid.CPUPlace(), low, high)
self.assertEqual(tensor.lod(), [[0, 2, 5, 10]]) self.assertEqual(tensor.recursive_sequence_lengths(), lod)
self.assertEqual(tensor.shape(), [10, 1]) self.assertEqual(tensor.shape(), [10, 1])
......
...@@ -162,7 +162,7 @@ class OpTest(unittest.TestCase): ...@@ -162,7 +162,7 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
if isinstance(np_value, tuple): if isinstance(np_value, tuple):
tensor.set(np_value[0], place) tensor.set(np_value[0], place)
tensor.set_lod(np_value[1]) tensor.set_recursive_sequence_lengths(np_value[1])
else: else:
tensor.set(np_value, place) tensor.set(np_value, place)
feed_map[name] = tensor feed_map[name] = tensor
...@@ -170,7 +170,8 @@ class OpTest(unittest.TestCase): ...@@ -170,7 +170,8 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
if isinstance(self.inputs[var_name], tuple): if isinstance(self.inputs[var_name], tuple):
tensor.set(self.inputs[var_name][0], place) tensor.set(self.inputs[var_name][0], place)
tensor.set_lod(self.inputs[var_name][1]) tensor.set_recursive_sequence_lengths(self.inputs[var_name][
1])
else: else:
tensor.set(self.inputs[var_name], place) tensor.set(self.inputs[var_name], place)
feed_map[var_name] = tensor feed_map[var_name] = tensor
...@@ -293,7 +294,8 @@ class OpTest(unittest.TestCase): ...@@ -293,7 +294,8 @@ class OpTest(unittest.TestCase):
str(place)) str(place))
if isinstance(expect, tuple): if isinstance(expect, tuple):
self.assertListEqual( self.assertListEqual(
actual.lod(), expect[1], "Output (" + sub_out_name + actual.recursive_sequence_lengths(), expect[1],
"Output (" + sub_out_name +
") has different lod at " + str(place)) ") has different lod at " + str(place))
else: else:
idx = find_actual(out_name, fetch_list) idx = find_actual(out_name, fetch_list)
...@@ -307,8 +309,8 @@ class OpTest(unittest.TestCase): ...@@ -307,8 +309,8 @@ class OpTest(unittest.TestCase):
"Output (" + out_name + ") has diff at " + str(place) + "Output (" + out_name + ") has diff at " + str(place) +
str(actual_t) + "\n" + str(expect_t)) str(actual_t) + "\n" + str(expect_t))
if isinstance(expect, tuple): if isinstance(expect, tuple):
self.assertListEqual(actual.lod(), expect[1], self.assertListEqual(actual.recursive_sequence_lengths(),
"Output (" + out_name + expect[1], "Output (" + out_name +
") has different lod at " + str(place)) ") has different lod at " + str(place))
def _get_places(self): def _get_places(self):
...@@ -408,7 +410,7 @@ class OpTest(unittest.TestCase): ...@@ -408,7 +410,7 @@ class OpTest(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(np_value, place) tensor.set(np_value, place)
if lod is not None: if lod is not None:
tensor.set_lod(lod) tensor.set_recursive_sequence_lengths(lod)
return tensor return tensor
@staticmethod @staticmethod
......
...@@ -128,7 +128,7 @@ def create_or_get_tensor(scope, var_name, var, place): ...@@ -128,7 +128,7 @@ def create_or_get_tensor(scope, var_name, var, place):
tensor = scope.var(var_name).get_tensor() tensor = scope.var(var_name).get_tensor()
if var is not None: if var is not None:
assert isinstance(var, np.ndarray) assert isinstance(var, np.ndarray)
tensor.set_lod([[]]) tensor.set_recursive_sequence_lengths([])
tensor.set_dims(var.shape) tensor.set_dims(var.shape)
tensor.set(var, place) tensor.set(var, place)
return tensor return tensor
......
...@@ -26,36 +26,36 @@ class TestBeamSearchDecodeOp(unittest.TestCase): ...@@ -26,36 +26,36 @@ class TestBeamSearchDecodeOp(unittest.TestCase):
def append_lod_tensor(self, tensor_array, lod, data): def append_lod_tensor(self, tensor_array, lod, data):
lod_tensor = core.LoDTensor() lod_tensor = core.LoDTensor()
lod_tensor.set_lod(lod) lod_tensor.set_recursive_sequence_lengths(lod)
lod_tensor.set(data, self.place) lod_tensor.set(data, self.place)
tensor_array.append(lod_tensor) tensor_array.append(lod_tensor)
def test_get_set(self): def test_get_set(self):
ids = self.scope.var("ids").get_lod_tensor_array() ids = self.scope.var("ids").get_lod_tensor_array()
self.append_lod_tensor( self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], ids, [[3, 3], [1, 1, 1, 1, 1, 1]],
np.array( np.array(
[1, 2, 3, 4, 5, 6], dtype="int64")) [1, 2, 3, 4, 5, 6], dtype="int64"))
self.append_lod_tensor( self.append_lod_tensor(
ids, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], ids, [[3, 3], [1, 0, 2, 2, 0, 1]],
np.array( np.array(
[0, 1, 2, 3, 4, 5], dtype="int64")) [0, 1, 2, 3, 4, 5], dtype="int64"))
self.append_lod_tensor( self.append_lod_tensor(
ids, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], ids, [[3, 3], [0, 1, 1, 1, 1, 1]],
np.array( np.array(
[0, 1, 2, 3, 4], dtype="int64")) [0, 1, 2, 3, 4], dtype="int64"))
scores = self.scope.var("scores").get_lod_tensor_array() scores = self.scope.var("scores").get_lod_tensor_array()
self.append_lod_tensor( self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 2, 3, 4, 5, 6]], scores, [[3, 3], [1, 1, 1, 1, 1, 1]],
np.array( np.array(
[1, 2, 3, 4, 5, 6], dtype="float64")) [1, 2, 3, 4, 5, 6], dtype="float64"))
self.append_lod_tensor( self.append_lod_tensor(
scores, [[0, 3, 6], [0, 1, 1, 3, 5, 5, 6]], scores, [[3, 3], [1, 0, 2, 2, 0, 1]],
np.array( np.array(
[0, 1, 2, 3, 4, 5], dtype="float64")) [0, 1, 2, 3, 4, 5], dtype="float64"))
self.append_lod_tensor( self.append_lod_tensor(
scores, [[0, 3, 6], [0, 0, 1, 2, 3, 4, 5]], scores, [[3, 3], [0, 1, 1, 1, 1, 1]],
np.array( np.array(
[0, 1, 2, 3, 4], dtype="float64")) [0, 1, 2, 3, 4], dtype="float64"))
...@@ -73,9 +73,11 @@ class TestBeamSearchDecodeOp(unittest.TestCase): ...@@ -73,9 +73,11 @@ class TestBeamSearchDecodeOp(unittest.TestCase):
beam_search_decode_op.run(self.scope, self.place) beam_search_decode_op.run(self.scope, self.place)
expected_lod = [[0, 4, 8], [0, 1, 3, 6, 9, 10, 13, 16, 19]] expected_lod = [[4, 4], [1, 2, 3, 3, 1, 3, 3, 3]]
self.assertEqual(sentence_ids.lod(), expected_lod) self.assertEqual(sentence_ids.recursive_sequence_lengths(),
self.assertEqual(sentence_scores.lod(), expected_lod) expected_lod)
self.assertEqual(sentence_scores.recursive_sequence_lengths(),
expected_lod)
expected_data = np.array( expected_data = np.array(
[2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64") [2, 1, 0, 3, 1, 0, 3, 2, 1, 5, 4, 3, 2, 4, 4, 3, 6, 5, 4], "int64")
......
...@@ -48,18 +48,18 @@ class BeamSearchOpTester(unittest.TestCase): ...@@ -48,18 +48,18 @@ class BeamSearchOpTester(unittest.TestCase):
op.run(self.scope, core.CPUPlace()) op.run(self.scope, core.CPUPlace())
selected_ids = self.scope.find_var("selected_ids").get_tensor() selected_ids = self.scope.find_var("selected_ids").get_tensor()
print 'selected_ids', np.array(selected_ids) print 'selected_ids', np.array(selected_ids)
print 'lod', selected_ids.lod() print 'lod', selected_ids.recursive_sequence_lengths()
def _create_pre_ids(self): def _create_pre_ids(self):
np_data = np.array([[1, 2, 3, 4]], dtype='int64') np_data = np.array([[1, 2, 3, 4]], dtype='int64')
tensor = create_tensor(self.scope, "pre_ids", np_data) tensor = create_tensor(self.scope, "pre_ids", np_data)
def _create_ids(self): def _create_ids(self):
self.lod = [[0, 1, 4], [0, 1, 2, 3, 4]] self.lod = [[1, 3], [1, 1, 1, 1]]
np_data = np.array( np_data = np.array(
[[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int64') [[4, 2, 5], [2, 1, 3], [3, 5, 2], [8, 2, 1]], dtype='int64')
tensor = create_tensor(self.scope, "ids", np_data) tensor = create_tensor(self.scope, "ids", np_data)
tensor.set_lod(self.lod) tensor.set_recursive_sequence_lengths(self.lod)
def _create_scores(self): def _create_scores(self):
np_data = np.array( np_data = np.array(
...@@ -71,7 +71,7 @@ class BeamSearchOpTester(unittest.TestCase): ...@@ -71,7 +71,7 @@ class BeamSearchOpTester(unittest.TestCase):
], ],
dtype='float32') dtype='float32')
tensor = create_tensor(self.scope, "scores", np_data) tensor = create_tensor(self.scope, "scores", np_data)
tensor.set_lod(self.lod) tensor.set_recursive_sequence_lengths(self.lod)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -65,23 +65,25 @@ def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None): ...@@ -65,23 +65,25 @@ def batch_bipartite_match(distance, lod, match_type=None, dist_threshold=None):
distance (numpy.array) : The distance of two entries with shape [M, N]. distance (numpy.array) : The distance of two entries with shape [M, N].
lod (list of int): The offsets of each input in this batch. lod (list of int): The offsets of each input in this batch.
""" """
n = len(lod) - 1 n = len(lod)
m = distance.shape[1] m = distance.shape[1]
match_indices = -1 * np.ones((n, m), dtype=np.int) match_indices = -1 * np.ones((n, m), dtype=np.int)
match_dist = np.zeros((n, m), dtype=np.float32) match_dist = np.zeros((n, m), dtype=np.float32)
for i in range(len(lod) - 1): cur_offset = 0
bipartite_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :], for i in range(n):
match_dist[i, :]) bipartite_match(distance[cur_offset:(cur_offset + lod[i]), :],
match_indices[i, :], match_dist[i, :])
if match_type == 'per_prediction': if match_type == 'per_prediction':
argmax_match(distance[lod[i]:lod[i + 1], :], match_indices[i, :], argmax_match(distance[cur_offset:(cur_offset + lod[i]), :],
match_dist[i, :], dist_threshold) match_indices[i, :], match_dist[i, :], dist_threshold)
cur_offset += lod[i]
return match_indices, match_dist return match_indices, match_dist
class TestBipartiteMatchOpWithLoD(OpTest): class TestBipartiteMatchOpWithLoD(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'bipartite_match' self.op_type = 'bipartite_match'
lod = [[0, 5, 11, 23]] lod = [[5, 6, 12]]
dist = np.random.random((23, 217)).astype('float32') dist = np.random.random((23, 217)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0]) match_indices, match_dist = batch_bipartite_match(dist, lod[0])
...@@ -98,7 +100,7 @@ class TestBipartiteMatchOpWithLoD(OpTest): ...@@ -98,7 +100,7 @@ class TestBipartiteMatchOpWithLoD(OpTest):
class TestBipartiteMatchOpWithoutLoD(OpTest): class TestBipartiteMatchOpWithoutLoD(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'bipartite_match' self.op_type = 'bipartite_match'
lod = [[0, 8]] lod = [[8]]
dist = np.random.random((8, 17)).astype('float32') dist = np.random.random((8, 17)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0]) match_indices, match_dist = batch_bipartite_match(dist, lod[0])
...@@ -115,7 +117,7 @@ class TestBipartiteMatchOpWithoutLoD(OpTest): ...@@ -115,7 +117,7 @@ class TestBipartiteMatchOpWithoutLoD(OpTest):
class TestBipartiteMatchOpWithPerPredictionType(OpTest): class TestBipartiteMatchOpWithPerPredictionType(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'bipartite_match' self.op_type = 'bipartite_match'
lod = [[0, 5, 11, 23]] lod = [[5, 6, 12]]
dist = np.random.random((23, 237)).astype('float32') dist = np.random.random((23, 237)).astype('float32')
match_indices, match_dist = batch_bipartite_match(dist, lod[0], match_indices, match_dist = batch_bipartite_match(dist, lod[0],
'per_prediction', 0.5) 'per_prediction', 0.5)
......
...@@ -81,15 +81,19 @@ def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type, ...@@ -81,15 +81,19 @@ def batch_box_coder(prior_box, prior_box_var, target_box, lod, code_type,
n = target_box.shape[0] n = target_box.shape[0]
m = prior_box.shape[0] m = prior_box.shape[0]
output_box = np.zeros((n, m, 4), dtype=np.float32) output_box = np.zeros((n, m, 4), dtype=np.float32)
for i in range(len(lod) - 1): cur_offset = 0
for i in range(len(lod)):
if (code_type == "EncodeCenterSize"): if (code_type == "EncodeCenterSize"):
box_coder(target_box[lod[i]:lod[i + 1], :], prior_box, box_coder(target_box[cur_offset:(cur_offset + lod[i]), :],
prior_box_var, output_box[lod[i]:lod[i + 1], :, :], prior_box, prior_box_var,
output_box[cur_offset:(cur_offset + lod[i]), :, :],
code_type, box_normalized) code_type, box_normalized)
elif (code_type == "DecodeCenterSize"): elif (code_type == "DecodeCenterSize"):
box_coder(target_box[lod[i]:lod[i + 1], :, :], prior_box, box_coder(target_box[cur_offset:(cur_offset + lod[i]), :, :],
prior_box_var, output_box[lod[i]:lod[i + 1], :, :], prior_box, prior_box_var,
output_box[cur_offset:(cur_offset + lod[i]), :, :],
code_type, box_normalized) code_type, box_normalized)
cur_offset += lod[i]
return output_box return output_box
...@@ -99,7 +103,7 @@ class TestBoxCoderOp(OpTest): ...@@ -99,7 +103,7 @@ class TestBoxCoderOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "box_coder" self.op_type = "box_coder"
lod = [[0, 1, 2, 3, 4, 5]] lod = [[1, 1, 1, 1, 1]]
prior_box = np.random.random((10, 4)).astype('float32') prior_box = np.random.random((10, 4)).astype('float32')
prior_box_var = np.random.random((10, 4)).astype('float32') prior_box_var = np.random.random((10, 4)).astype('float32')
target_box = np.random.random((5, 10, 4)).astype('float32') target_box = np.random.random((5, 10, 4)).astype('float32')
...@@ -152,7 +156,7 @@ class TestBoxCoderOpWithLoD(OpTest): ...@@ -152,7 +156,7 @@ class TestBoxCoderOpWithLoD(OpTest):
def setUp(self): def setUp(self):
self.op_type = "box_coder" self.op_type = "box_coder"
lod = [[0, 4, 12, 20]] lod = [[4, 8, 8]]
prior_box = np.random.random((10, 4)).astype('float32') prior_box = np.random.random((10, 4)).astype('float32')
prior_box_var = np.random.random((10, 4)).astype('float32') prior_box_var = np.random.random((10, 4)).astype('float32')
target_box = np.random.random((20, 4)).astype('float32') target_box = np.random.random((20, 4)).astype('float32')
......
...@@ -144,10 +144,10 @@ class TestChunkEvalOp(OpTest): ...@@ -144,10 +144,10 @@ class TestChunkEvalOp(OpTest):
starts = sorted(starts) starts = sorted(starts)
self.num_correct_chunks, self.num_infer_chunks, self.num_label_chunks = self.gen_chunks( self.num_correct_chunks, self.num_infer_chunks, self.num_label_chunks = self.gen_chunks(
infer, label, starts) infer, label, starts)
self.inputs = { lod = []
'Inference': (infer, [starts]), for i in range(len(starts) - 1):
'Label': (label, [starts]) lod.append(starts[i + 1] - starts[i])
} self.inputs = {'Inference': (infer, [lod]), 'Label': (label, [lod])}
precision = float( precision = float(
self.num_correct_chunks self.num_correct_chunks
) / self.num_infer_chunks if self.num_infer_chunks else 0 ) / self.num_infer_chunks if self.num_infer_chunks else 0
......
...@@ -22,9 +22,9 @@ from op_test import OpTest ...@@ -22,9 +22,9 @@ from op_test import OpTest
class CRFDecoding(object): class CRFDecoding(object):
def __init__(self, emission_weights, transition_weights, def __init__(self, emission_weights, transition_weights,
seq_start_positions): seq_start_positions):
assert (emission_weights.shape[0] == seq_start_positions[-1]) assert (emission_weights.shape[0] == sum(seq_start_positions))
self.tag_num = emission_weights.shape[1] self.tag_num = emission_weights.shape[1]
self.seq_num = len(seq_start_positions) - 1 self.seq_num = len(seq_start_positions)
self.seq_start_positions = seq_start_positions self.seq_start_positions = seq_start_positions
self.x = emission_weights self.x = emission_weights
...@@ -34,9 +34,9 @@ class CRFDecoding(object): ...@@ -34,9 +34,9 @@ class CRFDecoding(object):
self.w = transition_weights[2:, :] self.w = transition_weights[2:, :]
self.track = np.zeros( self.track = np.zeros(
(seq_start_positions[-1], self.tag_num), dtype="int64") (sum(seq_start_positions), self.tag_num), dtype="int64")
self.decoded_path = np.zeros( self.decoded_path = np.zeros(
(seq_start_positions[-1], 1), dtype="int64") (sum(seq_start_positions), 1), dtype="int64")
def _decode_one_sequence(self, decoded_path, x): def _decode_one_sequence(self, decoded_path, x):
seq_len, tag_num = x.shape seq_len, tag_num = x.shape
...@@ -71,9 +71,11 @@ class CRFDecoding(object): ...@@ -71,9 +71,11 @@ class CRFDecoding(object):
decoded_path[i - 1] = max_idx = track[i, max_idx] decoded_path[i - 1] = max_idx = track[i, max_idx]
def decode(self): def decode(self):
cur_pos = 0
for i in range(self.seq_num): for i in range(self.seq_num):
start = self.seq_start_positions[i] start = cur_pos
end = self.seq_start_positions[i + 1] cur_pos += self.seq_start_positions[i]
end = cur_pos
self._decode_one_sequence(self.decoded_path[start:end, :], self._decode_one_sequence(self.decoded_path[start:end, :],
self.x[start:end, :]) self.x[start:end, :])
return self.decoded_path return self.decoded_path
...@@ -90,11 +92,13 @@ class TestCRFDecodingOp1(OpTest): ...@@ -90,11 +92,13 @@ class TestCRFDecodingOp1(OpTest):
TAG_NUM = 17 TAG_NUM = 17
MAX_SEQ_LEN = 10 MAX_SEQ_LEN = 10
lod = [[0]] lod = [[]]
total_len = 0
for i in range(SEQ_NUM): for i in range(SEQ_NUM):
lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN)) lod[-1].append(random.randint(1, MAX_SEQ_LEN))
total_len += lod[-1][-1]
emission = np.random.uniform(-1, 1, emission = np.random.uniform(-1, 1,
[lod[-1][-1], TAG_NUM]).astype("float64") [total_len, TAG_NUM]).astype("float64")
transition = np.random.uniform(-0.5, 0.5, transition = np.random.uniform(-0.5, 0.5,
[TAG_NUM + 2, TAG_NUM]).astype("float64") [TAG_NUM + 2, TAG_NUM]).astype("float64")
...@@ -126,7 +130,8 @@ class TestCRFDecodingOp2(OpTest): ...@@ -126,7 +130,8 @@ class TestCRFDecodingOp2(OpTest):
self.op_type = "crf_decoding" self.op_type = "crf_decoding"
TAG_NUM = 5 TAG_NUM = 5
lod = [[0, 1, 3, 6, 10]] lod = [[1, 2, 3, 4]]
total_len = sum(lod[-1])
transition = np.repeat( transition = np.repeat(
np.arange( np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM), TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
...@@ -135,13 +140,13 @@ class TestCRFDecodingOp2(OpTest): ...@@ -135,13 +140,13 @@ class TestCRFDecodingOp2(OpTest):
emission = np.repeat( emission = np.repeat(
np.arange( np.arange(
TAG_NUM, dtype="float64").reshape(1, TAG_NUM), TAG_NUM, dtype="float64").reshape(1, TAG_NUM),
lod[-1][-1], total_len,
axis=0) axis=0)
labels = np.random.randint( labels = np.random.randint(
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") low=0, high=TAG_NUM, size=(total_len, 1), dtype="int64")
predicted_labels = np.ones( predicted_labels = np.ones(
(lod[-1][-1], 1), dtype="int64") * (TAG_NUM - 1) (total_len, 1), dtype="int64") * (TAG_NUM - 1)
expected_output = (labels == predicted_labels).astype("int64") expected_output = (labels == predicted_labels).astype("int64")
self.inputs = { self.inputs = {
......
...@@ -22,14 +22,16 @@ from test_softmax_op import stable_softmax ...@@ -22,14 +22,16 @@ from test_softmax_op import stable_softmax
def CTCAlign(input, lod, blank, merge_repeated): def CTCAlign(input, lod, blank, merge_repeated):
lod0 = lod[0] lod0 = lod[0]
result = [] result = []
for i in range(len(lod0) - 1): cur_offset = 0
for i in range(len(lod0)):
prev_token = -1 prev_token = -1
for j in range(lod0[i], lod0[i + 1]): for j in range(cur_offset, cur_offset + lod0[i]):
token = input[j][0] token = input[j][0]
if (token != blank) and not (merge_repeated and if (token != blank) and not (merge_repeated and
token == prev_token): token == prev_token):
result.append(token) result.append(token)
prev_token = token prev_token = token
cur_offset += lod0[i]
result = np.array(result).reshape([len(result), 1]).astype("int32") result = np.array(result).reshape([len(result), 1]).astype("int32")
if len(result) == 0: if len(result) == 0:
result = np.array([-1]) result = np.array([-1])
...@@ -39,7 +41,7 @@ def CTCAlign(input, lod, blank, merge_repeated): ...@@ -39,7 +41,7 @@ def CTCAlign(input, lod, blank, merge_repeated):
class TestCTCAlignOp(OpTest): class TestCTCAlignOp(OpTest):
def config(self): def config(self):
self.op_type = "ctc_align" self.op_type = "ctc_align"
self.input_lod = [[0, 11, 18]] self.input_lod = [[11, 7]]
self.blank = 0 self.blank = 0
self.merge_repeated = False self.merge_repeated = False
self.input = np.array( self.input = np.array(
...@@ -66,7 +68,7 @@ class TestCTCAlignOp(OpTest): ...@@ -66,7 +68,7 @@ class TestCTCAlignOp(OpTest):
class TestCTCAlignOpCase1(TestCTCAlignOp): class TestCTCAlignOpCase1(TestCTCAlignOp):
def config(self): def config(self):
self.op_type = "ctc_align" self.op_type = "ctc_align"
self.input_lod = [[0, 11, 19]] self.input_lod = [[11, 8]]
self.blank = 0 self.blank = 0
self.merge_repeated = True self.merge_repeated = True
self.input = np.array( self.input = np.array(
...@@ -77,7 +79,7 @@ class TestCTCAlignOpCase1(TestCTCAlignOp): ...@@ -77,7 +79,7 @@ class TestCTCAlignOpCase1(TestCTCAlignOp):
class TestCTCAlignOpCase2(TestCTCAlignOp): class TestCTCAlignOpCase2(TestCTCAlignOp):
def config(self): def config(self):
self.op_type = "ctc_align" self.op_type = "ctc_align"
self.input_lod = [[0, 4]] self.input_lod = [[4]]
self.blank = 0 self.blank = 0
self.merge_repeated = True self.merge_repeated = True
self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32") self.input = np.array([0, 0, 0, 0]).reshape([4, 1]).astype("int32")
......
...@@ -74,13 +74,13 @@ class TestDetectionMAPOp(OpTest): ...@@ -74,13 +74,13 @@ class TestDetectionMAPOp(OpTest):
self.evaluate_difficult = True self.evaluate_difficult = True
self.ap_type = "integral" self.ap_type = "integral"
self.label_lod = [[0, 2, 4]] self.label_lod = [[2, 2]]
# label difficult xmin ymin xmax ymax # label difficult xmin ymin xmax ymax
self.label = [[1, 0, 0.1, 0.1, 0.3, 0.3], [1, 1, 0.6, 0.6, 0.8, 0.8], self.label = [[1, 0, 0.1, 0.1, 0.3, 0.3], [1, 1, 0.6, 0.6, 0.8, 0.8],
[2, 0, 0.3, 0.3, 0.6, 0.5], [1, 0, 0.7, 0.1, 0.9, 0.3]] [2, 0, 0.3, 0.3, 0.6, 0.5], [1, 0, 0.7, 0.1, 0.9, 0.3]]
# label score xmin ymin xmax ymax difficult # label score xmin ymin xmax ymax difficult
self.detect_lod = [[0, 3, 7]] self.detect_lod = [[3, 4]]
self.detect = [ self.detect = [
[1, 0.3, 0.1, 0.0, 0.4, 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3], [1, 0.3, 0.1, 0.0, 0.4, 0.3], [1, 0.7, 0.0, 0.1, 0.2, 0.3],
[1, 0.9, 0.7, 0.6, 0.8, 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4], [1, 0.9, 0.7, 0.6, 0.8, 0.8], [2, 0.8, 0.2, 0.1, 0.4, 0.4],
...@@ -89,7 +89,7 @@ class TestDetectionMAPOp(OpTest): ...@@ -89,7 +89,7 @@ class TestDetectionMAPOp(OpTest):
] ]
# label score true_pos false_pos # label score true_pos false_pos
self.tf_pos_lod = [[0, 3, 7]] self.tf_pos_lod = [[3, 4]]
self.tf_pos = [[1, 0.9, 1, 0], [1, 0.7, 1, 0], [1, 0.3, 0, 1], self.tf_pos = [[1, 0.9, 1, 0], [1, 0.7, 1, 0], [1, 0.3, 0, 1],
[1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0], [1, 0.2, 1, 0], [2, 0.8, 0, 1], [2, 0.1, 1, 0],
[3, 0.2, 0, 1]] [3, 0.2, 0, 1]]
...@@ -112,15 +112,19 @@ class TestDetectionMAPOp(OpTest): ...@@ -112,15 +112,19 @@ class TestDetectionMAPOp(OpTest):
for i, count in enumerate(class_pos_count): for i, count in enumerate(class_pos_count):
class_pos_count_dict[i] = count class_pos_count_dict[i] = count
for i in range(len(true_pos_lod[0]) - 1): cur_pos = 0
start = true_pos_lod[0][i] for i in range(len(true_pos_lod[0])):
end = true_pos_lod[0][i + 1] start = cur_pos
cur_pos += true_pos_lod[0][i]
end = cur_pos
for j in range(start, end): for j in range(start, end):
true_pos_dict[i].append(true_pos[j]) true_pos_dict[i].append(true_pos[j])
for i in range(len(false_pos_lod[0]) - 1): cur_pos = 0
start = false_pos_lod[0][i] for i in range(len(false_pos_lod[0])):
end = false_pos_lod[0][i + 1] start = cur_pos
cur_pos += false_pos_lod[0][i]
end = cur_pos
for j in range(start, end): for j in range(start, end):
false_pos_dict[i].append(false_pos[j]) false_pos_dict[i].append(false_pos[j])
...@@ -130,19 +134,19 @@ class TestDetectionMAPOp(OpTest): ...@@ -130,19 +134,19 @@ class TestDetectionMAPOp(OpTest):
label_number = self.class_num label_number = self.class_num
out_class_pos_count = [] out_class_pos_count = []
out_true_pos_lod = [0] out_true_pos_lod = []
out_true_pos = [] out_true_pos = []
out_false_pos_lod = [0] out_false_pos_lod = []
out_false_pos = [] out_false_pos = []
for i in range(label_number): for i in range(label_number):
out_class_pos_count.append([label_count[i]]) out_class_pos_count.append([label_count[i]])
true_pos_list = true_pos[i] true_pos_list = true_pos[i]
out_true_pos += true_pos_list out_true_pos += true_pos_list
out_true_pos_lod.append(len(out_true_pos)) out_true_pos_lod.append(len(true_pos_list))
false_pos_list = false_pos[i] false_pos_list = false_pos[i]
out_false_pos += false_pos_list out_false_pos += false_pos_list
out_false_pos_lod.append(len(out_false_pos)) out_false_pos_lod.append(len(false_pos_list))
return out_class_pos_count, out_true_pos, [ return out_class_pos_count, out_true_pos, [
out_true_pos_lod out_true_pos_lod
...@@ -241,7 +245,7 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp): ...@@ -241,7 +245,7 @@ class TestDetectionMAPOpSkipDiff(TestDetectionMAPOp):
self.evaluate_difficult = False self.evaluate_difficult = False
self.tf_pos_lod = [[0, 2, 6]] self.tf_pos_lod = [[2, 4]]
# label score true_pos false_pos # label score true_pos false_pos
self.tf_pos = [[1, 0.7, 1, 0], [1, 0.3, 0, 1], [1, 0.2, 1, 0], self.tf_pos = [[1, 0.7, 1, 0], [1, 0.3, 0, 1], [1, 0.2, 1, 0],
[2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]] [2, 0.8, 0, 1], [2, 0.1, 1, 0], [3, 0.2, 0, 1]]
...@@ -267,9 +271,9 @@ class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp): ...@@ -267,9 +271,9 @@ class TestDetectionMAPOpMultiBatch(TestDetectionMAPOp):
def init_test_case(self): def init_test_case(self):
super(TestDetectionMAPOpMultiBatch, self).init_test_case() super(TestDetectionMAPOpMultiBatch, self).init_test_case()
self.class_pos_count = [0, 2, 1] self.class_pos_count = [0, 2, 1]
self.true_pos_lod = [[0, 0, 3, 5]] self.true_pos_lod = [[0, 3, 2]]
self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]] self.true_pos = [[0.7, 1.], [0.3, 0.], [0.2, 1.], [0.8, 0.], [0.1, 1.]]
self.false_pos_lod = [[0, 0, 3, 5]] self.false_pos_lod = [[0, 3, 2]]
self.false_pos = [[0.7, 0.], [0.3, 1.], [0.2, 0.], [0.8, 1.], [0.1, 0.]] self.false_pos = [[0.7, 0.], [0.3, 1.], [0.2, 0.], [0.8, 1.], [0.1, 0.]]
......
...@@ -136,16 +136,16 @@ class BaseRNN(object): ...@@ -136,16 +136,16 @@ class BaseRNN(object):
feed_dict = dict() feed_dict = dict()
for iname in self.inputs: for iname in self.inputs:
lod = [0] lod = []
np_flatten = [] np_flatten = []
for seq_id in xrange(len(self.inputs[iname])): for seq_id in xrange(len(self.inputs[iname])):
seq_len = len(self.inputs[iname][seq_id]) seq_len = len(self.inputs[iname][seq_id])
lod.append(lod[-1] + seq_len) lod.append(seq_len)
np_flatten.extend(self.inputs[iname][seq_id]) np_flatten.extend(self.inputs[iname][seq_id])
t = fluid.Tensor() t = fluid.Tensor()
t.set(numpy.array(np_flatten), place) t.set(numpy.array(np_flatten), place)
t.set_lod([lod]) t.set_recursive_sequence_lengths([lod])
feed_dict[iname] = t feed_dict[iname] = t
for pname in self.params: for pname in self.params:
......
...@@ -39,20 +39,20 @@ class TestDyRnnStaticInput(unittest.TestCase): ...@@ -39,20 +39,20 @@ class TestDyRnnStaticInput(unittest.TestCase):
def prepare_x_tensor(self): def prepare_x_tensor(self):
self.x_tensor_dim = 10 self.x_tensor_dim = 10
lod = [[0, 2, 3, 6]] lod = [[2, 1, 3]]
shape = [lod[0][-1], self.x_tensor_dim] shape = [sum(lod[0]), self.x_tensor_dim]
self.x_tensor_data = np.random.random(shape).astype('float32') self.x_tensor_data = np.random.random(shape).astype('float32')
self.x_tensor = core.LoDTensor() self.x_tensor = core.LoDTensor()
self.x_tensor.set_lod(lod) self.x_tensor.set_recursive_sequence_lengths(lod)
self.x_tensor.set(self.x_tensor_data, self.place) self.x_tensor.set(self.x_tensor_data, self.place)
def prepare_static_input_tensor(self): def prepare_static_input_tensor(self):
self.static_input_tensor_dim = 4 self.static_input_tensor_dim = 4
lod = [[0, 1, 3, 6]] lod = [[1, 2, 3]]
shape = [lod[0][-1], self.static_input_tensor_dim] shape = [sum(lod[0]), self.static_input_tensor_dim]
self.static_input_data = np.random.random(shape).astype('float32') self.static_input_data = np.random.random(shape).astype('float32')
self.static_input_tensor = core.LoDTensor() self.static_input_tensor = core.LoDTensor()
self.static_input_tensor.set_lod(lod) self.static_input_tensor.set_recursive_sequence_lengths(lod)
self.static_input_tensor.set(self.static_input_data, self.place) self.static_input_tensor.set(self.static_input_data, self.place)
def fetch_value(self, var): def fetch_value(self, var):
...@@ -69,7 +69,7 @@ class TestDyRnnStaticInput(unittest.TestCase): ...@@ -69,7 +69,7 @@ class TestDyRnnStaticInput(unittest.TestCase):
ndarray = np.zeros(shape=dims).astype('float32') ndarray = np.zeros(shape=dims).astype('float32')
for i in xrange(np.product(dims)): for i in xrange(np.product(dims)):
ndarray.ravel()[i] = lod_tensor.get_float_element(i) ndarray.ravel()[i] = lod_tensor.get_float_element(i)
return ndarray, lod_tensor.lod() return ndarray, lod_tensor.recursive_sequence_lengths()
def build_graph(self, only_forward=False): def build_graph(self, only_forward=False):
x_tensor = fluid.layers.data( x_tensor = fluid.layers.data(
...@@ -131,21 +131,20 @@ class TestDyRnnStaticInput(unittest.TestCase): ...@@ -131,21 +131,20 @@ class TestDyRnnStaticInput(unittest.TestCase):
framework.grad_var_name('static_input_tensor')) framework.grad_var_name('static_input_tensor'))
return static_input_grad, loss return static_input_grad, loss
def get_seq_len_from_lod(self, lod):
return [lod[0][i + 1] - lod[0][i] for i in xrange(len(lod[0]) - 1)]
def get_expected_static_step_outs(self): def get_expected_static_step_outs(self):
x_lod = self.x_tensor.lod() x_lod = self.x_tensor.recursive_sequence_lengths()
x_seq_len = self.get_seq_len_from_lod(x_lod) x_seq_len = x_lod[0]
x_seq_len_sorted = sorted(x_seq_len) x_seq_len_sorted = sorted(x_seq_len)
x_sorted_indices = np.argsort(x_seq_len)[::-1] x_sorted_indices = np.argsort(x_seq_len)[::-1]
static_lod = self.static_input_tensor.lod() static_lod = self.static_input_tensor.recursive_sequence_lengths()
static_sliced = [ static_sliced = []
self.static_input_data[static_lod[0][i]:static_lod[0][i + 1]] cur_offset = 0
for i in xrange(len(static_lod[0]) - 1) for i in xrange(len(static_lod[0])):
] static_sliced.append(self.static_input_data[cur_offset:(
static_seq_len = self.get_seq_len_from_lod(static_lod) cur_offset + static_lod[0][i])])
cur_offset += static_lod[0][i]
static_seq_len = static_lod[0]
static_reordered = [] static_reordered = []
for i in xrange(len(x_sorted_indices)): for i in xrange(len(x_sorted_indices)):
static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist()) static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist())
...@@ -159,11 +158,13 @@ class TestDyRnnStaticInput(unittest.TestCase): ...@@ -159,11 +158,13 @@ class TestDyRnnStaticInput(unittest.TestCase):
for i in xrange(self._max_sequence_len): for i in xrange(self._max_sequence_len):
end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1) end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1)
lod = [0] lod = []
total_len = 0
for i in xrange(end): for i in xrange(end):
lod.append(static_seq_len_reordered[i] + lod[-1]) lod.append(static_seq_len_reordered[i])
total_len += lod[-1]
static_step_lods.append([lod]) static_step_lods.append([lod])
end = lod[-1] end = total_len
static_step_outs.append( static_step_outs.append(
np.array(static_reordered[:end]).astype('float32')) np.array(static_reordered[:end]).astype('float32'))
...@@ -199,7 +200,9 @@ class TestDyRnnStaticInput(unittest.TestCase): ...@@ -199,7 +200,9 @@ class TestDyRnnStaticInput(unittest.TestCase):
self.static_input_tensor.set_float_element(i, origin) self.static_input_tensor.set_float_element(i, origin)
numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2 numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001)) self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
self.assertTrue(np.allclose(actual_lod, self.static_input_tensor.lod())) self.assertTrue(
np.allclose(actual_lod,
self.static_input_tensor.recursive_sequence_lengths()))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -52,23 +52,29 @@ class TestEditDistanceOp(OpTest): ...@@ -52,23 +52,29 @@ class TestEditDistanceOp(OpTest):
def setUp(self): def setUp(self):
self.op_type = "edit_distance" self.op_type = "edit_distance"
normalized = False normalized = False
x1 = np.array([[0, 12, 3, 5, 8, 2]]).astype("int64") x1 = np.array([[12, 3, 5, 8, 2]]).astype("int64")
x2 = np.array([[0, 12, 4, 7, 8]]).astype("int64") x2 = np.array([[12, 4, 7, 8]]).astype("int64")
x1 = np.transpose(x1) x1 = np.transpose(x1)
x2 = np.transpose(x2) x2 = np.transpose(x2)
x1_lod = [0, 1, 5] x1_lod = [1, 4]
x2_lod = [0, 3, 4] x2_lod = [3, 1]
num_strs = len(x1_lod) - 1 num_strs = len(x1_lod)
distance = np.zeros((num_strs, 1)).astype("float32") distance = np.zeros((num_strs, 1)).astype("float32")
sequence_num = np.array(2).astype("int64") sequence_num = np.array(2).astype("int64")
x1_offset = 0
x2_offset = 0
for i in range(0, num_strs): for i in range(0, num_strs):
distance[i] = Levenshtein( distance[i] = Levenshtein(
hyp=x1[x1_lod[i]:x1_lod[i + 1]], hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
ref=x2[x2_lod[i]:x2_lod[i + 1]]) ref=x2[x2_offset:(x2_offset + x2_lod[i])])
x1_offset += x1_lod[i]
x2_offset += x2_lod[i]
if normalized is True: if normalized is True:
len_ref = x2_lod[i + 1] - x2_lod[i] len_ref = x2_lod[i]
distance[i] = distance[i] / len_ref distance[i] = distance[i] / len_ref
self.attrs = {'normalized': normalized} self.attrs = {'normalized': normalized}
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])} self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
self.outputs = {'Out': distance, 'SequenceNum': sequence_num} self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
...@@ -81,23 +87,29 @@ class TestEditDistanceOpNormalized(OpTest): ...@@ -81,23 +87,29 @@ class TestEditDistanceOpNormalized(OpTest):
def setUp(self): def setUp(self):
self.op_type = "edit_distance" self.op_type = "edit_distance"
normalized = True normalized = True
x1 = np.array([[0, 10, 3, 6, 5, 8, 2]]).astype("int64") x1 = np.array([[10, 3, 6, 5, 8, 2]]).astype("int64")
x2 = np.array([[0, 10, 4, 6, 7, 8]]).astype("int64") x2 = np.array([[10, 4, 6, 7, 8]]).astype("int64")
x1 = np.transpose(x1) x1 = np.transpose(x1)
x2 = np.transpose(x2) x2 = np.transpose(x2)
x1_lod = [0, 1, 3, 6] x1_lod = [1, 2, 3]
x2_lod = [0, 2, 3, 5] x2_lod = [2, 1, 2]
num_strs = len(x1_lod) - 1 num_strs = len(x1_lod)
distance = np.zeros((num_strs, 1)).astype("float32") distance = np.zeros((num_strs, 1)).astype("float32")
sequence_num = np.array(3).astype("int64") sequence_num = np.array(3).astype("int64")
x1_offset = 0
x2_offset = 0
for i in range(0, num_strs): for i in range(0, num_strs):
distance[i] = Levenshtein( distance[i] = Levenshtein(
hyp=x1[x1_lod[i]:x1_lod[i + 1]], hyp=x1[x1_offset:(x1_offset + x1_lod[i])],
ref=x2[x2_lod[i]:x2_lod[i + 1]]) ref=x2[x2_offset:(x2_offset + x2_lod[i])])
x1_offset += x1_lod[i]
x2_offset += x2_lod[i]
if normalized is True: if normalized is True:
len_ref = x2_lod[i + 1] - x2_lod[i] len_ref = x2_lod[i]
distance[i] = distance[i] / len_ref distance[i] = distance[i] / len_ref
self.attrs = {'normalized': normalized} self.attrs = {'normalized': normalized}
self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])} self.inputs = {'Hyps': (x1, [x1_lod]), 'Refs': (x2, [x2_lod])}
self.outputs = {'Out': distance, 'SequenceNum': sequence_num} self.outputs = {'Out': distance, 'SequenceNum': sequence_num}
......
...@@ -24,17 +24,16 @@ class TestFeedFetch(unittest.TestCase): ...@@ -24,17 +24,16 @@ class TestFeedFetch(unittest.TestCase):
input_array = np.ones((4, 4, 6)).astype("float32") input_array = np.ones((4, 4, 6)).astype("float32")
input_array[0, 0, 0] = 3 input_array[0, 0, 0] = 3
input_array[3, 3, 5] = 10 input_array[3, 3, 5] = 10
input_tensor = core.LoDTensor([[0, 2, 4]]) input_tensor = core.LoDTensor([[2, 2]])
input_tensor.set(input_array, place) input_tensor.set(input_array, place)
core.set_feed_variable(scope, input_tensor, "feed", 0) core.set_feed_variable(scope, input_tensor, "feed", 0)
output_tensor = core.get_fetch_variable(scope, "feed", 0) output_tensor = core.get_fetch_variable(scope, "feed", 0)
output_lod = output_tensor.lod() output_lod = output_tensor.recursive_sequence_lengths()
self.assertEqual(0, output_lod[0][0]) self.assertEqual(2, output_lod[0][0])
self.assertEqual(2, output_lod[0][1]) self.assertEqual(2, output_lod[0][1])
self.assertEqual(4, output_lod[0][2])
output_array = np.array(output_tensor) output_array = np.array(output_tensor)
self.assertEqual(3, output_array[0, 0, 0]) self.assertEqual(3, output_array[0, 0, 0])
......
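Alongside the lod values, the tests switch from the offset-based LoDTensor accessors (set_lod / lod) to the length-based ones. A hedged sketch of the construction used above, assuming a Paddle build that already includes this change; core is paddle.fluid.core as in the test files.

import numpy as np
import paddle.fluid.core as core

place = core.CPUPlace()
t = core.LoDTensor()
t.set(np.ones((4, 6)).astype('float32'), place)
# Two sequences of two rows each; equivalent to the old offset-based LoD [[0, 2, 4]].
t.set_recursive_sequence_lengths([[2, 2]])
assert t.recursive_sequence_lengths() == [[2, 2]]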
...@@ -55,7 +55,7 @@ class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest): ...@@ -55,7 +55,7 @@ class TestFillConstantBatchSizeLikeWithLoDTensor(OpTest):
self.op_type = "fill_constant_batch_size_like" self.op_type = "fill_constant_batch_size_like"
self.inputs = { self.inputs = {
'Input': (np.random.random((31, 28)).astype("float32"), 'Input': (np.random.random((31, 28)).astype("float32"),
[[0, 9, 23, 31]]) [[9, 14, 8]])
} }
self.attrs = { self.attrs = {
'value': 3.5, 'value': 3.5,
......
...@@ -20,8 +20,8 @@ from test_lstm_op import identity, sigmoid, tanh, relu ...@@ -20,8 +20,8 @@ from test_lstm_op import identity, sigmoid, tanh, relu
class TestGRUOp(OpTest): class TestGRUOp(OpTest):
lod = [[0, 2, 6, 9]] lod = [[2, 4, 3]]
batch_size = lod[0][-1] batch_size = sum(lod[0])
frame_size = 5 frame_size = 5
activate = { activate = {
'identity': identity, 'identity': identity,
...@@ -33,10 +33,10 @@ class TestGRUOp(OpTest): ...@@ -33,10 +33,10 @@ class TestGRUOp(OpTest):
@staticmethod @staticmethod
def seq_to_batch(lod, is_reverse): def seq_to_batch(lod, is_reverse):
idx_in_seq_list = [] idx_in_seq_list = []
seq_starts = lod[0] seq_lens = lod[0]
seq_lens = [] seq_starts = [0]
for i in range(len(seq_starts) - 1): for i in range(len(seq_lens)):
seq_lens.append(seq_starts[i + 1] - seq_starts[i]) seq_starts.append(seq_starts[-1] + seq_lens[i])
sorted_seqs = sorted( sorted_seqs = sorted(
range(len(seq_lens)), lambda x, y: seq_lens[y] - seq_lens[x]) range(len(seq_lens)), lambda x, y: seq_lens[y] - seq_lens[x])
num_batch = seq_lens[sorted_seqs[0]] num_batch = seq_lens[sorted_seqs[0]]
......
...@@ -364,5 +364,22 @@ class TestMSRAInitializer(unittest.TestCase): ...@@ -364,5 +364,22 @@ class TestMSRAInitializer(unittest.TestCase):
self.assertEqual(init_op.attr('seed'), 134) self.assertEqual(init_op.attr('seed'), 134)
class TestBilinearInitializer(unittest.TestCase):
def test_bilinear_initializer(self):
"""Test the bilinear initializer with supplied arguments
"""
program = framework.Program()
block = program.global_block()
block.create_parameter(
dtype="float32",
shape=[8, 1, 3, 3],
lod_level=0,
name="param",
initializer=initializer.BilinearInitializer())
self.assertEqual(len(block.ops), 1)
init_op = block.ops[0]
self.assertEqual(init_op.type, 'assign_value')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -58,8 +58,8 @@ class TestIOUSimilarityOpWithLoD(TestIOUSimilarityOp): ...@@ -58,8 +58,8 @@ class TestIOUSimilarityOpWithLoD(TestIOUSimilarityOp):
def setUp(self): def setUp(self):
super(TestIOUSimilarityOpWithLoD, self).setUp() super(TestIOUSimilarityOpWithLoD, self).setUp()
self.boxes1_lod = [[0, 1, 2]] self.boxes1_lod = [[1, 1]]
self.output_lod = [[0, 1, 2]] self.output_lod = [[1, 1]]
self.inputs = {'X': (self.boxes1, self.boxes1_lod), 'Y': self.boxes2} self.inputs = {'X': (self.boxes1, self.boxes1_lod), 'Y': self.boxes2}
self.outputs = {'Out': (self.output, self.output_lod)} self.outputs = {'Out': (self.output, self.output_lod)}
......
...@@ -105,11 +105,13 @@ class TestLinearChainCrfOp(OpTest): ...@@ -105,11 +105,13 @@ class TestLinearChainCrfOp(OpTest):
MAX_SEQ_LEN = 5 MAX_SEQ_LEN = 5
# the linear_chain_crf operator only supports sequence (LoD level = 1) # the linear_chain_crf operator only supports sequence (LoD level = 1)
lod = [[0]] lod = [[]]
seq_start_pos = [0]
for i in range(SEQ_NUM): for i in range(SEQ_NUM):
lod[-1].append(lod[-1][-1] + random.randint(1, MAX_SEQ_LEN)) lod[-1].append(random.randint(1, MAX_SEQ_LEN))
emission = np.random.uniform(-1, 1, seq_start_pos.append(seq_start_pos[-1] + lod[-1][-1])
[lod[-1][-1], TAG_NUM]).astype("float64") emission = np.random.uniform(
-1, 1, [seq_start_pos[-1], TAG_NUM]).astype("float64")
emission_row_max = np.amax(emission, axis=1, keepdims=True) emission_row_max = np.amax(emission, axis=1, keepdims=True)
emission_exps = np.exp(emission - emission_row_max) emission_exps = np.exp(emission - emission_row_max)
...@@ -118,14 +120,14 @@ class TestLinearChainCrfOp(OpTest): ...@@ -118,14 +120,14 @@ class TestLinearChainCrfOp(OpTest):
transition_exps = np.exp(transition) transition_exps = np.exp(transition)
labels = np.random.randint( labels = np.random.randint(
low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64") low=0, high=TAG_NUM, size=(seq_start_pos[-1], 1), dtype="int64")
self.inputs = { self.inputs = {
"Emission": (emission, lod), "Emission": (emission, lod),
"Transition": transition, "Transition": transition,
"Label": (labels, lod) "Label": (labels, lod)
} }
crf = LinearChainCrfForward(lod[0], emission, emission_row_max, crf = LinearChainCrfForward(seq_start_pos, emission, emission_row_max,
emission_exps, transition, transition_exps, emission_exps, transition, transition_exps,
labels) labels)
alpha, log_likelihood = crf.crf_forward_compute() alpha, log_likelihood = crf.crf_forward_compute()
......
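As the comment in this test notes, linear_chain_crf only accepts plain sequences (LoD level = 1), i.e. a single list of sequence lengths. A small worked example of the shapes this implies; the concrete lengths and tag count below are illustrative assumptions, not values from the test.

lod = [[3, 2, 5]]                      # three sequences, each <= MAX_SEQ_LEN = 5
TAG_NUM = 17                           # any tag count works; 17 is illustrative
total_len = sum(lod[0])                # 10 time steps in the mini-batch
emission_shape = (total_len, TAG_NUM)  # one row of tag scores per time step
label_shape = (total_len, 1)           # one gold tag per time step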
...@@ -30,7 +30,8 @@ class TestLoDRankTable(unittest.TestCase): ...@@ -30,7 +30,8 @@ class TestLoDRankTable(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(numpy.random.random(size=(17, 100)), cpu) tensor.set(numpy.random.random(size=(17, 100)), cpu)
tensor.set_lod([[0, 1, 3], [0, 5, 6, 7], [0, 3, 4, 9, 10, 13, 16, 17]]) tensor.set_recursive_sequence_lengths(
[[1, 2], [5, 1, 1], [3, 1, 5, 1, 3, 3, 1]])
exe.run(scope=scope, feed={'x': tensor}) exe.run(scope=scope, feed={'x': tensor})
var = scope.find_var(rank_table.name) var = scope.find_var(rank_table.name)
table = var.get_lod_rank_table() table = var.get_lod_rank_table()
......
...@@ -21,11 +21,15 @@ class TestLodResetOpByAttr(OpTest): ...@@ -21,11 +21,15 @@ class TestLodResetOpByAttr(OpTest):
def setUp(self): def setUp(self):
self.op_type = "lod_reset" self.op_type = "lod_reset"
x = np.random.random((10, 20)).astype("float32") x = np.random.random((10, 20)).astype("float32")
lod = [[0, 3, 5, 10]] lod = [[3, 2, 5]]
target_lod_0 = [0, 7, 10] # target_offset_lod and target_lod are the same lod info represented
# in offset-based format and length-based format, respectively.
target_offset_lod = [0, 7, 10]
target_lod = [7, 3]
self.inputs = {'X': (x, lod)} self.inputs = {'X': (x, lod)}
self.attrs = {'target_lod': target_lod_0} # The `target_lod` attribute is still based on offset
self.outputs = {'Out': (x, [target_lod_0])} self.attrs = {'target_lod': target_offset_lod}
self.outputs = {'Out': (x, [target_lod])}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
...@@ -38,13 +42,16 @@ class TestLodResetOpByInput(OpTest): ...@@ -38,13 +42,16 @@ class TestLodResetOpByInput(OpTest):
def setUp(self): def setUp(self):
self.op_type = "lod_reset" self.op_type = "lod_reset"
x = np.random.random((10, 20)).astype("float32") x = np.random.random((10, 20)).astype("float32")
lod = [[0, 3, 5, 10]] lod = [[3, 2, 5]]
target_lod_0 = [0, 4, 7, 10] # target_offset_lod and target_lod are the same lod info represented
# in offset-based format and length-based format, respectively.
target_offset_lod = [0, 4, 7, 10]
target_lod = [4, 3, 3]
self.inputs = { self.inputs = {
'X': (x, lod), 'X': (x, lod),
'Y': np.array([target_lod_0]).astype('int32') 'Y': np.array([target_offset_lod]).astype('int32')
} }
self.outputs = {'Out': (x, [target_lod_0])} self.outputs = {'Out': (x, [target_lod])}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
...@@ -57,15 +64,16 @@ class TestLodResetOpBoth(OpTest): ...@@ -57,15 +64,16 @@ class TestLodResetOpBoth(OpTest):
def setUp(self): def setUp(self):
self.op_type = "lod_reset" self.op_type = "lod_reset"
x = np.random.random((10, 20)).astype("float32") x = np.random.random((10, 20)).astype("float32")
lod = [[0, 3, 5, 10]] lod = [[3, 2, 5]]
target_lod_0_attr = [0, 7, 10] target_offset_lod_attr = [0, 7, 10]
target_lod_0_in = [0, 4, 7, 10] target_offset_lod_in = [0, 4, 7, 10]
target_lod_in = [4, 3, 3]
self.inputs = { self.inputs = {
'X': (x, lod), 'X': (x, lod),
'Y': np.array(target_lod_0_in).astype('int32') 'Y': np.array(target_offset_lod_in).astype('int32')
} }
self.attrs = {'target_lod': target_lod_0_attr} self.attrs = {'target_lod': target_offset_lod_attr}
self.outputs = {'Out': (x, [target_lod_0_in])} self.outputs = {'Out': (x, [target_lod_in])}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
...@@ -78,11 +86,11 @@ class TestLodResetOpYIsLoDTensor(OpTest): ...@@ -78,11 +86,11 @@ class TestLodResetOpYIsLoDTensor(OpTest):
def setUp(self): def setUp(self):
self.op_type = "lod_reset" self.op_type = "lod_reset"
x = np.random.random((10, 20)).astype("float32") x = np.random.random((10, 20)).astype("float32")
lod = [[0, 3, 5, 10]] lod = [[3, 2, 5]]
y = np.random.random((10, 10)).astype("float32") y = np.random.random((10, 10)).astype("float32")
target_lod_0 = [[0, 4, 7, 10]] target_lod = [[4, 3, 3]]
self.inputs = {'X': (x, lod), 'Y': (y, target_lod_0)} self.inputs = {'X': (x, lod), 'Y': (y, target_lod)}
self.outputs = {'Out': (x, target_lod_0)} self.outputs = {'Out': (x, target_lod)}
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
......
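The comments added to the lod_reset tests above point out that offset-based and length-based LoD encode the same information. Two hypothetical helpers (not in the patch) that make the equivalence explicit:

def offsets_to_lengths(offsets):
    # e.g. [0, 3, 5, 10] -> [3, 2, 5]
    return [offsets[i + 1] - offsets[i] for i in range(len(offsets) - 1)]

def lengths_to_offsets(lengths):
    # e.g. [3, 2, 5] -> [0, 3, 5, 10]
    offsets = [0]
    for length in lengths:
        offsets.append(offsets[-1] + length)
    return offsets

assert offsets_to_lengths([0, 3, 5, 10]) == [3, 2, 5]
assert lengths_to_offsets([3, 2, 5]) == [0, 3, 5, 10]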
...@@ -27,7 +27,7 @@ class TestLoDTensorArray(unittest.TestCase): ...@@ -27,7 +27,7 @@ class TestLoDTensorArray(unittest.TestCase):
for i in xrange(10): for i in xrange(10):
t = core.LoDTensor() t = core.LoDTensor()
t.set(numpy.array([i], dtype='float32'), cpu) t.set(numpy.array([i], dtype='float32'), cpu)
t.set_lod([[0, 1]]) t.set_recursive_sequence_lengths([[1]])
tensor_array.append(t) tensor_array.append(t)
self.assertEqual(10, len(tensor_array)) self.assertEqual(10, len(tensor_array))
...@@ -35,17 +35,17 @@ class TestLoDTensorArray(unittest.TestCase): ...@@ -35,17 +35,17 @@ class TestLoDTensorArray(unittest.TestCase):
for i in xrange(10): for i in xrange(10):
t = tensor_array[i] t = tensor_array[i]
self.assertEqual(numpy.array(t), numpy.array([i], dtype='float32')) self.assertEqual(numpy.array(t), numpy.array([i], dtype='float32'))
self.assertEqual([[0, 1]], t.lod()) self.assertEqual([[1]], t.recursive_sequence_lengths())
t = core.LoDTensor() t = core.LoDTensor()
t.set(numpy.array([i + 10], dtype='float32'), cpu) t.set(numpy.array([i + 10], dtype='float32'), cpu)
t.set_lod([[0, 2]]) t.set_recursive_sequence_lengths([[1]])
tensor_array[i] = t tensor_array[i] = t
t = tensor_array[i] t = tensor_array[i]
self.assertEqual( self.assertEqual(
numpy.array(t), numpy.array( numpy.array(t), numpy.array(
[i + 10], dtype='float32')) [i + 10], dtype='float32'))
self.assertEqual([[0, 2]], t.lod()) self.assertEqual([[1]], t.recursive_sequence_lengths())
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -29,7 +29,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -29,7 +29,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set( tensor.set(
numpy.arange(10).reshape(10, 1).astype('int32'), self.place()) numpy.arange(10).reshape(10, 1).astype('int32'), self.place())
tensor.set_lod([[0, 3, 9, 10]]) tensor.set_recursive_sequence_lengths([[3, 6, 1]])
expect = map(lambda x: numpy.array(x).astype('int32'), expect = map(lambda x: numpy.array(x).astype('int32'),
[[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]) [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]])
self.main( self.main(
...@@ -42,7 +42,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -42,7 +42,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set( tensor.set(
numpy.arange(10).reshape(10, 1).astype('int32'), self.place()) numpy.arange(10).reshape(10, 1).astype('int32'), self.place())
tensor.set_lod([[0, 3, 9, 9, 10]]) tensor.set_recursive_sequence_lengths([[3, 6, 0, 1]])
expect = map(lambda x: numpy.array(x).astype('int32'), expect = map(lambda x: numpy.array(x).astype('int32'),
[[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]]) [[3, 0, 9], [4, 1], [5, 2], [6], [7], [8]])
self.main( self.main(
...@@ -55,7 +55,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -55,7 +55,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set( tensor.set(
numpy.arange(20).reshape(20, 1).astype('int32'), self.place()) numpy.arange(20).reshape(20, 1).astype('int32'), self.place())
tensor.set_lod([[0, 2, 5], [0, 3, 9, 11, 17, 20]]) tensor.set_recursive_sequence_lengths([[2, 3], [3, 6, 2, 6, 3]])
expect = [ expect = [
numpy.array( numpy.array(
...@@ -65,7 +65,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -65,7 +65,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
[17, 18, 19], dtype='int32') [17, 18, 19], dtype='int32')
] ]
lod = [[[0, 2, 5]], [[0, 6, 12]], [[0, 3]]] lod = [[[2, 3]], [[6, 6]], [[3]]]
self.main( self.main(
tensor=tensor, tensor=tensor,
expect_array=expect, expect_array=expect,
...@@ -77,8 +77,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -77,8 +77,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor.set( tensor.set(
numpy.arange(31).reshape(31, 1).astype('int32'), self.place()) numpy.arange(31).reshape(31, 1).astype('int32'), self.place())
tensor.set_lod([[0, 3, 5, 9, 11], tensor.set_recursive_sequence_lengths(
[0, 3, 7, 11, 11, 12, 17, 19, 21, 23, 30, 31]]) [[3, 2, 4, 2], [3, 4, 4, 0, 1, 5, 2, 2, 2, 7, 1]])
expect = [ expect = [
numpy.array( numpy.array(
...@@ -88,7 +88,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -88,7 +88,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
], [17, 18, 3, 4, 5, 6, 11, 30], [19, 20, 7, 8, 9, 10], [21, 22]] ], [17, 18, 3, 4, 5, 6, 11, 30], [19, 20, 7, 8, 9, 10], [21, 22]]
] ]
lod = [[[0, 5, 8, 8, 15]], [[0, 2, 6, 7, 8]], [[0, 2, 6]], [[0, 2]]] lod = [[[5, 3, 0, 7]], [[2, 4, 1, 1]], [[2, 4]], [[2]]]
self.main( self.main(
tensor=tensor, tensor=tensor,
expect_array=expect, expect_array=expect,
...@@ -99,8 +99,9 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -99,8 +99,9 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set( tensor.set(
numpy.arange(50).reshape(50, 1).astype('int32'), self.place()) numpy.arange(50).reshape(50, 1).astype('int32'), self.place())
tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13], tensor.set_recursive_sequence_lengths(
[0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]]) [[2, 3, 1], [2, 3, 1, 4, 2, 1],
[3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4]])
expect = [ expect = [
numpy.array( numpy.array(
...@@ -108,8 +109,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -108,8 +109,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
for item in [[21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49], range( for item in [[21, 0, 1, 2, 3, 4, 5, 6, 46, 47, 48, 49], range(
22, 39) + range(7, 21), range(39, 46)] 22, 39) + range(7, 21), range(39, 46)]
] ]
lod = [[[0, 1, 3, 4], [0, 1, 4, 8, 12]], lod = [[[1, 2, 1], [1, 3, 4, 4]], [[4, 3], [1, 4, 4, 8, 4, 6, 4]],
[[0, 4, 7], [0, 1, 5, 9, 17, 21, 27, 31]], [[0, 2], [0, 6, 7]]] [[2], [6, 1]]]
self.main( self.main(
tensor=tensor, tensor=tensor,
expect_array=expect, expect_array=expect,
...@@ -120,8 +121,9 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -120,8 +121,9 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set( tensor.set(
numpy.arange(50).reshape(50, 1).astype('int32'), self.place()) numpy.arange(50).reshape(50, 1).astype('int32'), self.place())
tensor.set_lod([[0, 2, 5, 6], [0, 2, 5, 6, 10, 12, 13], tensor.set_recursive_sequence_lengths(
[0, 3, 7, 11, 17, 21, 22, 23, 27, 31, 39, 45, 46, 50]]) [[2, 3, 1], [2, 3, 1, 4, 2, 1],
[3, 4, 4, 6, 4, 1, 1, 4, 4, 8, 6, 1, 4]])
self.main( self.main(
tensor=tensor, tensor=tensor,
expect_array=None, expect_array=None,
...@@ -162,12 +164,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -162,12 +164,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
exp_tensor, exp_lod = exp exp_tensor, exp_lod = exp
exp_tensor = numpy.expand_dims(exp_tensor, axis=1) exp_tensor = numpy.expand_dims(exp_tensor, axis=1)
self.assertTrue(numpy.allclose(exp_tensor, numpy.array(array[i]))) self.assertTrue(numpy.allclose(exp_tensor, numpy.array(array[i])))
self.assertEqual(exp_lod, array[i].lod()) self.assertEqual(exp_lod, array[i].recursive_sequence_lengths())
def check_tensor_same(self, actual, expect): def check_tensor_same(self, actual, expect):
self.assertTrue( self.assertTrue(
numpy.allclose(numpy.array(actual), numpy.array(expect))) numpy.allclose(numpy.array(actual), numpy.array(expect)))
self.assertEqual(actual.lod(), expect.lod()) self.assertEqual(actual.recursive_sequence_lengths(),
expect.recursive_sequence_lengths())
class TestCPULoDTensorArrayOpGrad(unittest.TestCase): class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
...@@ -188,7 +191,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase): ...@@ -188,7 +191,7 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place) tensor.set(numpy.arange(10).reshape(10, 1).astype('float32'), place)
tensor.set_lod([[0, 3, 9, 10]]) tensor.set_recursive_sequence_lengths([[3, 6, 1]])
g_vars = program.global_block().var(x.name + "@GRAD") g_vars = program.global_block().var(x.name + "@GRAD")
......
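The lod_tensor_array tests above exercise multi-level LoD. In the length-based form, each level-0 entry counts sub-sequences at the next level and the last level's entries count rows; a quick consistency check on one of the LoDs used above:

# Two-level LoD from the test: 2 + 3 = 5 sub-sequences, whose lengths
# 3 + 6 + 2 + 6 + 3 = 20 match numpy.arange(20).reshape(20, 1).
lod = [[2, 3], [3, 6, 2, 6, 3]]
assert sum(lod[0]) == len(lod[1])
assert sum(lod[1]) == 20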
...@@ -84,15 +84,17 @@ def lstm( ...@@ -84,15 +84,17 @@ def lstm(
h = g_o * act_cell(c) h = g_o * act_cell(c)
return h, c return h, c
def _reverse(x, lod): def _reverse(x, offset):
y = np.zeros_like(x) y = np.zeros_like(x)
for i in range(len(lod) - 1): for i in range(len(offset) - 1):
b, e = lod[i], lod[i + 1] b, e = offset[i], offset[i + 1]
y[b:e, :] = np.flip(x[b:e, :], 0) y[b:e, :] = np.flip(x[b:e, :], 0)
return y return y
offset = lod[0] offset = [0]
batch_size = len(offset) - 1 for l in lod[0]:
offset.append(offset[-1] + l)
batch_size = len(lod[0])
hidden = [] hidden = []
cell = [] cell = []
input = _reverse(input, offset) if is_reverse else input input = _reverse(input, offset) if is_reverse else input
...@@ -100,7 +102,7 @@ def lstm( ...@@ -100,7 +102,7 @@ def lstm(
input = input + np.tile(w_b, (offset[-1], 1)) input = input + np.tile(w_b, (offset[-1], 1))
for i in range(batch_size): for i in range(batch_size):
# compute one sequence # compute one sequence
seq_len = offset[i + 1] - offset[i] seq_len = lod[0][i]
x = input[offset[i]:offset[i + 1], :] x = input[offset[i]:offset[i + 1], :]
h_pre = h0[i] # 1 x D h_pre = h0[i] # 1 x D
c_pre = c0[i] # 1 x D c_pre = c0[i] # 1 x D
...@@ -124,7 +126,7 @@ def lstm( ...@@ -124,7 +126,7 @@ def lstm(
class TestLstmOp(OpTest): class TestLstmOp(OpTest):
def set_argument(self): def set_argument(self):
self.lod = [[0, 2, 5, 7]] self.lod = [[2, 3, 2]]
self.D = 16 self.D = 16
self.act_gate = 'sigmoid' self.act_gate = 'sigmoid'
...@@ -139,8 +141,8 @@ class TestLstmOp(OpTest): ...@@ -139,8 +141,8 @@ class TestLstmOp(OpTest):
self.set_argument() self.set_argument()
self.op_type = 'lstm' self.op_type = 'lstm'
T = self.lod[0][-1] T = sum(self.lod[0])
N = len(self.lod[0]) - 1 N = len(self.lod[0])
x = np.random.normal(size=(T, 4 * self.D)).astype('float64') x = np.random.normal(size=(T, 4 * self.D)).astype('float64')
if self.has_initial_state: if self.has_initial_state:
...@@ -186,7 +188,7 @@ class TestLstmOp(OpTest): ...@@ -186,7 +188,7 @@ class TestLstmOp(OpTest):
def test_check_grad(self): def test_check_grad(self):
# TODO(qingqing) remove folowing lines after the check_grad is refined. # TODO(qingqing) remove folowing lines after the check_grad is refined.
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchCellPreAct'] = np.zeros( self.outputs['BatchCellPreAct'] = np.zeros(
(N, self.D)).astype('float64') (N, self.D)).astype('float64')
...@@ -196,7 +198,7 @@ class TestLstmOp(OpTest): ...@@ -196,7 +198,7 @@ class TestLstmOp(OpTest):
# class TestLstmOpHasInitial(TestLstmOp): # class TestLstmOpHasInitial(TestLstmOp):
# def set_argument(self): # def set_argument(self):
# self.lod = [[0, 2, 5, 7]] # self.lod = [[2, 3, 2]]
# self.D = 16 # self.D = 16
# self.act_gate = 'sigmoid' # self.act_gate = 'sigmoid'
...@@ -209,7 +211,7 @@ class TestLstmOp(OpTest): ...@@ -209,7 +211,7 @@ class TestLstmOp(OpTest):
# def test_check_grad(self): # def test_check_grad(self):
# # TODO(qingqing) remove folowing lines after the check_grad is refined. # # TODO(qingqing) remove folowing lines after the check_grad is refined.
# N = len(self.lod[0]) - 1 # N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros( # self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64') # (N, self.D)).astype('float64')
...@@ -218,7 +220,7 @@ class TestLstmOp(OpTest): ...@@ -218,7 +220,7 @@ class TestLstmOp(OpTest):
# max_relative_error=5e-4) # max_relative_error=5e-4)
# def test_check_grad_ingore_bias(self): # def test_check_grad_ingore_bias(self):
# N = len(self.lod[0]) - 1 # N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros( # self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64') # (N, self.D)).astype('float64')
...@@ -228,7 +230,7 @@ class TestLstmOp(OpTest): ...@@ -228,7 +230,7 @@ class TestLstmOp(OpTest):
# no_grad_set=set('Bias')) # no_grad_set=set('Bias'))
# def test_check_grad_ingore_weight(self): # def test_check_grad_ingore_weight(self):
# N = len(self.lod[0]) - 1 # N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros( # self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64') # (N, self.D)).astype('float64')
...@@ -238,7 +240,7 @@ class TestLstmOp(OpTest): ...@@ -238,7 +240,7 @@ class TestLstmOp(OpTest):
# no_grad_set=set('Weight')) # no_grad_set=set('Weight'))
# def test_check_grad_ingore_input(self): # def test_check_grad_ingore_input(self):
# N = len(self.lod[0]) - 1 # N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros( # self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64') # (N, self.D)).astype('float64')
...@@ -248,7 +250,7 @@ class TestLstmOp(OpTest): ...@@ -248,7 +250,7 @@ class TestLstmOp(OpTest):
# no_grad_set=set('Input')) # no_grad_set=set('Input'))
# def test_check_grad_ingore_h0(self): # def test_check_grad_ingore_h0(self):
# N = len(self.lod[0]) - 1 # N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros( # self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64') # (N, self.D)).astype('float64')
...@@ -258,7 +260,7 @@ class TestLstmOp(OpTest): ...@@ -258,7 +260,7 @@ class TestLstmOp(OpTest):
# no_grad_set=set('H0')) # no_grad_set=set('H0'))
# def test_check_grad_ingore_c0(self): # def test_check_grad_ingore_c0(self):
# N = len(self.lod[0]) - 1 # N = len(self.lod[0])
# self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') # self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
# self.outputs['BatchCellPreAct'] = np.zeros( # self.outputs['BatchCellPreAct'] = np.zeros(
# (N, self.D)).astype('float64') # (N, self.D)).astype('float64')
...@@ -269,7 +271,7 @@ class TestLstmOp(OpTest): ...@@ -269,7 +271,7 @@ class TestLstmOp(OpTest):
# class TestLstmOpRerverse(TestLstmOp): # class TestLstmOpRerverse(TestLstmOp):
# def set_argument(self): # def set_argument(self):
# self.lod = [[0, 2, 5, 7]] # self.lod = [[2, 3, 2]]
# self.D = 16 # self.D = 16
# self.act_gate = 'sigmoid' # self.act_gate = 'sigmoid'
...@@ -282,7 +284,7 @@ class TestLstmOp(OpTest): ...@@ -282,7 +284,7 @@ class TestLstmOp(OpTest):
# class TestLstmOpNotUsePeepholes(TestLstmOp): # class TestLstmOpNotUsePeepholes(TestLstmOp):
# def set_argument(self): # def set_argument(self):
# self.lod = [[0, 2, 5, 7]] # self.lod = [[2, 3, 2]]
# self.D = 16 # self.D = 16
# self.act_gate = 'sigmoid' # self.act_gate = 'sigmoid'
......
...@@ -64,15 +64,17 @@ def lstmp( ...@@ -64,15 +64,17 @@ def lstmp(
r = act_proj(r) r = act_proj(r)
return r, c return r, c
def _reverse(x, lod): def _reverse(x, offset):
y = np.zeros_like(x) y = np.zeros_like(x)
for i in range(len(lod) - 1): for i in range(len(offset) - 1):
b, e = lod[i], lod[i + 1] b, e = offset[i], offset[i + 1]
y[b:e, :] = np.flip(x[b:e, :], 0) y[b:e, :] = np.flip(x[b:e, :], 0)
return y return y
offset = lod[0] offset = [0]
batch_size = len(offset) - 1 for l in lod[0]:
offset.append(offset[-1] + l)
batch_size = len(lod[0])
# recurrent projection state # recurrent projection state
projection = [] projection = []
cell = [] cell = []
...@@ -81,7 +83,7 @@ def lstmp( ...@@ -81,7 +83,7 @@ def lstmp(
input = input + np.tile(w_b, (offset[-1], 1)) input = input + np.tile(w_b, (offset[-1], 1))
for i in range(batch_size): for i in range(batch_size):
# compute one sequence # compute one sequence
seq_len = offset[i + 1] - offset[i] seq_len = lod[0][i]
x = input[offset[i]:offset[i + 1], :] x = input[offset[i]:offset[i + 1], :]
r_pre = np.dot(h0[i], w_rh) # 1 x P r_pre = np.dot(h0[i], w_rh) # 1 x P
r_pre = act_proj(r_pre) r_pre = act_proj(r_pre)
...@@ -117,8 +119,8 @@ class TestLstmpOp(LstmTest.TestLstmOp): ...@@ -117,8 +119,8 @@ class TestLstmpOp(LstmTest.TestLstmOp):
self.reset_argument() self.reset_argument()
self.op_type = 'lstmp' self.op_type = 'lstmp'
T = self.lod[0][-1] T = sum(self.lod[0])
N = len(self.lod[0]) - 1 N = len(self.lod[0])
x = np.random.normal(size=(T, 4 * self.D)).astype('float64') x = np.random.normal(size=(T, 4 * self.D)).astype('float64')
if self.has_initial_state: if self.has_initial_state:
...@@ -166,7 +168,7 @@ class TestLstmpOp(LstmTest.TestLstmOp): ...@@ -166,7 +168,7 @@ class TestLstmpOp(LstmTest.TestLstmOp):
def test_check_grad(self): def test_check_grad(self):
# TODO(qingqing) remove folowing lines after the check_grad is refined. # TODO(qingqing) remove folowing lines after the check_grad is refined.
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
...@@ -183,7 +185,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): ...@@ -183,7 +185,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
def test_check_grad(self): def test_check_grad(self):
# TODO(qingqing) remove folowing lines after the check_grad is refined. # TODO(qingqing) remove folowing lines after the check_grad is refined.
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
...@@ -195,7 +197,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): ...@@ -195,7 +197,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
max_relative_error=1e-2) max_relative_error=1e-2)
def test_check_grad_ingore_bias(self): def test_check_grad_ingore_bias(self):
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
...@@ -207,7 +209,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): ...@@ -207,7 +209,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('Bias')) no_grad_set=set('Bias'))
def test_check_grad_ingore_weight(self): def test_check_grad_ingore_weight(self):
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
...@@ -219,7 +221,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): ...@@ -219,7 +221,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('Weight')) no_grad_set=set('Weight'))
def test_check_grad_ingore_proj_weight(self): def test_check_grad_ingore_proj_weight(self):
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
...@@ -231,7 +233,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): ...@@ -231,7 +233,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('ProjWeight')) no_grad_set=set('ProjWeight'))
def test_check_grad_ingore_input(self): def test_check_grad_ingore_input(self):
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
...@@ -243,7 +245,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): ...@@ -243,7 +245,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('Input')) no_grad_set=set('Input'))
def test_check_grad_ingore_h0(self): def test_check_grad_ingore_h0(self):
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
...@@ -255,7 +257,7 @@ class TestLstmpOpHasInitial(TestLstmpOp): ...@@ -255,7 +257,7 @@ class TestLstmpOpHasInitial(TestLstmpOp):
no_grad_set=set('H0')) no_grad_set=set('H0'))
def test_check_grad_ingore_c0(self): def test_check_grad_ingore_c0(self):
N = len(self.lod[0]) - 1 N = len(self.lod[0])
self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64') self.outputs['OrderedP0'] = np.zeros((N, self.P)).astype('float64')
self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64') self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64') self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
......
...@@ -70,7 +70,7 @@ class TestMineHardExamplesOp(OpTest): ...@@ -70,7 +70,7 @@ class TestMineHardExamplesOp(OpTest):
self.updated_match_indices = self.match_indices self.updated_match_indices = self.match_indices
self.neg_indices_lod = [[0, 1, 2]] self.neg_indices_lod = [[1, 1]]
self.neg_indices = np.array([[1], [0]]).astype('int32') self.neg_indices = np.array([[1], [0]]).astype('int32')
...@@ -92,7 +92,7 @@ class TestMineHardExamplesOpHardExample(TestMineHardExamplesOp): ...@@ -92,7 +92,7 @@ class TestMineHardExamplesOpHardExample(TestMineHardExamplesOp):
self.updated_match_indices = np.array([[0, -1, -1], self.updated_match_indices = np.array([[0, -1, -1],
[-1, -1, -1]]).astype('int32') [-1, -1, -1]]).astype('int32')
self.neg_indices_lod = [[0, 1, 3]] self.neg_indices_lod = [[1, 2]]
self.neg_indices = np.array([[2], [0], [2]]).astype('int32') self.neg_indices = np.array([[2], [0], [2]]).astype('int32')
......
...@@ -135,12 +135,12 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold, ...@@ -135,12 +135,12 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold,
batch_size = scores.shape[0] batch_size = scores.shape[0]
det_outs = [] det_outs = []
lod = [0] lod = []
for n in range(batch_size): for n in range(batch_size):
nmsed_outs, nmsed_num = multiclass_nms(boxes[n], scores[n], background, nmsed_outs, nmsed_num = multiclass_nms(boxes[n], scores[n], background,
score_threshold, nms_threshold, score_threshold, nms_threshold,
nms_top_k, keep_top_k) nms_top_k, keep_top_k)
lod.append(lod[-1] + nmsed_num) lod.append(nmsed_num)
if nmsed_num == 0: continue if nmsed_num == 0: continue
for c, indices in nmsed_outs.iteritems(): for c, indices in nmsed_outs.iteritems():
......
...@@ -27,9 +27,9 @@ class TestOneHotOp(OpTest): ...@@ -27,9 +27,9 @@ class TestOneHotOp(OpTest):
self.op_type = 'one_hot' self.op_type = 'one_hot'
depth = 10 depth = 10
dimension = 12 dimension = 12
x_lod = [[0, 4, 5, 8, 11]] x_lod = [[4, 1, 3, 3]]
x = [np.random.randint(0, depth - 1) for i in xrange(x_lod[0][-1])] x = [np.random.randint(0, depth - 1) for i in xrange(sum(x_lod[0]))]
x = np.array(x).astype('int').reshape([x_lod[0][-1], 1]) x = np.array(x).astype('int').reshape([sum(x_lod[0]), 1])
out = np.zeros(shape=(np.product(x.shape[:-1]), out = np.zeros(shape=(np.product(x.shape[:-1]),
depth)).astype('float32') depth)).astype('float32')
...@@ -50,9 +50,9 @@ class TestOneHotOp_default_dtype(OpTest): ...@@ -50,9 +50,9 @@ class TestOneHotOp_default_dtype(OpTest):
self.op_type = 'one_hot' self.op_type = 'one_hot'
depth = 10 depth = 10
dimension = 12 dimension = 12
x_lod = [[0, 4, 5, 8, 11]] x_lod = [[4, 1, 3, 3]]
x = [np.random.randint(0, depth - 1) for i in xrange(x_lod[0][-1])] x = [np.random.randint(0, depth - 1) for i in xrange(sum(x_lod[0]))]
x = np.array(x).astype('int').reshape([x_lod[0][-1], 1]) x = np.array(x).astype('int').reshape([sum(x_lod[0]), 1])
out = np.zeros(shape=(np.product(x.shape[:-1]), out = np.zeros(shape=(np.product(x.shape[:-1]),
depth)).astype('float32') depth)).astype('float32')
...@@ -75,11 +75,11 @@ class TestOneHotOp_exception(OpTest): ...@@ -75,11 +75,11 @@ class TestOneHotOp_exception(OpTest):
self.place = core.CPUPlace() self.place = core.CPUPlace()
self.dimension = 12 self.dimension = 12
self.x = core.LoDTensor() self.x = core.LoDTensor()
x_lod = [[0, 4, 5, 8, 11]] x_lod = [[4, 1, 3, 3]]
data = [np.random.randint(11, 20) for i in xrange(x_lod[0][-1])] data = [np.random.randint(11, 20) for i in xrange(sum(x_lod[0]))]
data = np.array(data).astype('int').reshape([x_lod[0][-1], 1]) data = np.array(data).astype('int').reshape([sum(x_lod[0]), 1])
self.x.set(data, self.place) self.x.set(data, self.place)
self.x.set_lod(x_lod) self.x.set_recursive_sequence_lengths(x_lod)
def test_check_output(self): def test_check_output(self):
program = Program() program = Program()
......
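Sketch of the one_hot test change using only numpy (the fluid-specific calls are quoted in comments): under the length-based convention the total row count is sum(x_lod[0]) rather than the final offset x_lod[0][-1].

import numpy as np

depth = 10
x_lod = [[4, 1, 3, 3]]
num_rows = sum(x_lod[0])                                  # 11
x = np.random.randint(0, depth - 1, size=(num_rows, 1)).astype('int')
out = np.zeros((num_rows, depth)).astype('float32')
out[np.arange(num_rows), x.ravel()] = 1.0                 # reference one-hot rows
# With a fluid build the lod is then attached via
#     tensor.set_recursive_sequence_lengths(x_lod)
# replacing the removed tensor.set_lod(offset_lod) call.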
...@@ -173,6 +173,7 @@ class TestCRFModel(unittest.TestCase): ...@@ -173,6 +173,7 @@ class TestCRFModel(unittest.TestCase):
pe.run(feed=feeder.feed(cur_batch), pe.run(feed=feeder.feed(cur_batch),
fetch_list=[avg_cost.name]))[0] fetch_list=[avg_cost.name]))[0]
@unittest.skip(reason="CI hangs")
def test_update_sparse_parameter_all_reduce(self): def test_update_sparse_parameter_all_reduce(self):
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
...@@ -181,6 +182,7 @@ class TestCRFModel(unittest.TestCase): ...@@ -181,6 +182,7 @@ class TestCRFModel(unittest.TestCase):
self.check_network_convergence( self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=False) is_sparse=True, build_strategy=build_strategy, use_cuda=False)
@unittest.skip(reason="CI hangs")
def test_update_dense_parameter_all_reduce(self): def test_update_dense_parameter_all_reduce(self):
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce
...@@ -189,6 +191,7 @@ class TestCRFModel(unittest.TestCase): ...@@ -189,6 +191,7 @@ class TestCRFModel(unittest.TestCase):
self.check_network_convergence( self.check_network_convergence(
is_sparse=False, build_strategy=build_strategy, use_cuda=False) is_sparse=False, build_strategy=build_strategy, use_cuda=False)
@unittest.skip(reason="CI hangs")
def test_update_sparse_parameter_reduce(self): def test_update_sparse_parameter_reduce(self):
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
...@@ -197,6 +200,7 @@ class TestCRFModel(unittest.TestCase): ...@@ -197,6 +200,7 @@ class TestCRFModel(unittest.TestCase):
self.check_network_convergence( self.check_network_convergence(
is_sparse=True, build_strategy=build_strategy, use_cuda=False) is_sparse=True, build_strategy=build_strategy, use_cuda=False)
@unittest.skip(reason="CI hangs")
def test_update_dense_parameter_reduce(self): def test_update_dense_parameter_reduce(self):
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
......
...@@ -28,7 +28,7 @@ class TestPrintOpCPU(unittest.TestCase): ...@@ -28,7 +28,7 @@ class TestPrintOpCPU(unittest.TestCase):
self.x_tensor = core.LoDTensor() self.x_tensor = core.LoDTensor()
tensor_np = np.random.random(size=(2, 3)).astype('float32') tensor_np = np.random.random(size=(2, 3)).astype('float32')
self.x_tensor.set(tensor_np, self.place) self.x_tensor.set(tensor_np, self.place)
self.x_tensor.set_lod([[0, 1, 1]]) self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
def build_network(self, only_forward, **kargs): def build_network(self, only_forward, **kargs):
x = layers.data('x', shape=[3], dtype='float32', lod_level=1) x = layers.data('x', shape=[3], dtype='float32', lod_level=1)
...@@ -62,7 +62,7 @@ class TestPrintOpGPU(TestPrintOpCPU): ...@@ -62,7 +62,7 @@ class TestPrintOpGPU(TestPrintOpCPU):
self.x_tensor = core.LoDTensor() self.x_tensor = core.LoDTensor()
tensor_np = np.random.random(size=(2, 3)).astype('float32') tensor_np = np.random.random(size=(2, 3)).astype('float32')
self.x_tensor.set(tensor_np, self.place) self.x_tensor.set(tensor_np, self.place)
self.x_tensor.set_lod([[0, 1, 1]]) self.x_tensor.set_recursive_sequence_lengths([[1, 1]])
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -70,11 +70,10 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -70,11 +70,10 @@ class TestReorderLoDTensor(unittest.TestCase):
lod_level_i = numpy.random.randint( lod_level_i = numpy.random.randint(
low=1, low=1,
high=5, high=5,
size=self.num_seq if i == 0 else lod_level_i[-1]) size=self.num_seq if i == 0 else sum(lod_level_i)).tolist()
lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist()
data_lod.append(lod_level_i) data_lod.append(lod_level_i)
data_value = numpy.random.random( data_value = numpy.random.random(
size=[data_lod[-1][-1] if data_lod else self.num_seq size=[sum(data_lod[-1]) if data_lod else self.num_seq
] + data_shape).astype('float32') ] + data_shape).astype('float32')
self.data[data_name] = (data_value, data_lod) self.data[data_name] = (data_value, data_lod)
...@@ -84,29 +83,36 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -84,29 +83,36 @@ class TestReorderLoDTensor(unittest.TestCase):
tensor = fluid.Tensor() tensor = fluid.Tensor()
tensor.set(self.data[desc[0]][0], place) tensor.set(self.data[desc[0]][0], place)
if self.data[desc[0]][1]: if self.data[desc[0]][1]:
tensor.set_lod(self.data[desc[0]][1]) tensor.set_recursive_sequence_lengths(self.data[desc[0]][1])
self.inputs[desc[0]] = tensor self.inputs[desc[0]] = tensor
def reorder(self): def reorder(self):
level = 0 def convert_to_offset(lod):
offset_lod = [[0] for i in lod]
for i, level in enumerate(lod):
for seq_len in level:
offset_lod[i].append(offset_lod[i][-1] + seq_len)
return offset_lod
level = 0
# compute the rank_table according to ref_lod # compute the rank_table according to ref_lod
ref_lod = self.data[self.data_desc[1][0]][1][level] ref_lod = self.data[self.data_desc[1][0]][1][level]
rank_table = [] # list of (index, length) rank_table = [] # list of (index, length)
for i in range(len(ref_lod) - 1): for i in range(len(ref_lod)):
rank_table.append((i, ref_lod[i + 1] - ref_lod[i])) rank_table.append((i, ref_lod[i]))
rank_table = sorted(rank_table, lambda x, y: y[1] - x[1]) rank_table = sorted(rank_table, lambda x, y: y[1] - x[1])
# compute the input sequence info according to input_lod # compute the input sequence info according to input_lod
input_value, input_lod = self.data[self.data_desc[0][0]] input_value, input_lod = self.data[self.data_desc[0][0]]
offset_lod = convert_to_offset(input_lod)
input_table = [] # list of (offset, length, sub_lod) input_table = [] # list of (offset, length, sub_lod)
if input_lod: if offset_lod:
for i in range(len(input_lod[level]) - 1): for i in range(len(offset_lod[level]) - 1):
start_idx = i start_idx = i
end_idx = i + 1 end_idx = i + 1
sub_lod = [] sub_lod = []
for lod_level_i in input_lod[level:]: for lod_level_i in offset_lod[level:]:
sub_lod_i = [] sub_lod_i = []
for idx in range(start_idx, end_idx): for idx in range(start_idx, end_idx):
sub_lod_i.append(lod_level_i[idx + 1] - lod_level_i[ sub_lod_i.append(lod_level_i[idx + 1] - lod_level_i[
...@@ -132,10 +138,9 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -132,10 +138,9 @@ class TestReorderLoDTensor(unittest.TestCase):
input_seq_sub_lod = input_table[index][2] input_seq_sub_lod = input_table[index][2]
if len(output_lod) == 0: if len(output_lod) == 0:
output_lod = [[0] for i in input_seq_sub_lod] output_lod = [[] for i in input_seq_sub_lod]
for i, sub_lod_i in enumerate(input_seq_sub_lod): for i, level in enumerate(input_seq_sub_lod):
for idx_sub in sub_lod_i: output_lod[i].extend(level)
output_lod[i].append(output_lod[i][-1] + idx_sub)
return output_value, output_lod return output_value, output_lod
def test_reorder_lod_tensor(self): def test_reorder_lod_tensor(self):
...@@ -148,7 +153,8 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -148,7 +153,8 @@ class TestReorderLoDTensor(unittest.TestCase):
self.assertTrue( self.assertTrue(
numpy.allclose( numpy.allclose(
numpy.array(actual_output), expect_output, atol=0.001)) numpy.array(actual_output), expect_output, atol=0.001))
self.assertEqual(expect_output_lod, actual_output.lod()) self.assertEqual(expect_output_lod,
actual_output.recursive_sequence_lengths())
# check gradient # check gradient
expect_grad = numpy.ones_like(self.data[self.data_desc[0][0]][0]) expect_grad = numpy.ones_like(self.data[self.data_desc[0][0]][0])
expect_grad_lod = self.data[self.data_desc[0][0]][1] expect_grad_lod = self.data[self.data_desc[0][0]][1]
...@@ -156,7 +162,8 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -156,7 +162,8 @@ class TestReorderLoDTensor(unittest.TestCase):
self.assertTrue( self.assertTrue(
numpy.allclose( numpy.allclose(
numpy.array(actual_grad), expect_grad, atol=0.001)) numpy.array(actual_grad), expect_grad, atol=0.001))
self.assertEqual(expect_grad_lod, actual_grad.lod()) self.assertEqual(expect_grad_lod,
actual_grad.recursive_sequence_lengths())
def test_reorder_tensor(self): def test_reorder_tensor(self):
self.data_desc[0][-1] = 0 # input is tensor self.data_desc[0][-1] = 0 # input is tensor
...@@ -168,7 +175,8 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -168,7 +175,8 @@ class TestReorderLoDTensor(unittest.TestCase):
self.assertTrue( self.assertTrue(
numpy.allclose( numpy.allclose(
numpy.array(actual_output), expect_output, atol=0.001)) numpy.array(actual_output), expect_output, atol=0.001))
self.assertEqual(expect_output_lod, actual_output.lod()) self.assertEqual(expect_output_lod,
actual_output.recursive_sequence_lengths())
# check gradient # check gradient
expect_grad = numpy.ones_like(self.data[self.data_desc[0][0]][0]) expect_grad = numpy.ones_like(self.data[self.data_desc[0][0]][0])
expect_grad_lod = self.data[self.data_desc[0][0]][1] expect_grad_lod = self.data[self.data_desc[0][0]][1]
...@@ -176,14 +184,14 @@ class TestReorderLoDTensor(unittest.TestCase): ...@@ -176,14 +184,14 @@ class TestReorderLoDTensor(unittest.TestCase):
self.assertTrue( self.assertTrue(
numpy.allclose( numpy.allclose(
numpy.array(actual_grad), expect_grad, atol=0.001)) numpy.array(actual_grad), expect_grad, atol=0.001))
self.assertEqual(expect_grad_lod, actual_grad.lod()) self.assertEqual(expect_grad_lod,
actual_grad.recursive_sequence_lengths())
# compare outputs between LodTensors with explicit and implicit lod # compare outputs between LodTensors with explicit and implicit lod
# use the same data but set the input lod explicitly # use the same data but set the input lod explicitly
input_lod = [[ input_lod = [[1] * len(self.data[self.data_desc[0][0]][0])]
i for i in range(len(self.data[self.data_desc[0][0]][0]) + 1) self.inputs[self.data_desc[0][0]].set_recursive_sequence_lengths(
]] input_lod)
self.inputs[self.data_desc[0][0]].set_lod(input_lod)
# preserve the output of LodTensor with implicit lod to compare # preserve the output of LodTensor with implicit lod to compare
expect_output = [ expect_output = [
numpy.array(actual_output) for actual_output in self.actual_outputs numpy.array(actual_output) for actual_output in self.actual_outputs
......
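The convert_to_offset helper added to the reorder_lod_tensor test, reproduced here as a standalone sketch so the rank-table computation above is easier to follow:

def convert_to_offset(lod):
    # one offset vector per lod level, each starting at 0
    offset_lod = [[0] for _ in lod]
    for i, level in enumerate(lod):
        for seq_len in level:
            offset_lod[i].append(offset_lod[i][-1] + seq_len)
    return offset_lod

# a two-level length-based lod becomes two offset vectors:
assert convert_to_offset([[2, 1], [1, 2, 2]]) == [[0, 2, 3], [0, 1, 3, 5]]
# The rank table is then just the (index, length) pairs of the reference
# level, sorted by length in descending order.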
...@@ -107,7 +107,7 @@ class TestROIPoolOp(OpTest): ...@@ -107,7 +107,7 @@ class TestROIPoolOp(OpTest):
rois = [] rois = []
self.rois_lod = [[]] self.rois_lod = [[]]
for bno in range(self.batch_size): for bno in range(self.batch_size):
self.rois_lod[0].append(len(rois)) self.rois_lod[0].append(bno + 1)
for i in range(bno + 1): for i in range(bno + 1):
x1 = np.random.random_integers( x1 = np.random.random_integers(
0, self.width / self.spatial_scale - self.pooled_width) 0, self.width / self.spatial_scale - self.pooled_width)
...@@ -121,7 +121,6 @@ class TestROIPoolOp(OpTest): ...@@ -121,7 +121,6 @@ class TestROIPoolOp(OpTest):
roi = [bno, x1, y1, x2, y2] roi = [bno, x1, y1, x2, y2]
rois.append(roi) rois.append(roi)
self.rois_lod[0].append(len(rois))
self.rois_num = len(rois) self.rois_num = len(rois)
self.rois = np.array(rois).astype("int64") self.rois = np.array(rois).astype("int64")
......
...@@ -19,8 +19,10 @@ from op_test import OpTest ...@@ -19,8 +19,10 @@ from op_test import OpTest
def row_conv_forward(x, lod, wt): def row_conv_forward(x, lod, wt):
out = np.zeros_like(x) out = np.zeros_like(x)
seq_info = lod[0] num_sequences = len(lod[0])
num_sequences = len(seq_info) - 1 seq_info = [0]
for seq_len in lod[0]:
seq_info.append(seq_info[-1] + seq_len)
context_length = wt.shape[0] context_length = wt.shape[0]
for i in range(num_sequences): # loop over number of sequences for i in range(num_sequences): # loop over number of sequences
...@@ -32,7 +34,6 @@ def row_conv_forward(x, lod, wt): ...@@ -32,7 +34,6 @@ def row_conv_forward(x, lod, wt):
cur_timesteps = end - start cur_timesteps = end - start
for j in range(cur_timesteps): # loop over different timesteps for j in range(cur_timesteps): # loop over different timesteps
for k in range(context_length): for k in range(context_length):
if j + k >= cur_timesteps: if j + k >= cur_timesteps:
continue continue
curoutput[j, :] += curinput[j + k, :] * wt[k, :] curoutput[j, :] += curinput[j + k, :] * wt[k, :]
...@@ -44,8 +45,8 @@ class TestRowConvOp1(OpTest): ...@@ -44,8 +45,8 @@ class TestRowConvOp1(OpTest):
def setUp(self): def setUp(self):
self.op_type = "row_conv" self.op_type = "row_conv"
lod = [[0, 2, 5, 7]] lod = [[2, 3, 2]]
T = lod[0][-1] T = sum(lod[0])
D = 16 D = 16
context_length = 2 context_length = 2
...@@ -75,8 +76,8 @@ class TestRowConvOp2(OpTest): ...@@ -75,8 +76,8 @@ class TestRowConvOp2(OpTest):
def setUp(self): def setUp(self):
self.op_type = "row_conv" self.op_type = "row_conv"
lod = [[0, 20, 50, 100]] lod = [[20, 30, 50]]
T = lod[0][-1] T = sum(lod[0])
D = 35 D = 35
context_length = 35 context_length = 35
......
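Minimal sketch of the boundary handling in the updated row_conv reference: per-sequence offsets are rebuilt from the length-based lod before looping over sequences (the lod below is the one used by TestRowConvOp1).

import numpy as np

def seq_offsets(lengths):
    seq_info = [0]
    for seq_len in lengths:
        seq_info.append(seq_info[-1] + seq_len)
    return seq_info

lod = [[2, 3, 2]]
T, D = sum(lod[0]), 16
x = np.random.uniform(0.1, 1, (T, D)).astype('float32')
offsets = seq_offsets(lod[0])                 # [0, 2, 5, 7]
for i in range(len(lod[0])):                  # loop over sequences
    start, end = offsets[i], offsets[i + 1]
    cur_input = x[start:end, :]               # timesteps of one sequence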
...@@ -18,14 +18,19 @@ import sys ...@@ -18,14 +18,19 @@ import sys
from op_test import OpTest from op_test import OpTest
def to_abs_lod(lod): def to_abs_offset_lod(lod):
if len(lod) == 0 or len(lod) == 1: offset_lod = [[0] for i in lod]
return lod for i, level in enumerate(lod):
for seq_len in level:
offset_lod[i].append(offset_lod[i][-1] + seq_len)
if len(offset_lod) == 0 or len(offset_lod) == 1:
return offset_lod
import copy import copy
new_lod = copy.deepcopy(lod) new_offset_lod = copy.deepcopy(offset_lod)
for idx, val in enumerate(lod[0]): for idx, val in enumerate(offset_lod[0]):
new_lod[0][idx] = lod[1][val] new_offset_lod[0][idx] = offset_lod[1][val]
return new_lod return new_offset_lod
def seq_concat(inputs, level): def seq_concat(inputs, level):
...@@ -35,11 +40,11 @@ def seq_concat(inputs, level): ...@@ -35,11 +40,11 @@ def seq_concat(inputs, level):
x1 = inputs['X'][1][1][0] x1 = inputs['X'][1][1][0]
level_idx = len(lod0) - level - 1 level_idx = len(lod0) - level - 1
outs = [] outs = []
for i in range(len(lod0[level_idx]) - 1): for i in range(len(lod0[level_idx])):
sub_x0 = x0[to_abs_lod(lod0)[level_idx][i]:to_abs_lod(lod0)[level_idx][ sub_x0 = x0[to_abs_offset_lod(lod0)[level_idx][i]:to_abs_offset_lod(
i + 1], :] lod0)[level_idx][i + 1], :]
sub_x1 = x1[to_abs_lod(lod1)[level_idx][i]:to_abs_lod(lod1)[level_idx][ sub_x1 = x1[to_abs_offset_lod(lod1)[level_idx][i]:to_abs_offset_lod(
i + 1], :] lod1)[level_idx][i + 1], :]
outs.append(np.concatenate((sub_x0, sub_x1), axis=0)) outs.append(np.concatenate((sub_x0, sub_x1), axis=0))
return np.concatenate(outs, axis=0) return np.concatenate(outs, axis=0)
...@@ -48,9 +53,9 @@ class TestSeqConcatOp(OpTest): ...@@ -48,9 +53,9 @@ class TestSeqConcatOp(OpTest):
def set_data(self): def set_data(self):
# two level, batch size is 3 # two level, batch size is 3
x0 = np.random.random((4, 6, 3)).astype('float32') x0 = np.random.random((4, 6, 3)).astype('float32')
lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]] lod0 = [[2, 2], [1, 1, 1, 1]]
x1 = np.random.random((4, 8, 3)).astype('float32') x1 = np.random.random((4, 8, 3)).astype('float32')
lod1 = [[0, 2, 4], [0, 1, 2, 3, 4]] lod1 = [[2, 2], [1, 1, 1, 1]]
axis = 1 axis = 1
level = 1 level = 1
self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]} self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
...@@ -72,14 +77,14 @@ class TestSeqConcatOpLevelZeroNestedSequence(TestSeqConcatOp): ...@@ -72,14 +77,14 @@ class TestSeqConcatOpLevelZeroNestedSequence(TestSeqConcatOp):
def set_data(self): def set_data(self):
# two level, batch size is 3 # two level, batch size is 3
x0 = np.random.random((4, 6, 3)).astype('float32') x0 = np.random.random((4, 6, 3)).astype('float32')
lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]] lod0 = [[2, 2], [1, 1, 1, 1]]
x1 = np.random.random((7, 6, 3)).astype('float32') x1 = np.random.random((7, 6, 3)).astype('float32')
lod1 = [[0, 2, 4], [0, 1, 3, 5, 7]] lod1 = [[2, 2], [1, 2, 2, 2]]
axis = 0 axis = 0
level = 0 level = 0
self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]} self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
self.attrs = {'axis': axis, 'level': level} self.attrs = {'axis': axis, 'level': level}
out_lod = [[0, 2, 4], [0, 2, 5, 8, 11]] out_lod = [[2, 2], [2, 3, 3, 3]]
self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)} self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)}
...@@ -87,14 +92,14 @@ class TestSeqConcatOplevelOneNestedSequence(TestSeqConcatOp): ...@@ -87,14 +92,14 @@ class TestSeqConcatOplevelOneNestedSequence(TestSeqConcatOp):
def set_data(self): def set_data(self):
# two level, batch size is 3 # two level, batch size is 3
x0 = np.random.random((4, 6, 3)).astype('float32') x0 = np.random.random((4, 6, 3)).astype('float32')
lod0 = [[0, 2, 4], [0, 1, 2, 3, 4]] lod0 = [[2, 2], [1, 1, 1, 1]]
x1 = np.random.random((7, 6, 3)).astype('float32') x1 = np.random.random((7, 6, 3)).astype('float32')
lod1 = [[0, 3, 4], [0, 1, 3, 5, 7]] lod1 = [[3, 1], [1, 2, 2, 2]]
axis = 0 axis = 0
level = 1 level = 1
self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]} self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
self.attrs = {'axis': axis, 'level': level} self.attrs = {'axis': axis, 'level': level}
out_lod = [[0, 5, 8], [0, 1, 2, 3, 5, 7, 8, 9, 11]] out_lod = [[5, 3], [1, 1, 1, 2, 2, 1, 1, 2]]
self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)} self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)}
...@@ -102,14 +107,14 @@ class TestSeqConcatOpLevelZeroSequence(TestSeqConcatOp): ...@@ -102,14 +107,14 @@ class TestSeqConcatOpLevelZeroSequence(TestSeqConcatOp):
def set_data(self): def set_data(self):
# two level, batch size is 3 # two level, batch size is 3
x0 = np.random.random((4, 3, 4)).astype('float32') x0 = np.random.random((4, 3, 4)).astype('float32')
lod0 = [[0, 1, 2, 3, 4]] lod0 = [[1, 1, 1, 1]]
x1 = np.random.random((7, 3, 4)).astype('float32') x1 = np.random.random((7, 3, 4)).astype('float32')
lod1 = [[0, 1, 3, 5, 7]] lod1 = [[1, 2, 2, 2]]
axis = 0 axis = 0
level = 0 level = 0
self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]} self.inputs = {'X': [('x0', (x0, lod0)), ('x1', (x1, lod1))]}
self.attrs = {'axis': axis, 'level': level} self.attrs = {'axis': axis, 'level': level}
out_lod = [[0, 2, 5, 8, 11]] out_lod = [[2, 3, 3, 3]]
self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)} self.outputs = {'Out': (seq_concat(self.inputs, level), out_lod)}
......
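Sketch of to_abs_offset_lod from the sequence_concat test: the nested length-based lod is first turned into offsets, then the top level is mapped through the lower level so it addresses rows of the tensor directly.

import copy

def to_abs_offset_lod(lod):
    offset_lod = [[0] for _ in lod]
    for i, level in enumerate(lod):
        for seq_len in level:
            offset_lod[i].append(offset_lod[i][-1] + seq_len)
    if len(offset_lod) <= 1:
        return offset_lod
    new_offset_lod = copy.deepcopy(offset_lod)
    for idx, val in enumerate(offset_lod[0]):
        # top-level boundaries re-expressed in absolute row indices
        new_offset_lod[0][idx] = offset_lod[1][val]
    return new_offset_lod

assert to_abs_offset_lod([[2, 2], [1, 2, 2, 2]]) == [[0, 3, 7], [0, 1, 3, 5, 7]]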
...@@ -75,35 +75,38 @@ class TestSeqProject(OpTest): ...@@ -75,35 +75,38 @@ class TestSeqProject(OpTest):
pading_data = self.pad_data pading_data = self.pad_data
out = np.zeros((self.input_size[0], self.context_length * out = np.zeros((self.input_size[0], self.context_length *
self.input_size[1])).astype('float32') self.input_size[1])).astype('float32')
lod = lod[0] offset = [0]
for seq_len in lod[0]:
offset.append(offset[-1] + seq_len)
begin_pad = np.max([0, -self.context_start]) begin_pad = np.max([0, -self.context_start])
for i in range(len(lod) - 1): for i in range(len(offset) - 1):
for j in range(self.context_length): for j in range(self.context_length):
in_begin = lod[i] + self.context_start + j in_begin = offset[i] + self.context_start + j
in_end = lod[i + 1] + self.context_start + j in_end = offset[i + 1] + self.context_start + j
out_begin = lod[i] out_begin = offset[i]
out_end = lod[i + 1] out_end = offset[i + 1]
if in_begin < lod[i]: if in_begin < offset[i]:
pad_size = np.min([lod[i] - in_begin, lod[i + 1] - lod[i]]) pad_size = np.min(
[offset[i] - in_begin, offset[i + 1] - offset[i]])
if self.padding_trainable: if self.padding_trainable:
sub_w = pading_data[j:j + pad_size, :] sub_w = pading_data[j:j + pad_size, :]
out[lod[i]:lod[i] + pad_size, j * self.input_size[1]:( out[offset[i]:offset[i] + pad_size, j * self.input_size[
j + 1) * self.input_size[1]] = sub_w 1]:(j + 1) * self.input_size[1]] = sub_w
out_begin = lod[i] + pad_size out_begin = offset[i] + pad_size
in_begin = lod[i] in_begin = offset[i]
if in_end > lod[i + 1]: if in_end > offset[i + 1]:
pad_size = np.min( pad_size = np.min(
[in_end - lod[i + 1], lod[i + 1] - lod[i]]) [in_end - offset[i + 1], offset[i + 1] - offset[i]])
if self.padding_trainable: if self.padding_trainable:
sub_w = pading_data[begin_pad + self.context_start + j - sub_w = pading_data[begin_pad + self.context_start + j -
pad_size:begin_pad + pad_size:begin_pad +
self.context_start + j, :] self.context_start + j, :]
out[lod[i + 1] - pad_size:lod[i + 1], j * self. out[offset[i + 1] - pad_size:offset[i + 1], j * self.
input_size[1]:(j + 1) * self.input_size[1]] = sub_w input_size[1]:(j + 1) * self.input_size[1]] = sub_w
in_end = lod[i + 1] in_end = offset[i + 1]
out_end = lod[i + 1] - pad_size out_end = offset[i + 1] - pad_size
if in_end <= in_begin: if in_end <= in_begin:
continue continue
...@@ -175,7 +178,11 @@ class TestSeqProject(OpTest): ...@@ -175,7 +178,11 @@ class TestSeqProject(OpTest):
self.context_stride = 1 self.context_stride = 1
self.input_size = [self.input_row, 23] self.input_size = [self.input_row, 23]
self.lod = [[0, 4, 5, 8, self.input_row]] offset_lod = [[0, 4, 5, 8, self.input_row]]
self.lod = [[]]
# convert from offset-based lod to length-based lod
for i in range(len(offset_lod[0]) - 1):
self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i])
self.output_represention = 8 # output feature size self.output_represention = 8 # output feature size
...@@ -188,7 +195,11 @@ class TestSeqProjectCase1(TestSeqProject): ...@@ -188,7 +195,11 @@ class TestSeqProjectCase1(TestSeqProject):
self.context_stride = 1 self.context_stride = 1
self.input_size = [self.input_row, 23] self.input_size = [self.input_row, 23]
self.lod = [[0, 4, 5, 8, self.input_row]] offset_lod = [[0, 4, 5, 8, self.input_row]]
self.lod = [[]]
# convert from offset-based lod to length-based lod
for i in range(len(offset_lod[0]) - 1):
self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i])
self.output_represention = 8 # output feature size self.output_represention = 8 # output feature size
...@@ -203,8 +214,12 @@ class TestSeqProjectCase2(TestSeqProject): ...@@ -203,8 +214,12 @@ class TestSeqProjectCase2(TestSeqProject):
self.input_size = [self.input_row, 23] self.input_size = [self.input_row, 23]
idx = range(self.input_size[0]) idx = range(self.input_size[0])
del idx[0] del idx[0]
self.lod = [[0] + np.sort(random.sample(idx, 8)).tolist() + offset_lod = [[0] + np.sort(random.sample(idx, 8)).tolist() +
[self.input_size[0]]] [self.input_size[0]]]
self.lod = [[]]
# convert from offset-based lod to length-based lod
for i in range(len(offset_lod[0]) - 1):
self.lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i])
self.output_represention = 8 # output feature size self.output_represention = 8 # output feature size
......
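The conversion the sequence_conv tests now perform in setUp, shown as a standalone sketch (the trailing boundary 25 stands in for a hypothetical input_row):

offset_lod = [[0, 4, 5, 8, 25]]
lod = [[]]
# convert from offset-based lod to length-based lod
for i in range(len(offset_lod[0]) - 1):
    lod[0].append(offset_lod[0][i + 1] - offset_lod[0][i])
assert lod == [[4, 1, 3, 17]]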
...@@ -18,26 +18,34 @@ from op_test import OpTest ...@@ -18,26 +18,34 @@ from op_test import OpTest
class TestSeqAvgPool(OpTest): class TestSeqAvgPool(OpTest):
def convert_to_offset(self, lod):
offset = [[0] for i in lod]
for i, level in enumerate(lod):
for seq_len in level:
offset[i].append(offset[i][-1] + seq_len)
return offset
def set_data(self): def set_data(self):
self.op_type = 'sequence_pool' self.op_type = 'sequence_pool'
# one level, batch size is 4 # one level, batch size is 4
x = np.random.uniform(0.1, 1, [11, 23]).astype('float32') x = np.random.uniform(0.1, 1, [11, 23]).astype('float32')
lod = [[0, 4, 5, 8, 11]] lod = [[4, 1, 3, 3]]
self.inputs = {'X': (x, lod)} self.inputs = {'X': (x, lod)}
offset = self.convert_to_offset(lod)
out = np.zeros((4, 23)).astype('float32') out = np.zeros((4, 23)).astype('float32')
self.outputs = {'Out': out} self.outputs = {'Out': out}
return x, lod, out return x, offset, out
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "AVERAGE"} self.attrs = {'pooltype': "AVERAGE"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = sub_x.mean(axis=0) out[i] = sub_x.mean(axis=0)
def setUp(self): def setUp(self):
x, lod, out = self.set_data() x, offset, out = self.set_data()
self.compute(x, lod, out) self.compute(x, offset, out)
def test_check_output(self): def test_check_output(self):
self.check_output() self.check_output()
...@@ -50,10 +58,10 @@ class TestSeqAvgPool(OpTest): ...@@ -50,10 +58,10 @@ class TestSeqAvgPool(OpTest):
class TestSeqSumPool(TestSeqAvgPool): class TestSeqSumPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "SUM"} self.attrs = {'pooltype': "SUM"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = sub_x.sum(axis=0) out[i] = sub_x.sum(axis=0)
...@@ -61,46 +69,47 @@ class TestSeqMaxPool(TestSeqAvgPool): ...@@ -61,46 +69,47 @@ class TestSeqMaxPool(TestSeqAvgPool):
def set_data(self): def set_data(self):
self.op_type = 'sequence_pool' self.op_type = 'sequence_pool'
x = np.random.uniform(0.1, 1, [13, 23]).astype('float32') x = np.random.uniform(0.1, 1, [13, 23]).astype('float32')
lod = [[0, 4, 5, 8, 13]] lod = [[4, 1, 3, 5]]
for i in range(4): offset = self.convert_to_offset(lod)
l = lod[0][i + 1] - lod[0][i] for i in range(len(offset[0]) - 1):
x[lod[0][i] + np.random.randint(l), :] += 2.0 l = offset[0][i + 1] - offset[0][i]
x[offset[0][i] + np.random.randint(l), :] += 2.0
self.inputs = {'X': (x, lod)} self.inputs = {'X': (x, lod)}
out = np.zeros((4, 23)).astype('float32') out = np.zeros((4, 23)).astype('float32')
self.outputs = {'Out': out} self.outputs = {'Out': out}
return x, lod, out return x, offset, out
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "MAX"} self.attrs = {'pooltype': "MAX"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = np.amax(sub_x, axis=0) out[i] = np.amax(sub_x, axis=0)
class TestSeqSqrtPool(TestSeqAvgPool): class TestSeqSqrtPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "SQRT"} self.attrs = {'pooltype': "SQRT"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[offset[0][i]:offset[0][i + 1], :]
len = lod[0][i + 1] - lod[0][i] seq_len = offset[0][i + 1] - offset[0][i]
out[i] = sub_x.sum(axis=0) / np.sqrt(len) out[i] = sub_x.sum(axis=0) / np.sqrt(seq_len)
class TestSeqLastPool(TestSeqAvgPool): class TestSeqLastPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "LAST"} self.attrs = {'pooltype': "LAST"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = sub_x[-1, :] out[i] = sub_x[-1, :]
class TestSeqFirstPool(TestSeqAvgPool): class TestSeqFirstPool(TestSeqAvgPool):
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "FIRST"} self.attrs = {'pooltype': "FIRST"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = x[lod[0][i]:lod[0][i + 1], :] sub_x = x[offset[0][i]:offset[0][i + 1], :]
out[i] = sub_x[0, :] out[i] = sub_x[0, :]
...@@ -109,35 +118,39 @@ class TestSeqAvgPool2D(TestSeqAvgPool): ...@@ -109,35 +118,39 @@ class TestSeqAvgPool2D(TestSeqAvgPool):
self.op_type = 'sequence_pool' self.op_type = 'sequence_pool'
# one level, batch size is 4 # one level, batch size is 4
x = np.random.uniform(0.1, 1, [13, 3, 17]).astype('float32') x = np.random.uniform(0.1, 1, [13, 3, 17]).astype('float32')
lod = [[0, 4, 5, 8, 13]] lod = [[4, 1, 3, 5]]
self.inputs = {'X': (x, lod)} self.inputs = {'X': (x, lod)}
offset = self.convert_to_offset(lod)
out = np.zeros((4, 3, 17)).astype('float32') out = np.zeros((4, 3, 17)).astype('float32')
self.outputs = {'Out': out} self.outputs = {'Out': out}
return x, lod, out return x, offset, out
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "AVERAGE"} self.attrs = {'pooltype': "AVERAGE"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
out[i] = np.reshape(sub_x.mean(axis=0), (3, 17)) out[i] = np.reshape(sub_x.mean(axis=0), (3, 17))
class TestSeqSumPool2D(TestSeqAvgPool2D): class TestSeqSumPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "SUM"} self.attrs = {'pooltype': "SUM"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
out[i] = np.reshape(sub_x.sum(axis=0), (3, 17)) out[i] = np.reshape(sub_x.sum(axis=0), (3, 17))
class TestSeqSqrtPool2D(TestSeqAvgPool2D): class TestSeqSqrtPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "SQRT"} self.attrs = {'pooltype': "SQRT"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
len = lod[0][i + 1] - lod[0][i] (-1, 3 * 17))
out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(len), (3, 17)) seq_len = offset[0][i + 1] - offset[0][i]
out[i] = np.reshape(sub_x.sum(axis=0) / np.sqrt(seq_len), (3, 17))
def test_check_grad(self): def test_check_grad(self):
# Remove MaxIndex after check_grad is refined. # Remove MaxIndex after check_grad is refined.
...@@ -150,36 +163,40 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D): ...@@ -150,36 +163,40 @@ class TestSeqMaxPool2D(TestSeqAvgPool2D):
def set_data(self): def set_data(self):
self.op_type = 'sequence_pool' self.op_type = 'sequence_pool'
x = np.random.uniform(0.1, 1, [13, 3, 11]).astype('float32') x = np.random.uniform(0.1, 1, [13, 3, 11]).astype('float32')
lod = [[0, 4, 5, 8, 13]] lod = [[4, 1, 3, 5]]
self.inputs = {'X': (x, lod)} self.inputs = {'X': (x, lod)}
for i in range(4): offset = self.convert_to_offset(lod)
l = lod[0][i + 1] - lod[0][i] for i in range(len(offset[0]) - 1):
x[lod[0][i] + np.random.randint(l), :] += 1.0 l = offset[0][i + 1] - offset[0][i]
x[offset[0][i] + np.random.randint(l), :] += 1.0
out = np.zeros((4, 3, 11)).astype('float32') out = np.zeros((4, 3, 11)).astype('float32')
self.outputs = {'Out': out} self.outputs = {'Out': out}
return x, lod, out return x, offset, out
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "MAX"} self.attrs = {'pooltype': "MAX"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 11)) sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 11))
out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11)) out[i] = np.reshape(np.amax(sub_x, axis=0), (3, 11))
class TestSeqLastPool2D(TestSeqAvgPool2D): class TestSeqLastPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "LAST"} self.attrs = {'pooltype': "LAST"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
out[i] = np.reshape(sub_x[-1, :], (3, 17)) out[i] = np.reshape(sub_x[-1, :], (3, 17))
class TestSeqFirstPool2D(TestSeqAvgPool2D): class TestSeqFirstPool2D(TestSeqAvgPool2D):
def compute(self, x, lod, out): def compute(self, x, offset, out):
self.attrs = {'pooltype': "FIRST"} self.attrs = {'pooltype': "FIRST"}
for i in range(4): for i in range(len(offset[0]) - 1):
sub_x = np.reshape(x[lod[0][i]:lod[0][i + 1], :], (-1, 3 * 17)) sub_x = np.reshape(x[offset[0][i]:offset[0][i + 1], :],
(-1, 3 * 17))
out[i] = np.reshape(sub_x[0, :], (3, 17)) out[i] = np.reshape(sub_x[0, :], (3, 17))
......
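Sketch of the updated sequence_pool reference, assuming only numpy: the length-based lod is converted to offsets once, then every pooltype slices x with offset pairs instead of hard-coding four sequences.

import numpy as np

def convert_to_offset(lod):
    offset = [[0] for _ in lod]
    for i, level in enumerate(lod):
        for seq_len in level:
            offset[i].append(offset[i][-1] + seq_len)
    return offset

lod = [[4, 1, 3, 3]]
offset = convert_to_offset(lod)               # [[0, 4, 5, 8, 11]]
x = np.random.uniform(0.1, 1, [11, 23]).astype('float32')
out = np.zeros((len(lod[0]), 23)).astype('float32')
for i in range(len(offset[0]) - 1):           # one output row per sequence
    sub_x = x[offset[0][i]:offset[0][i + 1], :]
    out[i] = sub_x.mean(axis=0)               # "AVERAGE" pooling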
...@@ -18,15 +18,17 @@ from op_test import OpTest ...@@ -18,15 +18,17 @@ from op_test import OpTest
def sequence_erase(in_seq, lod0, tokens): def sequence_erase(in_seq, lod0, tokens):
new_lod0 = [0] new_lod0 = []
out_seq = [] out_seq = []
for i in range(0, len(lod0) - 1): offset = 0
for i in range(0, len(lod0)):
num_out = 0 num_out = 0
for dat in in_seq[lod0[i]:lod0[i + 1]]: for dat in in_seq[offset:(offset + lod0[i])]:
if dat not in tokens: if dat not in tokens:
out_seq.append(dat) out_seq.append(dat)
num_out += 1 num_out += 1
new_lod0.append(new_lod0[-1] + num_out) offset += lod0[i]
new_lod0.append(num_out)
return np.array(out_seq).astype("int32"), new_lod0 return np.array(out_seq).astype("int32"), new_lod0
...@@ -34,7 +36,7 @@ class TestSequenceEraseOpInt32(OpTest): ...@@ -34,7 +36,7 @@ class TestSequenceEraseOpInt32(OpTest):
def setUp(self): def setUp(self):
self.op_type = "sequence_erase" self.op_type = "sequence_erase"
in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
lod = [[0, 9, 13, 24, 30]] lod = [[9, 4, 11, 6]]
tokens = [2, 3, 5] tokens = [2, 3, 5]
out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens) out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
self.attrs = {'tokens': tokens} self.attrs = {'tokens': tokens}
...@@ -49,7 +51,7 @@ class TestSequenceEraseOpInt64(OpTest): ...@@ -49,7 +51,7 @@ class TestSequenceEraseOpInt64(OpTest):
def setUp(self): def setUp(self):
self.op_type = "sequence_erase" self.op_type = "sequence_erase"
in_seq = np.random.randint(0, 10, (30, 1)).astype("int64") in_seq = np.random.randint(0, 10, (30, 1)).astype("int64")
lod = [[0, 9, 13, 24, 30]] lod = [[9, 4, 11, 6]]
tokens = [2, 3, 5] tokens = [2, 3, 5]
out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens) out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
self.attrs = {'tokens': tokens} self.attrs = {'tokens': tokens}
...@@ -64,7 +66,7 @@ class TestSequenceEraseOpEmpty(OpTest): ...@@ -64,7 +66,7 @@ class TestSequenceEraseOpEmpty(OpTest):
def setUp(self): def setUp(self):
self.op_type = "sequence_erase" self.op_type = "sequence_erase"
in_seq = np.random.randint(0, 10, (30, 1)).astype("int32") in_seq = np.random.randint(0, 10, (30, 1)).astype("int32")
lod = [[0, 9, 13, 24, 30]] lod = [[9, 4, 11, 6]]
tokens = [] tokens = []
out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens) out_seq, new_lod0 = sequence_erase(in_seq, lod[0], tokens)
self.attrs = {'tokens': tokens} self.attrs = {'tokens': tokens}
......
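Usage sketch of the rewritten sequence_erase reference (lengths in, lengths out); input values below are hypothetical.

import numpy as np

def sequence_erase(in_seq, lod0, tokens):
    new_lod0, out_seq, offset = [], [], 0
    for seq_len in lod0:
        kept = [d for d in in_seq[offset:offset + seq_len] if d not in tokens]
        out_seq.extend(kept)
        new_lod0.append(len(kept))            # per-sequence count of kept tokens
        offset += seq_len
    return np.array(out_seq).astype('int32'), new_lod0

out, new_lod = sequence_erase([2, 7, 3, 5, 1, 9], [4, 2], tokens=[2, 3, 5])
assert new_lod == [1, 2] and out.tolist() == [7, 1, 9]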
...@@ -21,7 +21,7 @@ class TestSequenceExpand(OpTest): ...@@ -21,7 +21,7 @@ class TestSequenceExpand(OpTest):
def set_data(self): def set_data(self):
x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32') x_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32')
y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32') y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
y_lod = [[0, 1, 4, 8]] y_lod = [[1, 3, 4]]
self.inputs = {'X': x_data, 'Y': (y_data, y_lod)} self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
def compute(self): def compute(self):
...@@ -37,23 +37,27 @@ class TestSequenceExpand(OpTest): ...@@ -37,23 +37,27 @@ class TestSequenceExpand(OpTest):
out = np.zeros(shape=((0, ) + x_data.shape[1:]), dtype=x_data.dtype) out = np.zeros(shape=((0, ) + x_data.shape[1:]), dtype=x_data.dtype)
if x_lod is None: if x_lod is None:
x_idx = [i for i in xrange(x_data.shape[0] + 1)] # x_idx = [i for i in xrange(x_data.shape[0] + 1)]
x_idx = [1] * x_data.shape[0]
else: else:
x_idx = x_lod[0] x_idx = x_lod[0]
out_lod = [[0]] out_lod = [[]]
offset = 0
for i in xrange(len(y_lod[ref_level])):
repeat_num = y_lod[ref_level][i]
x_len = x_idx[i]
for i in xrange(1, len(y_lod[ref_level])):
repeat_num = y_lod[ref_level][i] - y_lod[ref_level][i - 1]
x_len = x_idx[i] - x_idx[i - 1]
if repeat_num > 0: if repeat_num > 0:
x_sub = x_data[x_idx[i - 1]:x_idx[i], :] x_sub = x_data[offset:(offset + x_len), :]
stacked_x_sub = x_sub stacked_x_sub = x_sub
for r in range(repeat_num - 1): for r in range(repeat_num - 1):
stacked_x_sub = np.vstack((stacked_x_sub, x_sub)) stacked_x_sub = np.vstack((stacked_x_sub, x_sub))
out = np.vstack((out, stacked_x_sub)) out = np.vstack((out, stacked_x_sub))
if x_lod is not None: if x_lod is not None:
for j in xrange(repeat_num): for j in xrange(repeat_num):
out_lod[0].append(out_lod[0][-1] + x_len) out_lod[0].append(x_len)
offset += x_len
if x_lod is None: if x_lod is None:
self.outputs = {'Out': out} self.outputs = {'Out': out}
...@@ -75,9 +79,9 @@ class TestSequenceExpand(OpTest): ...@@ -75,9 +79,9 @@ class TestSequenceExpand(OpTest):
class TestSequenceExpandCase1(TestSequenceExpand): class TestSequenceExpandCase1(TestSequenceExpand):
def set_data(self): def set_data(self):
x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32') x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
x_lod = [[0, 2, 5]] x_lod = [[2, 3]]
y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32') y_data = np.random.uniform(0.1, 1, [13, 1]).astype('float32')
y_lod = [[0, 2, 5], [0, 2, 4, 7, 10, 13]] y_lod = [[2, 3], [2, 2, 3, 3, 3]]
self.inputs = {'X': x_data, 'Y': (y_data, y_lod)} self.inputs = {'X': x_data, 'Y': (y_data, y_lod)}
self.attrs = {'ref_level': 0} self.attrs = {'ref_level': 0}
...@@ -85,9 +89,9 @@ class TestSequenceExpandCase1(TestSequenceExpand): ...@@ -85,9 +89,9 @@ class TestSequenceExpandCase1(TestSequenceExpand):
class TestSequenceExpandCase2(TestSequenceExpand): class TestSequenceExpandCase2(TestSequenceExpand):
def set_data(self): def set_data(self):
x_data = np.random.uniform(0.1, 1, [1, 2, 2]).astype('float32') x_data = np.random.uniform(0.1, 1, [1, 2, 2]).astype('float32')
x_lod = [[0, 1]] x_lod = [[1]]
y_data = np.random.uniform(0.1, 1, [2, 2, 2]).astype('float32') y_data = np.random.uniform(0.1, 1, [2, 2, 2]).astype('float32')
y_lod = [[0, 2], [0, 2]] y_lod = [[2], [1, 1]]
self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
self.attrs = {'ref_level': 0} self.attrs = {'ref_level': 0}
...@@ -95,9 +99,9 @@ class TestSequenceExpandCase2(TestSequenceExpand): ...@@ -95,9 +99,9 @@ class TestSequenceExpandCase2(TestSequenceExpand):
class TestSequenceExpandCase3(TestSequenceExpand): class TestSequenceExpandCase3(TestSequenceExpand):
def set_data(self): def set_data(self):
x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32') x_data = np.random.uniform(0.1, 1, [4, 1]).astype('float32')
x_lod = [[0, 1, 2, 3, 4]] x_lod = [[1, 1, 1, 1]]
y_data = np.random.uniform(0.1, 1, [6, 1]).astype('float32') y_data = np.random.uniform(0.1, 1, [8, 1]).astype('float32')
y_lod = [[0, 2, 4, 4, 6]] y_lod = [[2, 2, 2, 2]]
self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
...@@ -105,9 +109,9 @@ class TestSequenceExpandCase4(TestSequenceExpand): ...@@ -105,9 +109,9 @@ class TestSequenceExpandCase4(TestSequenceExpand):
def set_data(self): def set_data(self):
data = np.random.uniform(0.1, 1, [5 * 2, 1]) data = np.random.uniform(0.1, 1, [5 * 2, 1])
x_data = np.array(data).reshape([5, 2]).astype('float32') x_data = np.array(data).reshape([5, 2]).astype('float32')
x_lod = [[0, 2, 5]] x_lod = [[2, 3]]
y_data = np.random.uniform(0.1, 1, [3, 1]).astype('float32') y_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
y_lod = [[0, 1, 3], [0, 1, 3]] y_lod = [[2], [2, 3]]
self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)} self.inputs = {'X': (x_data, x_lod), 'Y': (y_data, y_lod)}
......
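Sketch of the sequence_expand reference under length-based lod: y_lod at ref_level holds one repeat count per x sub-sequence, and a running offset replaces the old arithmetic on x_lod offsets (the shapes below mirror TestSequenceExpandCase1).

import numpy as np

x_data = np.random.uniform(0.1, 1, [5, 1]).astype('float32')
x_lod = [[2, 3]]
repeats = [2, 3]                          # y_lod[ref_level]
out, out_lod, offset = [], [[]], 0
for x_len, repeat_num in zip(x_lod[0], repeats):
    x_sub = x_data[offset:offset + x_len, :]
    for _ in range(repeat_num):
        out.append(x_sub)
        out_lod[0].append(x_len)          # output lod is also length-based
    offset += x_len
out = np.concatenate(out, axis=0)
assert out.shape == (13, 1) and out_lod == [[2, 2, 3, 3, 3]]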
...@@ -22,7 +22,7 @@ class TestSequenceReshape(OpTest): ...@@ -22,7 +22,7 @@ class TestSequenceReshape(OpTest):
def setUp(self): def setUp(self):
self.op_type = 'sequence_reshape' self.op_type = 'sequence_reshape'
dimension = 12 dimension = 12
x_lod = [[0, 4, 5, 8, 11]] x_lod = [[4, 1, 3, 3]]
x = np.random.uniform(0.1, 1, [11, 24]).astype('float32') x = np.random.uniform(0.1, 1, [11, 24]).astype('float32')
self.inputs = {'X': (x, x_lod)} self.inputs = {'X': (x, x_lod)}
...@@ -34,13 +34,13 @@ class TestSequenceReshape(OpTest): ...@@ -34,13 +34,13 @@ class TestSequenceReshape(OpTest):
def compute_output(self, x, x_lod, dimension): def compute_output(self, x, x_lod, dimension):
x_width = x.shape[1] x_width = x.shape[1]
out_lod = [[0]] out_lod = [[]]
for i in xrange(len(x_lod[0]) - 1): for i in xrange(len(x_lod[0])):
seq_len = x_lod[0][i + 1] - x_lod[0][i] seq_len = x_lod[0][i]
offset = (seq_len * x_width) / dimension offset = (seq_len * x_width) / dimension
assert int(offset) * dimension == seq_len * x_width assert int(offset) * dimension == seq_len * x_width
out_lod[0].append(out_lod[0][-1] + int(offset)) out_lod[0].append(int(offset))
out = np.zeros(shape=(out_lod[0][-1], dimension)).astype('float32') out = np.zeros(shape=(sum(out_lod[0]), dimension)).astype('float32')
out.ravel()[:] = x.ravel()[:] out.ravel()[:] = x.ravel()[:]
return out, out_lod return out, out_lod
...@@ -55,7 +55,7 @@ class TestSequenceReshape_reduce(TestSequenceReshape): ...@@ -55,7 +55,7 @@ class TestSequenceReshape_reduce(TestSequenceReshape):
def setUp(self): def setUp(self):
self.op_type = 'sequence_reshape' self.op_type = 'sequence_reshape'
dimension = 24 dimension = 24
x_lod = [[0, 4, 6, 8, 12]] x_lod = [[4, 2, 2, 4]]
x = np.random.uniform(0.1, 1, [12, 12]).astype('float32') x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
self.inputs = {'X': (x, x_lod)} self.inputs = {'X': (x, x_lod)}
...@@ -70,7 +70,7 @@ class TestSequenceReshape_same(TestSequenceReshape): ...@@ -70,7 +70,7 @@ class TestSequenceReshape_same(TestSequenceReshape):
def setUp(self): def setUp(self):
self.op_type = 'sequence_reshape' self.op_type = 'sequence_reshape'
dimension = 12 dimension = 12
x_lod = [[0, 4, 6, 8, 12]] x_lod = [[4, 2, 2, 4]]
x = np.random.uniform(0.1, 1, [12, 12]).astype('float32') x = np.random.uniform(0.1, 1, [12, 12]).astype('float32')
self.inputs = {'X': (x, x_lod)} self.inputs = {'X': (x, x_lod)}
......
...@@ -29,20 +29,20 @@ class TestSequenceSliceOp(OpTest): ...@@ -29,20 +29,20 @@ class TestSequenceSliceOp(OpTest):
self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length} self.inputs = {'X': (x, lod), 'Offset': offset, 'Length': length}
outs = [] #np.zeros((100, 3, 2)).astype('float32') outs = [] #np.zeros((100, 3, 2)).astype('float32')
out_lod = [[0]] out_lod = [[]]
out_lod_offset = 0 lod_offset = 0
for i in range(len(offset)): for i in range(len(offset)):
sub_x = x[lod[0][i] + offset[i, 0]:lod[0][i] + offset[i, 0] + sub_x = x[lod_offset + offset[i, 0]:lod_offset + offset[i, 0] +
length[i, 0], :] length[i, 0], :]
out_lod_offset = out_lod_offset + len(sub_x)
outs.append(sub_x) outs.append(sub_x)
out_lod[0].append(out_lod_offset) out_lod[0].append(len(sub_x))
lod_offset += lod[0][i]
outs = np.concatenate(outs, axis=0) outs = np.concatenate(outs, axis=0)
self.outputs = {'Out': (outs, out_lod)} self.outputs = {'Out': (outs, out_lod)}
def init_test_case(self): def init_test_case(self):
self.x_dim = (100, 3, 2) self.x_dim = (100, 3, 2)
self.x_lod = [[0, 20, 40, 60, 80, 100]] self.x_lod = [[20, 20, 20, 20, 20]]
self.offset = [[1], [2], [3], [4], [5]] self.offset = [[1], [2], [3], [4], [5]]
self.length = [[10], [8], [6], [4], [2]] self.length = [[10], [8], [6], [4], [2]]
......
...@@ -26,15 +26,16 @@ class TestSequenceSoftmaxOp(OpTest): ...@@ -26,15 +26,16 @@ class TestSequenceSoftmaxOp(OpTest):
self.init_op_type() self.init_op_type()
x = np.random.uniform(0.1, 1, (11, 1)).astype("float32") x = np.random.uniform(0.1, 1, (11, 1)).astype("float32")
lod = [[0, 4, 5, 8, 11]] lod = [[4, 1, 3, 3]]
out = np.zeros((11, 1)).astype("float32") out = np.zeros((11, 1)).astype("float32")
for i in range(4): offset = 0
sub_x = x[lod[0][i]:lod[0][i + 1], :] for i in range(len(lod[0])):
sub_x = sub_x.reshape(1, lod[0][i + 1] - lod[0][i]) sub_x = x[offset:offset + lod[0][i], :]
sub_x = sub_x.reshape(1, lod[0][i])
sub_out = stable_softmax(sub_x) sub_out = stable_softmax(sub_x)
out[lod[0][i]:lod[0][i + 1], :] = sub_out.reshape( out[offset:offset + lod[0][i], :] = sub_out.reshape(lod[0][i], 1)
lod[0][i + 1] - lod[0][i], 1) offset += lod[0][i]
self.inputs = {"X": (x, lod)} self.inputs = {"X": (x, lod)}
self.outputs = {"Out": out} self.outputs = {"Out": out}
......
...@@ -54,12 +54,12 @@ class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase): ...@@ -54,12 +54,12 @@ class TestShrinkRNNMemoryReferLoD(TestShrinkRNNMemoryBase):
def test_refer_lod(self): def test_refer_lod(self):
cpu = core.CPUPlace() cpu = core.CPUPlace()
x_tensor = core.LoDTensor() x_tensor = core.LoDTensor()
x_tensor.set_lod([[0, 2, 5, 6]]) x_tensor.set_recursive_sequence_lengths([[2, 3, 1]])
tensor_np = np.random.random(size=(6, 100)).astype('float32') tensor_np = np.random.random(size=(6, 100)).astype('float32')
x_tensor.set(tensor_np, cpu) x_tensor.set(tensor_np, cpu)
rank_table_tensor = core.LoDTensor() rank_table_tensor = core.LoDTensor()
rank_table_tensor.set_lod([[0, 1, 3, 6]]) rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]])
rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'), rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'),
cpu) cpu)
...@@ -83,7 +83,7 @@ class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase): ...@@ -83,7 +83,7 @@ class TestShrinkRNNMemoryNoLoD(TestShrinkRNNMemoryBase):
x_tensor.set(tensor_np, cpu) x_tensor.set(tensor_np, cpu)
rank_table_tensor = core.LoDTensor() rank_table_tensor = core.LoDTensor()
rank_table_tensor.set_lod([[0, 1, 3, 6]]) rank_table_tensor.set_recursive_sequence_lengths([[1, 2, 3]])
rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'), rank_table_tensor.set(np.random.random(size=(6, 1)).astype('float32'),
cpu) cpu)
......
...@@ -56,7 +56,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -56,7 +56,7 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
def test_split_and_merge_lod_tensor_level_0(self): def test_split_and_merge_lod_tensor_level_0(self):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place()) tensor.set(np.arange(10).reshape(10, 1).astype('int32'), self.place())
tensor.set_lod([[0, 3, 9, 10]]) tensor.set_recursive_sequence_lengths([[3, 6, 1]])
mask_np = np.array([0, 1, 0]).astype('bool') mask_np = np.array([0, 1, 0]).astype('bool')
mask_np = np.expand_dims(mask_np, axis=1) mask_np = np.expand_dims(mask_np, axis=1)
...@@ -68,15 +68,15 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -68,15 +68,15 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1) expect_true_tensor = np.expand_dims(expect_true_tensor, axis=1)
expect_true = core.LoDTensor() expect_true = core.LoDTensor()
expect_true.set(expect_true_tensor, self.place()) expect_true.set(expect_true_tensor, self.place())
expect_true.set_lod([[0, 6]]) expect_true.set_recursive_sequence_lengths([[6]])
expect_false_tensor = np.array([0, 1, 2, 9]).astype('int32') expect_false_tensor = np.array([0, 1, 2, 9]).astype('int32')
expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1) expect_false_tensor = np.expand_dims(expect_false_tensor, axis=1)
expect_false_lod = [[0, 3, 4]] expect_false_lod = [[3, 1]]
expect_false = core.LoDTensor() expect_false = core.LoDTensor()
expect_false.set(expect_false_tensor, self.place()) expect_false.set(expect_false_tensor, self.place())
expect_false.set_lod(expect_false_lod) expect_false.set_recursive_sequence_lengths(expect_false_lod)
self.main( self.main(
tensor=tensor, tensor=tensor,
...@@ -126,7 +126,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase): ...@@ -126,7 +126,8 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
def check_tensor_same(self, actual, expect): def check_tensor_same(self, actual, expect):
self.assertTrue(np.allclose(np.array(actual), np.array(expect))) self.assertTrue(np.allclose(np.array(actual), np.array(expect)))
self.assertEqual(actual.lod(), expect.lod()) self.assertEqual(actual.recursive_sequence_lengths(),
expect.recursive_sequence_lengths())
class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
...@@ -151,7 +152,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase): ...@@ -151,7 +152,7 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place) tensor.set(np.arange(10).reshape(10, 1).astype('float32'), place)
tensor.set_lod([[0, 3, 9, 10]]) tensor.set_recursive_sequence_lengths([[3, 6, 1]])
mask_np = np.array([0, 1, 0]).astype('bool') mask_np = np.array([0, 1, 0]).astype('bool')
mask_np = np.expand_dims(mask_np, axis=1) mask_np = np.expand_dims(mask_np, axis=1)
......
...@@ -22,22 +22,23 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod): ...@@ -22,22 +22,23 @@ def gen_match_and_neg_indices(num_prior, gt_lod, neg_lod):
if len(gt_lod) != len(neg_lod): if len(gt_lod) != len(neg_lod):
raise AssertionError("The input arguments are illegal.") raise AssertionError("The input arguments are illegal.")
batch_size = len(gt_lod) - 1 batch_size = len(gt_lod)
match_indices = -1 * np.ones((batch_size, num_prior)).astype('int32') match_indices = -1 * np.ones((batch_size, num_prior)).astype('int32')
neg_indices = np.zeros((neg_lod[-1], 1)).astype('int32') neg_indices = np.zeros((sum(neg_lod), 1)).astype('int32')
offset = 0
for n in range(batch_size): for n in range(batch_size):
gt_num = gt_lod[n + 1] - gt_lod[n] gt_num = gt_lod[n]
ids = random.sample([i for i in range(num_prior)], gt_num) ids = random.sample([i for i in range(num_prior)], gt_num)
match_indices[n, ids] = [i for i in range(gt_num)] match_indices[n, ids] = [i for i in range(gt_num)]
ret_ids = set([i for i in range(num_prior)]) - set(ids) ret_ids = set([i for i in range(num_prior)]) - set(ids)
s = neg_lod[n] l = neg_lod[n]
e = neg_lod[n + 1]
l = e - s
neg_ids = random.sample(ret_ids, l) neg_ids = random.sample(ret_ids, l)
neg_indices[s:e, :] = np.array(neg_ids).astype('int32').reshape(l, 1) neg_indices[offset:offset + neg_lod[n], :] = np.array(neg_ids).astype(
'int32').reshape(l, 1)
offset += neg_lod[n]
return match_indices, neg_indices return match_indices, neg_indices
...@@ -56,24 +57,28 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod, ...@@ -56,24 +57,28 @@ def target_assign(encoded_box, gt_label, match_indices, neg_indices, gt_lod,
# init weight for target label # init weight for target label
trg_label_wt = np.zeros((batch_size, num_prior, 1)).astype('float32') trg_label_wt = np.zeros((batch_size, num_prior, 1)).astype('float32')
gt_offset = 0
neg_offset = 0
for i in range(batch_size): for i in range(batch_size):
cur_indices = match_indices[i] cur_indices = match_indices[i]
col_ids = np.where(cur_indices > -1) col_ids = np.where(cur_indices > -1)
col_val = cur_indices[col_ids] col_val = cur_indices[col_ids]
gt_start = gt_lod[i]
# target bbox # target bbox
for v, c in zip(col_val + gt_start, col_ids[0].tolist()): for v, c in zip(col_val + gt_offset, col_ids[0].tolist()):
trg_box[i][c][:] = encoded_box[v][c][:] trg_box[i][c][:] = encoded_box[v][c][:]
# weight for target bbox # weight for target bbox
trg_box_wt[i][col_ids] = 1.0 trg_box_wt[i][col_ids] = 1.0
trg_label[i][col_ids] = gt_label[col_val + gt_start] trg_label[i][col_ids] = gt_label[col_val + gt_offset]
trg_label_wt[i][col_ids] = 1.0 trg_label_wt[i][col_ids] = 1.0
# set target label weight to 1.0 for the negative samples # set target label weight to 1.0 for the negative samples
if neg_indices is not None: if neg_indices is not None:
neg_ids = neg_indices[neg_lod[i]:neg_lod[i + 1]] neg_ids = neg_indices[neg_offset:neg_offset + neg_lod[i]]
trg_label_wt[i][neg_ids] = 1.0 trg_label_wt[i][neg_ids] = 1.0
# update offset
gt_offset += gt_lod[i]
neg_offset += neg_lod[i]
return trg_box, trg_box_wt, trg_label, trg_label_wt return trg_box, trg_box_wt, trg_label, trg_label_wt
...@@ -83,11 +88,11 @@ class TestTargetAssginFloatType(OpTest): ...@@ -83,11 +88,11 @@ class TestTargetAssginFloatType(OpTest):
self.op_type = "target_assign" self.op_type = "target_assign"
num_prior = 120 num_prior = 120
num_class = 21 num_class = 21
gt_lod = [0, 5, 11, 23] gt_lod = [5, 6, 12]
neg_lod = [0, 4, 7, 13] neg_lod = [4, 3, 6]
mismatch_value = 0 mismatch_value = 0
batch_size = len(gt_lod) - 1 batch_size = len(gt_lod)
num_gt = gt_lod[-1] num_gt = sum(gt_lod)
encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32') encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32')
gt_label = np.random.randint( gt_label = np.random.randint(
...@@ -121,11 +126,11 @@ class TestTargetAssginIntType(OpTest): ...@@ -121,11 +126,11 @@ class TestTargetAssginIntType(OpTest):
self.op_type = "target_assign" self.op_type = "target_assign"
num_prior = 120 num_prior = 120
num_class = 21 num_class = 21
gt_lod = [0, 5, 11, 23] gt_lod = [5, 6, 12]
neg_lod = [0, 4, 7, 13] neg_lod = [4, 3, 6]
mismatch_value = 0 mismatch_value = 0
batch_size = len(gt_lod) - 1 batch_size = len(gt_lod)
num_gt = gt_lod[-1] num_gt = sum(gt_lod)
encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32') encoded_box = np.random.random((num_gt, num_prior, 4)).astype('float32')
gt_label = np.random.randint( gt_label = np.random.randint(
......
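Sketch of the offset bookkeeping the target_assign reference now does: per-image starting positions are accumulated from the length-based gt_lod and neg_lod instead of being read from offset lods (values match the test above).

gt_lod = [5, 6, 12]                       # ground-truth boxes per image
neg_lod = [4, 3, 6]                       # negative priors per image
gt_offset, neg_offset, slices = 0, 0, []
for i in range(len(gt_lod)):              # batch_size == len(gt_lod)
    slices.append(((gt_offset, gt_offset + gt_lod[i]),
                   (neg_offset, neg_offset + neg_lod[i])))
    gt_offset += gt_lod[i]
    neg_offset += neg_lod[i]
assert (gt_offset, neg_offset) == (sum(gt_lod), sum(neg_lod)) == (23, 13)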
...@@ -69,15 +69,14 @@ class TestTensor(unittest.TestCase): ...@@ -69,15 +69,14 @@ class TestTensor(unittest.TestCase):
array[0, 0, 0] = 3 array[0, 0, 0] = 3
array[3, 3, 5] = 10 array[3, 3, 5] = 10
lod_tensor.set(array, place) lod_tensor.set(array, place)
lod_tensor.set_lod([[0, 2, 4]]) lod_tensor.set_recursive_sequence_lengths([[2, 2]])
lod_v = numpy.array(lod_tensor) lod_v = numpy.array(lod_tensor)
self.assertTrue(numpy.alltrue(array == lod_v)) self.assertTrue(numpy.alltrue(array == lod_v))
lod = lod_tensor.lod() lod = lod_tensor.recursive_sequence_lengths()
self.assertEqual(0, lod[0][0]) self.assertEqual(2, lod[0][0])
self.assertEqual(2, lod[0][1]) self.assertEqual(2, lod[0][1])
self.assertEqual(4, lod[0][2])
def test_float_lod_tensor(self): def test_float_lod_tensor(self):
place = core.CPUPlace() place = core.CPUPlace()
...@@ -97,21 +96,21 @@ class TestTensor(unittest.TestCase): ...@@ -97,21 +96,21 @@ class TestTensor(unittest.TestCase):
lod_v = numpy.array(lod_tensor) lod_v = numpy.array(lod_tensor)
self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0])
self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1])
self.assertEqual(len(lod_tensor.lod()), 0) self.assertEqual(len(lod_tensor.recursive_sequence_lengths()), 0)
lod_py = [[0, 2, 5], [0, 2, 4, 5]] lod_py = [[2, 1], [1, 2, 2]]
lod_tensor.set_lod(lod_py) lod_tensor.set_recursive_sequence_lengths(lod_py)
lod = lod_tensor.lod() lod = lod_tensor.recursive_sequence_lengths()
self.assertListEqual(lod_py, lod) self.assertListEqual(lod_py, lod)
def test_lod_tensor_init(self): def test_lod_tensor_init(self):
scope = core.Scope() scope = core.Scope()
place = core.CPUPlace() place = core.CPUPlace()
lod_py = [[0, 2, 5], [0, 2, 4, 5]] lod_py = [[2, 1], [1, 2, 2]]
lod_tensor = core.LoDTensor() lod_tensor = core.LoDTensor()
lod_tensor.set_dims([5, 2, 3, 4]) lod_tensor.set_dims([5, 2, 3, 4])
lod_tensor.set_lod(lod_py) lod_tensor.set_recursive_sequence_lengths(lod_py)
lod_tensor.alloc_float(place) lod_tensor.alloc_float(place)
tensor_array = numpy.array(lod_tensor) tensor_array = numpy.array(lod_tensor)
tensor_array[0, 0, 0, 0] = 1.0 tensor_array[0, 0, 0, 0] = 1.0
...@@ -121,17 +120,17 @@ class TestTensor(unittest.TestCase): ...@@ -121,17 +120,17 @@ class TestTensor(unittest.TestCase):
lod_v = numpy.array(lod_tensor) lod_v = numpy.array(lod_tensor)
self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0])
self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1])
self.assertListEqual(lod_py, lod_tensor.lod()) self.assertListEqual(lod_py, lod_tensor.recursive_sequence_lengths())
def test_lod_tensor_gpu_init(self): def test_lod_tensor_gpu_init(self):
if not core.is_compiled_with_cuda(): if not core.is_compiled_with_cuda():
return return
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
lod_py = [[0, 2, 5], [0, 2, 4, 5]] lod_py = [[2, 1], [1, 2, 2]]
lod_tensor = core.LoDTensor() lod_tensor = core.LoDTensor()
lod_tensor.set_dims([5, 2, 3, 4]) lod_tensor.set_dims([5, 2, 3, 4])
lod_tensor.set_lod(lod_py) lod_tensor.set_recursive_sequence_lengths(lod_py)
lod_tensor.alloc_float(place) lod_tensor.alloc_float(place)
tensor_array = numpy.array(lod_tensor) tensor_array = numpy.array(lod_tensor)
tensor_array[0, 0, 0, 0] = 1.0 tensor_array[0, 0, 0, 0] = 1.0
...@@ -141,7 +140,7 @@ class TestTensor(unittest.TestCase): ...@@ -141,7 +140,7 @@ class TestTensor(unittest.TestCase):
lod_v = numpy.array(lod_tensor) lod_v = numpy.array(lod_tensor)
self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0]) self.assertAlmostEqual(1.0, lod_v[0, 0, 0, 0])
self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1]) self.assertAlmostEqual(2.0, lod_v[0, 0, 0, 1])
self.assertListEqual(lod_py, lod_tensor.lod()) self.assertListEqual(lod_py, lod_tensor.recursive_sequence_lengths())
def test_empty_tensor(self): def test_empty_tensor(self):
place = core.CPUPlace() place = core.CPUPlace()
......
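The tensor tests above move from the offset-based `set_lod`/`lod` pair to the length-based `set_recursive_sequence_lengths`/`recursive_sequence_lengths` pair. A minimal CPU sketch of the new usage, assuming a Fluid build that exposes these bindings as shown in the diff:

    import numpy
    import paddle.fluid.core as core

    place = core.CPUPlace()
    tensor = core.LoDTensor()
    array = numpy.random.random((4, 4, 6)).astype('float32')
    tensor.set(array, place)

    # Two sequences of length 2 each; the old offset form would be [[0, 2, 4]].
    tensor.set_recursive_sequence_lengths([[2, 2]])

    assert tensor.recursive_sequence_lengths() == [[2, 2]]
    assert numpy.alltrue(numpy.array(tensor) == array)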
...@@ -34,8 +34,8 @@ class CTCForward(object): ...@@ -34,8 +34,8 @@ class CTCForward(object):
self.level = 0 self.level = 0
self.num_classes = softmax.shape[1] self.num_classes = softmax.shape[1]
self.batch_size = len(softmax_lod[self.level]) - 1 self.batch_size = len(softmax_lod[self.level])
assert self.batch_size == len(labels_lod[self.level]) - 1 assert self.batch_size == len(labels_lod[self.level])
self.loss = np.zeros([self.batch_size, 1], dtype="float32") self.loss = np.zeros([self.batch_size, 1], dtype="float32")
self.gradient = np.zeros(self.softmax.shape, dtype="float32") self.gradient = np.zeros(self.softmax.shape, dtype="float32")
...@@ -156,16 +156,20 @@ class CTCForward(object): ...@@ -156,16 +156,20 @@ class CTCForward(object):
return -log_prob return -log_prob
def forward(self): def forward(self):
softmax_offset = 0
labels_offset = 0
for i in range(self.batch_size): for i in range(self.batch_size):
softmax_start_i = self.softmax_lod[self.level][i] softmax_start_i = softmax_offset
softmax_end_i = self.softmax_lod[self.level][i + 1] softmax_end_i = softmax_offset + self.softmax_lod[self.level][i]
labels_start_i = self.labels_lod[self.level][i] labels_start_i = labels_offset
labels_end_i = self.labels_lod[self.level][i + 1] labels_end_i = labels_offset + self.labels_lod[self.level][i]
softmax_a_sequence = self.softmax[softmax_start_i:softmax_end_i, :] softmax_a_sequence = self.softmax[softmax_start_i:softmax_end_i, :]
labels_a_sequence = self.labels[labels_start_i:labels_end_i, :] labels_a_sequence = self.labels[labels_start_i:labels_end_i, :]
self.loss[i] = self.forward_a_sequence(softmax_a_sequence, self.loss[i] = self.forward_a_sequence(softmax_a_sequence,
labels_a_sequence) labels_a_sequence)
softmax_offset += self.softmax_lod[self.level][i]
labels_offset += self.labels_lod[self.level][i]
return self.loss return self.loss
...@@ -173,8 +177,8 @@ class TestWarpCTCOp(OpTest): ...@@ -173,8 +177,8 @@ class TestWarpCTCOp(OpTest):
def config(self): def config(self):
self.batch_size = 4 self.batch_size = 4
self.num_classes = 8 self.num_classes = 8
self.logits_lod = [[0, 4, 5, 8, 11]] self.logits_lod = [[4, 1, 3, 3]]
self.labels_lod = [[0, 3, 4, 8, 12]] self.labels_lod = [[3, 1, 4, 4]]
self.blank = self.num_classes - 1 self.blank = self.num_classes - 1
self.norm_by_times = False self.norm_by_times = False
...@@ -184,11 +188,13 @@ class TestWarpCTCOp(OpTest): ...@@ -184,11 +188,13 @@ class TestWarpCTCOp(OpTest):
logits = np.random.uniform( logits = np.random.uniform(
0.1, 1.0, 0.1, 1.0,
[self.logits_lod[0][-1], self.num_classes]).astype("float32") [sum(self.logits_lod[0]), self.num_classes]).astype("float32")
softmax = np.apply_along_axis(stable_softmax, 1, logits) softmax = np.apply_along_axis(stable_softmax, 1, logits)
# labels should not be blank # labels should not be blank
labels = np.random.randint( labels = np.random.randint(
0, self.num_classes - 1, [self.labels_lod[0][-1], 1], dtype="int32") 0,
self.num_classes - 1, [sum(self.labels_lod[0]), 1],
dtype="int32")
ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod, ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod,
self.blank, self.norm_by_times) self.blank, self.norm_by_times)
...@@ -196,9 +202,8 @@ class TestWarpCTCOp(OpTest): ...@@ -196,9 +202,8 @@ class TestWarpCTCOp(OpTest):
max_sequence_length = 0 max_sequence_length = 0
for i in range(self.batch_size): for i in range(self.batch_size):
max_sequence_length = max( max_sequence_length = max(max_sequence_length,
max_sequence_length, self.logits_lod[0][i])
self.logits_lod[0][i + 1] - self.logits_lod[0][i])
self.gradient = np.zeros( self.gradient = np.zeros(
[max_sequence_length, self.batch_size, self.num_classes], [max_sequence_length, self.batch_size, self.num_classes],
dtype="float32") dtype="float32")
...@@ -222,8 +227,8 @@ class TestWarpCTCOpCase1(TestWarpCTCOp): ...@@ -222,8 +227,8 @@ class TestWarpCTCOpCase1(TestWarpCTCOp):
def config(self): def config(self):
self.batch_size = 4 self.batch_size = 4
self.num_classes = CUDA_BLOCK_SIZE + 2 self.num_classes = CUDA_BLOCK_SIZE + 2
self.logits_lod = [[0, 4, 5, 8, 11]] self.logits_lod = [[4, 1, 3, 3]]
self.labels_lod = [[0, 3, 4, 8, 12]] self.labels_lod = [[3, 1, 4, 4]]
self.blank = 0 self.blank = 0
self.norm_by_times = False self.norm_by_times = False
......
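With length-based LoD, each per-sequence slice is recovered by keeping a running offset, as the updated `forward` above does with `softmax_offset` and `labels_offset`. A standalone sketch of that slicing pattern (the variable names are illustrative):

    import numpy as np

    lengths = [4, 1, 3, 3]                    # one LoD level, e.g. logits_lod[0]
    data = np.arange(sum(lengths)).reshape(-1, 1)

    offset = 0
    sequences = []
    for length in lengths:
        # slice out one sequence, then advance the offset by its length
        sequences.append(data[offset:offset + length])
        offset += length

    assert [len(s) for s in sequences] == lengths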
...@@ -76,11 +76,11 @@ class TestWeightNormalization(unittest.TestCase): ...@@ -76,11 +76,11 @@ class TestWeightNormalization(unittest.TestCase):
lod_level_i = numpy.random.randint( lod_level_i = numpy.random.randint(
low=1, low=1,
high=5, high=5,
size=self.batch_size if i == 0 else lod_level_i[-1]) size=self.batch_size
lod_level_i = [0] + numpy.cumsum(lod_level_i).tolist() if i == 0 else sum(lod_level_i)).tolist()
data_lod.append(lod_level_i) data_lod.append(lod_level_i)
data_value = numpy.random.random( data_value = numpy.random.random(
size=[data_lod[-1][-1] if data_lod else self.batch_size size=[sum(data_lod[-1]) if data_lod else self.batch_size
] + data_shape).astype('float32') ] + data_shape).astype('float32')
self.data[data_name] = (data_value, data_lod) self.data[data_name] = (data_value, data_lod)
...@@ -90,7 +90,7 @@ class TestWeightNormalization(unittest.TestCase): ...@@ -90,7 +90,7 @@ class TestWeightNormalization(unittest.TestCase):
tensor = fluid.Tensor() tensor = fluid.Tensor()
tensor.set(self.data[desc[0]][0], place) tensor.set(self.data[desc[0]][0], place)
if self.data[desc[0]][1]: if self.data[desc[0]][1]:
tensor.set_lod(self.data[desc[0]][1]) tensor.set_recursive_sequence_lengths(self.data[desc[0]][1])
self.inputs[desc[0]] = tensor self.inputs[desc[0]] = tensor
def weight_normalize(self): def weight_normalize(self):
......
...@@ -22,7 +22,7 @@ def as_lodtensor(np_array, lod, place): ...@@ -22,7 +22,7 @@ def as_lodtensor(np_array, lod, place):
tensor = core.LoDTensor() tensor = core.LoDTensor()
tensor.set(np_value, place) tensor.set(np_value, place)
if lod is not None: if lod is not None:
tensor.set_lod(lod) tensor.set_recursive_sequence_lengths(lod)
return tensor return tensor
...@@ -73,7 +73,7 @@ def set_input(scope, op, inputs, place): ...@@ -73,7 +73,7 @@ def set_input(scope, op, inputs, place):
if isinstance(var, tuple) or isinstance(var, np.ndarray): if isinstance(var, tuple) or isinstance(var, np.ndarray):
tensor = scope.find_var(var_name).get_tensor() tensor = scope.find_var(var_name).get_tensor()
if isinstance(var, tuple): if isinstance(var, tuple):
tensor.set_lod(var[1]) tensor.set_recursive_sequence_lengths(var[1])
var = var[0] var = var[0]
tensor.set_dims(var.shape) tensor.set_dims(var.shape)
tensor.set(var, place) tensor.set(var, place)
......
...@@ -7,7 +7,7 @@ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do ...@@ -7,7 +7,7 @@ for file in $(git diff --cached --name-status | awk '$1 != "D" {print $2}'); do
if [[ $file =~ ^(paddle/api/.*|paddle/capi/.*|paddle/contrib/.*|paddle/cuda/.*|paddle/function/.*|paddle/gserver/.*|paddle/math/.*|paddle/optimizer/.*|paddle/parameter/.*|paddle/pserver/.*|paddle/trainer/.*|paddle/utils/.*) ]]; then if [[ $file =~ ^(paddle/api/.*|paddle/capi/.*|paddle/contrib/.*|paddle/cuda/.*|paddle/function/.*|paddle/gserver/.*|paddle/math/.*|paddle/optimizer/.*|paddle/parameter/.*|paddle/pserver/.*|paddle/trainer/.*|paddle/utils/.*) ]]; then
continue; continue;
else else
cpplint $file; cpplint --filter=-readability/fn_size $file;
TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?); TOTAL_ERRORS=$(expr $TOTAL_ERRORS + $?);
fi fi
done done
......