Merge branch 'develop' into inference_lib_dist

8833cfcf · Luo Tao · 59e4dd57 · a07b7510 · 8833cfcf · 8833cfcf
24 changed file
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -47,3 +47,5 @@ sphinx_add_target(paddle_docs_cn
                  ${SPHINX_CACHE_DIR_CN}
                  ${CMAKE_CURRENT_SOURCE_DIR}
                  ${SPHINX_HTML_DIR_CN})
+add_subdirectory(api)
--- a/doc/api/CMakeLists.txt
+++ b/doc/api/CMakeLists.txt
+# configured documentation tools and intermediate build results
+set(BINARY_BUILD_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_build")
+# Sphinx cache with pickled ReST documents
+set(SPHINX_CACHE_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/_doctrees")
+# HTML output director
+set(SPHINX_HTML_DIR_EN "${CMAKE_CURRENT_BINARY_DIR}/en/html")
+configure_file(
+    "${CMAKE_CURRENT_SOURCE_DIR}/../templates/conf.py.en.in"
+    "${BINARY_BUILD_DIR_EN}/conf.py"
+    @ONLY)
+sphinx_add_target(paddle_api_docs
+                  html
+                  ${BINARY_BUILD_DIR_EN}
+                  ${SPHINX_CACHE_DIR_EN}
+                  ${CMAKE_CURRENT_SOURCE_DIR}
+                  ${SPHINX_HTML_DIR_EN})
--- a/doc/design/csp.md
+++ b/doc/design/csp.md
@@ -42,7 +42,7 @@ The type *channel* is conceptually the blocking queue.  In Go, its implemented i
 The `select` operation has been in OS kernels long before Go language.  All Unix kernels implement system calls *poll* and *select*.  They monitor multiple file descriptors to see if I/O is possible on any of them.  This takes O(N) time.  Since Linux 2.6, a new system call, *epoll*, can do the same in O(1) time.  In BSD systems, there is a similar system call *kqueue*.  Go's Linux implementation uses epoll.
-It might be a good idea to implement Fluid's select using epoll too.  In this design doc, we start from the O(N) way, so we could focus on Python binding and the syntax.
+It might be a good idea to implement Fluid's select using epoll too.  In this design doc, we start from the O(N) way so that we could focus on Python binding and the syntax.
 ### Type Channel
@@ -71,14 +71,14 @@ ch1 := make(chan int, 100)  // a channel that can buffer 100 ints.
 In Fluid, we should be able to do the same:
 ```python
-ch  = fluid.make_chan(dtype=INT)
+ch  = fluid.make_channel(dtype=INT)
-ch1 = fluid.make_chan(dtype=INT, 100)
+ch1 = fluid.make_channel(dtype=INT, 100)
 ```
 In addition to that, we want channels that can hold more complex element types, e.g., Tensors of float16:
 ```python
-ch = fluid.make_chan(dtype=Tensor, etype=float16)
+ch = fluid.make_channel(dtype=Tensor, etype=float16)
 ```
 or Tensors of Tensors of float16 etc.
@@ -87,8 +87,135 @@ The point here is that we need a consistent way to compose types, like in C++ we
 ### Send and Recv
+Go's CSP implementation depends on data type *channel*. There are two types of channels:
+1. The unblocked channel, or buffered channel, is a blocking queue with a non-zero sized buffer. The sending to buffered channel blocks if the buffer is full, and the receive operation blocks if the buffer is empty.
+1. blocked channel, or unbuffered channel, is a blocking queue with no buffer.  Both sending and receiving block with unbuffered channels.
+There are four types of actions with a channel:
+1. Create a channel
+   ```go
+   ch := make(chan int) // this is an unbuffered channel
+   ch := make(chan int, 100) // this is a buffered channel of 100 ints.
+   ```
+1. Send
+   ```go
+   ch <- 111
+   ```
+1. Recv
+   ```go
+   y, ok <- ch
+   ```
+1. Close
+   ```go
+   close(ch)
+   ```
+   Please be aware that a closed channel is not a nil channel, which is `var ch chan int`.
+There are some [axioms with channels](https://dave.cheney.net/2014/03/19/channel-axioms):
+1. A send to a nil channel blocks forever
+1. A receive from a nil channel blocks forever
+1. A send to a closed channel panics
+1. A receive from a closed channel returns the residual values and then zeros.
+In Fluid, we have [buffered channels](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/details/buffered_channel.h) and [unbuffered channels](https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/framework/details/unbuffered_channel.h)
+The following program illustrates the Python syntax for accessing Fluid buffers.
+```python
+import fluid
+buffer_size = 10
+ch = fluid.make_channel(dtype=INT, buffer_size)
+# Now write three elements to the channel
+with fluid.while(steps=buffer_size):
+  fluid.send(ch, step)
+  fluid.close_channel(ch)
+with fluid.while(steps=buffer_size):
+  fluid.print(fluid.recv(ch))
+```
+The following example shows that to avoid the always-blocking behavior of unbuffered channels, we need to use Fluid's goroutines.
+```python
+import fluid
+ch = fluid.make_channel(dtype=INT)
+with fluid.go():
+  fluid.send(ch)
+y = fluid.recv(ch)
+fluid.close_channel(ch)
+```
 ### Select
+In Go, the `select` statement lets a goroutine wait on multiple communication operations. A `select` blocks until one of its cases can run, then it executes that case. It chooses one at random if multiple are ready.
+```go
+ch1  := make(chan int)       
+ch2  := make(chan int, 100)
+x := 0
+for {
+    select {
+    case ch1 <- x:
+      x := x + 1
+    case y <- ch2:
+      fmt.Println("Received on channel")
+    default:
+      fmt.Println("Default")
+    }
+  }
+```
+In Fluid, we should be able to do the same:
+```python
+ch1  = fluid.make_chan(dtype=INT)
+ch2 = fluid.make_chan(dtype=INT, 100)
+sel = fluid.select()
+with sel.case(ch1, 'w', X):
+    fluid.layers.increment(X)
+with sel.case(ch2, 'r', Y):
+    fluid.print("Received on Channel")
+with sel.default():
+    fluid.print("Default")
+```
+In the above code snippet, `X` and `Y` are variables. Now let us look at each of these statements one by one.
+- `sel.case(ch1, 'w', X)` : This specifies that we are writing to `ch1` and we want to write the integer in variable `X` to the channel. The character `w` is used here to make the syntax familiar to write syntax in Python I/O.
+- `sel.case(ch2, 'r', Y)` : This specifies that we would like to read the result from `ch2` into variable `Y`. The character `r` is used here to make the syntax familiar to read syntax in Python I/O.
+- `sel.default()` : This is equivalent to the default in Go `select`. If none of the channels are ready for read or write, then the fluid code in the default block will be executed.
 ## Example Programs
 ### 1. RPC between Trainers and Parameter Servers

--- a/paddle/framework/backward.cc
+++ b/paddle/framework/backward.cc
@@ -534,7 +534,7 @@ ParamGradInfoMap AppendBackward(
  auto root_block = program_desc.MutableBlock(root_block_idx);
  std::string fill_one_op_out = GradVarName(target.Name());
-  bool is_scalar = target.Shape() == std::vector<int64_t>{1};
+  bool is_scalar = target.GetShape() == std::vector<int64_t>{1};
  PADDLE_ENFORCE(is_scalar, "target should be scalar");
  VLOG(3) << "backward from loss=" << target.Name()
          << " data_type=" << target.GetDataType();
@@ -565,7 +565,7 @@ ParamGradInfoMap AppendBackward(
  auto var = root_block->Var(fill_one_op_out);
  var->SetDataType(target.GetDataType());
-  var->SetShape(target.Shape());
+  var->SetShape(target.GetShape());
  auto& target_grad = retv[target.Name()];
  target_grad.name_ = fill_one_op_out;
  target_grad.block_idx_ = root_block_idx;

--- a/paddle/framework/details/buffered_channel.h
+++ b/paddle/framework/details/buffered_channel.h
@@ -71,7 +71,7 @@ bool Buffered<T>::Receive(T* item) {
  std::unique_lock<std::mutex> lock(mu_);
  empty_cond_var_.wait(lock, [this]() { return !channel_.empty() || closed_; });
  bool ret = false;
-  if (!closed_) {
+  if (!channel_.empty()) {
    *item = std::move(channel_.front());
    channel_.pop_front();
    full_cond_var_.notify_one();

--- a/paddle/framework/framework.proto
+++ b/paddle/framework/framework.proto
@@ -116,6 +116,8 @@ message LoDTensorArrayDesc {
  optional int32 lod_level = 2 [ default = 0 ];
 }
+message Reader { repeated LoDTensorDesc lod_tensor = 1; }
 message VarDesc {
  enum VarType {
    LOD_TENSOR = 1;
@@ -126,13 +128,15 @@ message VarDesc {
    LOD_RANK_TABLE = 6;
    LOD_TENSOR_ARRAY = 7;
    PLACE_LIST = 8;
+    READER = 9;
  }
  required string name = 1;
  required VarType type = 2;
-  optional LoDTensorDesc lod_tensor = 3;
+  optional bool persistable = 3 [ default = false ];
-  optional TensorDesc selected_rows = 4;
+  optional LoDTensorDesc lod_tensor = 4;
+  optional TensorDesc selected_rows = 5;
  optional LoDTensorArrayDesc tensor_array = 6;
-  optional bool persistable = 5 [ default = false ];
+  optional Reader reader = 7;
 }
 message BlockDesc {

--- a/paddle/framework/op_desc.cc
+++ b/paddle/framework/op_desc.cc
@@ -458,11 +458,11 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
  auto var = block_.FindVarRecursive(name);
  PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name);
  try {
-    auto shape = var->Shape();
+    auto shape = var->GetShape();
    if (shape.empty()) {
      return framework::make_ddim({0UL});
    } else {
-      return framework::make_ddim(var->Shape());
+      return framework::make_ddim(var->GetShape());
    }
  } catch (...) {
    VLOG(5) << "GetDim of variable " << name << " error";

--- a/paddle/framework/program_desc_test.cc
+++ b/paddle/framework/program_desc_test.cc
@@ -53,7 +53,7 @@ TEST(ProgramDesc, copy_ctor) {
    ASSERT_NE(copy, var_before);
    ASSERT_EQ(copy->Name(), var_before->Name());
    ASSERT_EQ(copy->GetType(), var_before->GetType());
-    ASSERT_EQ(copy->Shape(), var_before->Shape());
+    ASSERT_EQ(copy->GetShape(), var_before->GetShape());
    ASSERT_EQ(copy->Proto()->SerializeAsString(),
              var_before->Proto()->SerializeAsString());
  };
@@ -117,7 +117,7 @@ TEST(ProgramDescBind, serialize_and_deserialize) {
    ASSERT_NE(restored, var_before);
    ASSERT_EQ(restored->Name(), var_before->Name());
    ASSERT_EQ(restored->GetType(), var_before->GetType());
-    ASSERT_EQ(restored->Shape(), var_before->Shape());
+    ASSERT_EQ(restored->GetShape(), var_before->GetShape());
    ASSERT_EQ(restored->Proto()->SerializeAsString(),
              var_before->Proto()->SerializeAsString());
  };

--- a/paddle/framework/var_desc.cc
+++ b/paddle/framework/var_desc.cc
@@ -26,18 +26,91 @@ void VarDesc::SetShape(const std::vector<int64_t> &dims) {
  VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
 }
+void VarDesc::SetTensorDescNum(size_t num) {
+  switch (desc_.type()) {
+    case proto::VarDesc::READER: {
+      auto *lod_tensors_ptr = desc_.mutable_reader()->mutable_lod_tensor();
+      lod_tensors_ptr->Clear();
+      for (size_t i = 0; i < num; ++i) {
+        lod_tensors_ptr->Add();
+      }
+      return;
+    } break;
+    default:
+      PADDLE_THROW(
+          "Setting 'sub_tensor_number' is not supported by the type of var %s.",
+          this->Name());
+  }
+}
+size_t VarDesc::GetTensorDescNum() const {
+  switch (desc_.type()) {
+    case proto::VarDesc::READER:
+      return desc_.reader().lod_tensor_size();
+      break;
+    default:
+      PADDLE_THROW(
+          "Getting 'sub_tensor_number' is not supported by the type of var %s.",
+          this->Name());
+  }
+}
+void VarDesc::SetShapes(
+    const std::vector<std::vector<int64_t>> &multiple_dims) {
+  PADDLE_ENFORCE_EQ(multiple_dims.size(), GetTensorDescNum(),
+                    "The number of given shapes(%d) doesn't equal to the "
+                    "number of sub tensor.",
+                    multiple_dims.size(), GetTensorDescNum());
+  std::vector<proto::TensorDesc *> tensors = mutable_tensor_descs();
+  for (size_t i = 0; i < multiple_dims.size(); ++i) {
+    VectorToRepeated(multiple_dims[i], tensors[i]->mutable_dims());
+  }
+}
+std::vector<int64_t> VarDesc::GetShape() const {
+  return RepeatedToVector(tensor_desc().dims());
+}
+std::vector<std::vector<int64_t>> VarDesc::GetShapes() const {
+  std::vector<proto::TensorDesc> descs = tensor_descs();
+  std::vector<std::vector<int64_t>> res;
+  res.reserve(descs.size());
+  for (const auto &tensor_desc : descs) {
+    res.push_back(RepeatedToVector(tensor_desc.dims()));
+  }
+  return res;
+}
 void VarDesc::SetDataType(proto::DataType data_type) {
  mutable_tensor_desc()->set_data_type(data_type);
 }
-std::vector<int64_t> VarDesc::Shape() const {
+void VarDesc::SetDataTypes(
-  return RepeatedToVector(tensor_desc().dims());
+    const std::vector<proto::DataType> &multiple_data_type) {
+  PADDLE_ENFORCE_EQ(multiple_data_type.size(), GetTensorDescNum(),
+                    "The number of given data types(%d) doesn't equal to the "
+                    "number of sub tensor.",
+                    multiple_data_type.size(), GetTensorDescNum());
+  std::vector<proto::TensorDesc *> tensor_descs = mutable_tensor_descs();
+  for (size_t i = 0; i < multiple_data_type.size(); ++i) {
+    tensor_descs[i]->set_data_type(multiple_data_type[i]);
+  }
 }
 proto::DataType VarDesc::GetDataType() const {
  return tensor_desc().data_type();
 }
+std::vector<proto::DataType> VarDesc::GetDataTypes() const {
+  std::vector<proto::TensorDesc> descs = tensor_descs();
+  std::vector<proto::DataType> res;
+  res.reserve(descs.size());
+  for (const auto &tensor_desc : descs) {
+    res.push_back(tensor_desc.data_type());
+  }
+  return res;
+}
 void VarDesc::SetLoDLevel(int32_t lod_level) {
  switch (desc_.type()) {
    case proto::VarDesc::LOD_TENSOR:
@@ -47,8 +120,28 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
      desc_.mutable_tensor_array()->set_lod_level(lod_level);
      break;
    default:
-      PADDLE_THROW("Tensor type=%d does not support LoDLevel",
+      PADDLE_THROW(
-                   desc_.tensor_array().lod_level());
+          "Setting 'lod_level' is not supported by the type of var %s.",
+          this->Name());
+  }
+}
+void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
+  PADDLE_ENFORCE_EQ(multiple_lod_level.size(), GetTensorDescNum(),
+                    "The number of given data types(%d) doesn't equal to the "
+                    "number of sub tensor.",
+                    multiple_lod_level.size(), GetTensorDescNum());
+  switch (desc_.type()) {
+    case proto::VarDesc::READER: {
+      size_t i = 0;
+      for (auto &lod_tensor : *desc_.mutable_reader()->mutable_lod_tensor()) {
+        lod_tensor.set_lod_level(multiple_lod_level[i++]);
+      }
+    } break;
+    default:
+      PADDLE_THROW(
+          "Setting 'lod_levels' is not supported by the type of var %s.",
+          this->Name());
  }
 }
@@ -59,13 +152,31 @@ int32_t VarDesc::GetLoDLevel() const {
    case proto::VarDesc::LOD_TENSOR_ARRAY:
      return desc_.tensor_array().lod_level();
    default:
-      PADDLE_THROW("Tensor type=%d does not support LoDLevel",
+      PADDLE_THROW(
-                   desc_.tensor_array().lod_level());
+          "Getting 'lod_level' is not supported by the type of var %s.",
+          this->Name());
+  }
+}
+std::vector<int32_t> VarDesc::GetLoDLevels() const {
+  std::vector<int32_t> res;
+  switch (desc_.type()) {
+    case proto::VarDesc::READER:
+      res.reserve(desc_.reader().lod_tensor_size());
+      for (auto &lod_tensor : desc_.reader().lod_tensor()) {
+        res.push_back(lod_tensor.lod_level());
+      }
+      return res;
+      break;
+    default:
+      PADDLE_THROW(
+          "Getting 'lod_levels' is not supported by the type of var %s.",
+          this->Name());
  }
 }
 const proto::TensorDesc &VarDesc::tensor_desc() const {
-  PADDLE_ENFORCE(desc_.has_type(), "invoke TensorDesc must after set type");
+  PADDLE_ENFORCE(desc_.has_type(), "The var's type hasn't been set.");
  switch (desc_.type()) {
    case proto::VarDesc::SELECTED_ROWS:
      return desc_.selected_rows();
@@ -74,13 +185,32 @@ const proto::TensorDesc &VarDesc::tensor_desc() const {
    case proto::VarDesc::LOD_TENSOR_ARRAY:
      return desc_.tensor_array().tensor();
    default:
-      PADDLE_THROW("The type of var %s is unsupported.", this->Name());
+      PADDLE_THROW(
+          "Getting 'tensor_desc' is not supported by the type of var %s.",
+          this->Name());
+  }
+}
+std::vector<proto::TensorDesc> VarDesc::tensor_descs() const {
+  PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
+  std::vector<proto::TensorDesc> res;
+  res.reserve(GetTensorDescNum());
+  switch (desc_.type()) {
+    case proto::VarDesc::READER:
+      for (const auto &lod_tensor : desc_.reader().lod_tensor()) {
+        res.push_back(lod_tensor.tensor());
+      }
+      return res;
+    default:
+      PADDLE_THROW(
+          "Getting 'tensor_descs' is not supported by the type of var "
+          "%s.",
+          this->Name());
  }
 }
 proto::TensorDesc *VarDesc::mutable_tensor_desc() {
-  PADDLE_ENFORCE(desc_.has_type(),
+  PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
-                 "invoke MutableTensorDesc must after set type");
  switch (desc_.type()) {
    case proto::VarDesc::SELECTED_ROWS:
      return desc_.mutable_selected_rows();
@@ -89,8 +219,30 @@ proto::TensorDesc *VarDesc::mutable_tensor_desc() {
    case proto::VarDesc::LOD_TENSOR_ARRAY:
      return desc_.mutable_tensor_array()->mutable_tensor();
    default:
-      PADDLE_THROW("Unexpected branch.");
+      PADDLE_THROW(
+          "Getting 'mutable_tensor_desc' is not supported by the type of var "
+          "%s.",
+          this->Name());
  }
 }
+std::vector<proto::TensorDesc *> VarDesc::mutable_tensor_descs() {
+  PADDLE_ENFORCE(desc_.has_type(), "The var type hasn't been set.");
+  std::vector<proto::TensorDesc *> res;
+  res.reserve(GetTensorDescNum());
+  switch (desc_.type()) {
+    case proto::VarDesc::READER:
+      for (auto &lod_tensor : *desc_.mutable_reader()->mutable_lod_tensor()) {
+        res.push_back(lod_tensor.mutable_tensor());
+      }
+      return res;
+    default:
+      PADDLE_THROW(
+          "Getting 'tensor_descs' is not supported by the type of var "
+          "%s.",
+          this->Name());
+  }
+}
 }  // namespace framework
 }  // namespace paddle
--- a/paddle/framework/var_desc.h
+++ b/paddle/framework/var_desc.h
@@ -68,18 +68,34 @@ class VarDesc {
  void SetName(std::string name) { desc_.set_name(name); }
+  void SetTensorDescNum(size_t num);
+  size_t GetTensorDescNum() const;
  void SetShape(const std::vector<int64_t> &dims);
+  void SetShapes(const std::vector<std::vector<int64_t>> &multiple_dims);
+  std::vector<int64_t> GetShape() const;
+  std::vector<std::vector<int64_t>> GetShapes() const;
  void SetDataType(proto::DataType data_type);
-  std::vector<int64_t> Shape() const;
+  void SetDataTypes(const std::vector<proto::DataType> &multiple_data_type);
  proto::DataType GetDataType() const;
+  std::vector<proto::DataType> GetDataTypes() const;
  void SetLoDLevel(int32_t lod_level);
+  void SetLoDLevels(const std::vector<int32_t> &multiple_lod_level);
  int32_t GetLoDLevel() const;
+  std::vector<int32_t> GetLoDLevels() const;
  proto::VarDesc::VarType GetType() const;
  void SetType(proto::VarDesc::VarType type);
@@ -90,7 +106,9 @@ class VarDesc {
 private:
  const proto::TensorDesc &tensor_desc() const;
+  std::vector<proto::TensorDesc> tensor_descs() const;
  proto::TensorDesc *mutable_tensor_desc();
+  std::vector<proto::TensorDesc *> mutable_tensor_descs();
  proto::VarDesc desc_;
 };

--- a/paddle/inference/io.cc
+++ b/paddle/inference/io.cc
@@ -55,7 +55,7 @@ void LoadPersistables(framework::Executor& executor,
      VLOG(3) << "parameter's name: " << var->Name();
      framework::VarDesc* new_var = load_block->Var(var->Name());
-      new_var->SetShape(var->Shape());
+      new_var->SetShape(var->GetShape());
      new_var->SetDataType(var->GetDataType());
      new_var->SetType(var->GetType());
      new_var->SetLoDLevel(var->GetLoDLevel());

--- a/paddle/inference/tests/book/test_inference_recognize_digits.cc
+++ b/paddle/inference/tests/book/test_inference_recognize_digits.cc
@@ -58,6 +58,47 @@ void TestInference(const std::string& dirname,
  delete scope;
 }
+template <typename T>
+void SetupTensor(paddle::framework::LoDTensor& input,
+                 paddle::framework::DDim dims,
+                 T lower,
+                 T upper) {
+  srand(time(0));
+  float* input_ptr = input.mutable_data<T>(dims, paddle::platform::CPUPlace());
+  for (int i = 0; i < input.numel(); ++i) {
+    input_ptr[i] =
+        (static_cast<T>(rand()) / static_cast<T>(RAND_MAX)) * (upper - lower) +
+        lower;
+  }
+}
+template <typename T>
+void CheckError(paddle::framework::LoDTensor& output1,
+                paddle::framework::LoDTensor& output2) {
+  // Check lod information
+  EXPECT_EQ(output1.lod(), output2.lod());
+  EXPECT_EQ(output1.dims(), output2.dims());
+  EXPECT_EQ(output1.numel(), output2.numel());
+  T err = static_cast<T>(0);
+  if (typeid(T) == typeid(float)) {
+    err = 1E-3;
+  } else if (typeid(T) == typeid(double)) {
+    err = 1E-6;
+  } else {
+    err = 0;
+  }
+  size_t count = 0;
+  for (int64_t i = 0; i < output1.numel(); ++i) {
+    if (fabs(output1.data<T>()[i] - output2.data<T>()[i]) > err) {
+      count++;
+    }
+  }
+  EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
+}
 TEST(inference, recognize_digits) {
  if (FLAGS_dirname.empty()) {
    LOG(FATAL) << "Usage: ./example --dirname=path/to/your/model";
@@ -70,12 +111,10 @@ TEST(inference, recognize_digits) {
  // In unittests, this is done in paddle/testing/paddle_gtest_main.cc
  paddle::framework::LoDTensor input;
-  srand(time(0));
+  // Use normilized image pixels as input data,
-  float* input_ptr =
+  // which should be in the range [-1.0, 1.0].
-      input.mutable_data<float>({1, 28, 28}, paddle::platform::CPUPlace());
+  SetupTensor<float>(
-  for (int i = 0; i < 784; ++i) {
+      input, {1, 28, 28}, static_cast<float>(-1), static_cast<float>(1));
-    input_ptr[i] = rand() / (static_cast<float>(RAND_MAX));
-  }
  std::vector<paddle::framework::LoDTensor*> cpu_feeds;
  cpu_feeds.push_back(&input);
@@ -98,16 +137,6 @@ TEST(inference, recognize_digits) {
      dirname, cpu_feeds, cpu_fetchs2);
  LOG(INFO) << output2.dims();
-  EXPECT_EQ(output1.dims(), output2.dims());
+  CheckError<float>(output1, output2);
-  EXPECT_EQ(output1.numel(), output2.numel());
-  float err = 1E-3;
-  int count = 0;
-  for (int64_t i = 0; i < output1.numel(); ++i) {
-    if (fabs(output1.data<float>()[i] - output2.data<float>()[i]) > err) {
-      count++;
-    }
-  }
-  EXPECT_EQ(count, 0) << "There are " << count << " different elements.";
 #endif
 }
--- a/paddle/operators/multiclass_nms_op.cc
+++ b/paddle/operators/multiclass_nms_op.cc
@@ -85,7 +85,7 @@ static inline void GetMaxScoreIndex(
  std::stable_sort(sorted_indices->begin(), sorted_indices->end(),
                   SortScorePairDescend<int>);
  // Keep top_k scores if needed.
-  if (top_k > -1 && top_k < sorted_indices->size()) {
+  if (top_k > -1 && top_k < static_cast<int>(sorted_indices->size())) {
    sorted_indices->resize(top_k);
  }
 }
@@ -151,7 +151,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
    while (sorted_indices.size() != 0) {
      const int idx = sorted_indices.front().second;
      bool keep = true;
-      for (int k = 0; k < selected_indices->size(); ++k) {
+      for (size_t k = 0; k < selected_indices->size(); ++k) {
        if (keep) {
          const int kept_idx = (*selected_indices)[k];
          T overlap = JaccardOverlap<T>(bbox_data + idx * box_size,
@@ -201,7 +201,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
        int label = it.first;
        const T* sdata = scores_data + label * predict_dim;
        const std::vector<int>& label_indices = it.second;
-        for (int j = 0; j < label_indices.size(); ++j) {
+        for (size_t j = 0; j < label_indices.size(); ++j) {
          int idx = label_indices[j];
          PADDLE_ENFORCE_LT(idx, predict_dim);
          score_index_pairs.push_back(
@@ -215,7 +215,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
      // Store the new indices.
      std::map<int, std::vector<int>> new_indices;
-      for (int j = 0; j < score_index_pairs.size(); ++j) {
+      for (size_t j = 0; j < score_index_pairs.size(); ++j) {
        int label = score_index_pairs[j].second.first;
        int idx = score_index_pairs[j].second.second;
        new_indices[label].push_back(idx);
@@ -238,7 +238,7 @@ class MultiClassNMSKernel : public framework::OpKernel<T> {
      int label = it.first;
      const T* sdata = scores_data + label * predict_dim;
      const std::vector<int>& indices = it.second;
-      for (int j = 0; j < indices.size(); ++j) {
+      for (size_t j = 0; j < indices.size(); ++j) {
        int idx = indices[j];
        const T* bdata = bboxes_data + idx * kBBoxSize;
        odata[count * kOutputDim] = label;           // label

--- a/paddle/operators/prior_box_op.cc
+++ b/paddle/operators/prior_box_op.cc
@@ -44,12 +44,6 @@ class PriorBoxOp : public framework::OperatorWithKernel {
    auto aspect_ratios = ctx->Attrs().Get<std::vector<float>>("aspect_ratios");
    bool flip = ctx->Attrs().Get<bool>("flip");
-    PADDLE_ENFORCE_GT(min_sizes.size(), 0,
-                      "Size of min_sizes must be at least 1.");
-    for (size_t i = 0; i < min_sizes.size(); ++i) {
-      PADDLE_ENFORCE_GT(min_sizes[i], 0, "min_sizes[%d] must be positive.", i);
-    }
    std::vector<float> aspect_ratios_vec;
    ExpandAspectRatios(aspect_ratios, flip, aspect_ratios_vec);
@@ -65,17 +59,6 @@ class PriorBoxOp : public framework::OperatorWithKernel {
      }
    }
-    PADDLE_ENFORCE_EQ(variances.size(), 4, "Must and only provide 4 variance.");
-    for (size_t i = 0; i < variances.size(); ++i) {
-      PADDLE_ENFORCE_GT(variances[i], 0.0,
-                        "variance[%d] must be greater than 0.", i);
-    }
-    const float step_h = ctx->Attrs().Get<float>("step_h");
-    PADDLE_ENFORCE_GT(step_h, 0.0, "step_h should be larger than 0.");
-    const float step_w = ctx->Attrs().Get<float>("step_w");
-    PADDLE_ENFORCE_GT(step_w, 0.0, "step_w should be larger than 0.");
    std::vector<int64_t> dim_vec(4);
    dim_vec[0] = input_dims[2];
    dim_vec[1] = input_dims[3];
@@ -106,26 +89,54 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
              "PriorBoxOp. The layout is [H, W, num_priors, 4]. "
              "H is the height of input, W is the width of input, num_priors "
              "is the box count of each position.");
-    AddAttr<std::vector<int>>("min_sizes", "(vector<int>) ",
-                              "List of min sizes of generated prior boxes.");
+    AddAttr<std::vector<int>>("min_sizes",
-    AddAttr<std::vector<int>>("max_sizes", "(vector<int>) ",
+                              "(vector<int>) List of min sizes "
-                              "List of max sizes of generated prior boxes.");
+                              "of generated prior boxes.")
+        .AddCustomChecker([](const std::vector<int>& min_sizes) {
+          PADDLE_ENFORCE_GT(min_sizes.size(), 0,
+                            "Size of min_sizes must be at least 1.");
+          for (size_t i = 0; i < min_sizes.size(); ++i) {
+            PADDLE_ENFORCE_GT(min_sizes[i], 0,
+                              "min_sizes[%d] must be positive.", i);
+          }
+        });
+    AddAttr<std::vector<int>>(
+        "max_sizes",
+        "(vector<int>) List of max sizes of generated prior boxes.");
    AddAttr<std::vector<float>>(
-        "aspect_ratios", "(vector<float>) ",
+        "aspect_ratios",
-        "List of aspect ratios of generated prior boxes.");
+        "(vector<float>) List of aspect ratios of generated prior boxes.");
    AddAttr<std::vector<float>>(
-        "variances", "(vector<float>) ",
+        "variances",
-        "List of variances to be encoded in prior boxes.");
+        "(vector<float>) List of variances to be encoded in prior boxes.")
-    AddAttr<bool>("flip", "(bool) ", "Whether to flip aspect ratios.")
+        .AddCustomChecker([](const std::vector<float>& variances) {
+          PADDLE_ENFORCE_EQ(variances.size(), 4,
+                            "Must and only provide 4 variance.");
+          for (size_t i = 0; i < variances.size(); ++i) {
+            PADDLE_ENFORCE_GT(variances[i], 0.0,
+                              "variance[%d] must be greater than 0.", i);
+          }
+        });
+    AddAttr<bool>("flip", "(bool) Whether to flip aspect ratios.")
        .SetDefault(true);
-    AddAttr<bool>("clip", "(bool) ", "Whether to clip out-of-boundary boxes.")
+    AddAttr<bool>("clip", "(bool) Whether to clip out-of-boundary boxes.")
        .SetDefault(true);
    AddAttr<float>("step_w",
                   "Prior boxes step across width, 0 for auto calculation.")
-        .SetDefault(0.0);
+        .SetDefault(0.0)
+        .AddCustomChecker([](const float& step_w) {
+          PADDLE_ENFORCE_GT(step_w, 0.0, "step_w should be larger than 0.");
+        });
    AddAttr<float>("step_h",
                   "Prior boxes step across height, 0 for auto calculation.")
-        .SetDefault(0.0);
+        .SetDefault(0.0)
+        .AddCustomChecker([](const float& step_h) {
+          PADDLE_ENFORCE_GT(step_h, 0.0, "step_h should be larger than 0.");
+        });
    AddAttr<float>("offset",
                   "(float) "
                   "Prior boxes center offset.")

--- a/paddle/operators/prior_box_op.h
+++ b/paddle/operators/prior_box_op.h
@@ -25,7 +25,7 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
                               std::vector<float>& output_aspect_ratior) {
  constexpr float epsilon = 1e-6;
  output_aspect_ratior.clear();
-  output_aspect_ratior.push_back(1.);
+  output_aspect_ratior.push_back(1.0f);
  for (size_t i = 0; i < input_aspect_ratior.size(); ++i) {
    float ar = input_aspect_ratior[i];
    bool already_exist = false;
@@ -38,7 +38,7 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
    if (!already_exist) {
      output_aspect_ratior.push_back(ar);
      if (flip) {
-        output_aspect_ratior.push_back(1. / ar);
+        output_aspect_ratior.push_back(1.0f / ar);
      }
    }
  }
@@ -46,7 +46,7 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
 template <typename T>
 struct ClipFunctor {
-  HOSTDEVICE T operator()(T in) const {
+  HOSTDEVICE inline T operator()(T in) const {
    return std::min<T>(std::max<T>(in, 0.), 1.);
  }
 };
@@ -97,6 +97,9 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
    boxes->mutable_data<T>(ctx.GetPlace());
    vars->mutable_data<T>(ctx.GetPlace());
+    T inv_img_width = 1.0 / img_width;
+    T inv_img_height = 1.0 / img_height;
    auto e_boxes = framework::EigenTensor<T, 4>::From(*boxes);
    for (int h = 0; h < feature_height; ++h) {
      for (int w = 0; w < feature_width; ++w) {
@@ -109,13 +112,15 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
          // first prior: aspect_ratio = 1, size = min_size
          box_width = box_height = min_size;
          // xmin
-          e_boxes(h, w, idx, 0) = (center_x - box_width / 2.) / img_width;
+          e_boxes(h, w, idx, 0) = (center_x - box_width * 0.5) * inv_img_width;
          // ymin
-          e_boxes(h, w, idx, 1) = (center_y - box_height / 2.) / img_height;
+          e_boxes(h, w, idx, 1) =
+              (center_y - box_height * 0.5) * inv_img_height;
          // xmax
-          e_boxes(h, w, idx, 2) = (center_x + box_width / 2.) / img_width;
+          e_boxes(h, w, idx, 2) = (center_x + box_width * 0.5) * inv_img_width;
          // ymax
-          e_boxes(h, w, idx, 3) = (center_y + box_height / 2.) / img_height;
+          e_boxes(h, w, idx, 3) =
+              (center_y + box_height * 0.5) * inv_img_height;
          idx++;
          if (max_sizes.size() > 0) {
@@ -124,13 +129,17 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
            // size = sqrt(min_size * max_size)
            box_width = box_height = sqrt(min_size * max_size);
            // xmin
-            e_boxes(h, w, idx, 0) = (center_x - box_width / 2.) / img_width;
+            e_boxes(h, w, idx, 0) =
+                (center_x - box_width * 0.5) * inv_img_width;
            // ymin
-            e_boxes(h, w, idx, 1) = (center_y - box_height / 2.) / img_height;
+            e_boxes(h, w, idx, 1) =
+                (center_y - box_height * 0.5) * inv_img_height;
            // xmax
-            e_boxes(h, w, idx, 2) = (center_x + box_width / 2.) / img_width;
+            e_boxes(h, w, idx, 2) =
+                (center_x + box_width * 0.5) * inv_img_width;
            // ymax
-            e_boxes(h, w, idx, 3) = (center_y + box_height / 2.) / img_height;
+            e_boxes(h, w, idx, 3) =
+                (center_y + box_height * 0.5) * inv_img_height;
            idx++;
          }
@@ -143,13 +152,17 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
            box_width = min_size * sqrt(ar);
            box_height = min_size / sqrt(ar);
            // xmin
-            e_boxes(h, w, idx, 0) = (center_x - box_width / 2.) / img_width;
+            e_boxes(h, w, idx, 0) =
+                (center_x - box_width * 0.5) * inv_img_width;
            // ymin
-            e_boxes(h, w, idx, 1) = (center_y - box_height / 2.) / img_height;
+            e_boxes(h, w, idx, 1) =
+                (center_y - box_height * 0.5) * inv_img_height;
            // xmax
-            e_boxes(h, w, idx, 2) = (center_x + box_width / 2.) / img_width;
+            e_boxes(h, w, idx, 2) =
+                (center_x + box_width * 0.5) * inv_img_width;
            // ymax
-            e_boxes(h, w, idx, 3) = (center_y + box_height / 2.) / img_height;
+            e_boxes(h, w, idx, 3) =
+                (center_y + box_height * 0.5) * inv_img_height;
            idx++;
          }
        }

--- a/paddle/pybind/protobuf.cc
+++ b/paddle/pybind/protobuf.cc
@@ -214,11 +214,20 @@ void BindVarDsec(py::module &m) {
           py::return_value_policy::reference)
      .def("set_name", &VarDesc::SetName)
      .def("set_shape", &VarDesc::SetShape)
+      .def("set_shapes", &VarDesc::SetShapes)
      .def("set_dtype", &VarDesc::SetDataType)
-      .def("shape", &VarDesc::Shape, py::return_value_policy::reference)
+      .def("set_dtypes", &VarDesc::SetDataTypes)
+      .def("set_tensor_num", &VarDesc::SetTensorDescNum)
+      .def("tensor_num", &VarDesc::GetTensorDescNum)
+      .def("shape", &VarDesc::GetShape, py::return_value_policy::reference)
+      .def("shapes", &VarDesc::GetShapes, py::return_value_policy::reference)
      .def("dtype", &VarDesc::GetDataType, py::return_value_policy::reference)
+      .def("dtypes", &VarDesc::GetDataTypes, py::return_value_policy::reference)
      .def("lod_level", &VarDesc::GetLoDLevel)
+      .def("lod_levels", &VarDesc::GetLoDLevels,
+           py::return_value_policy::reference)
      .def("set_lod_level", &VarDesc::SetLoDLevel)
+      .def("set_lod_levels", &VarDesc::SetLoDLevels)
      .def("type", &VarDesc::GetType)
      .def("set_type", &VarDesc::SetType)
      .def("serialize_to_string", SerializeMessage<VarDesc>)
@@ -233,7 +242,8 @@ void BindVarDsec(py::module &m) {
      .value("STEP_SCOPES", proto::VarDesc::STEP_SCOPES)
      .value("LOD_RANK_TABLE", proto::VarDesc::LOD_RANK_TABLE)
      .value("LOD_TENSOR_ARRAY", proto::VarDesc::LOD_TENSOR_ARRAY)
-      .value("PLACE_LIST", proto::VarDesc::PLACE_LIST);
+      .value("PLACE_LIST", proto::VarDesc::PLACE_LIST)
+      .value("READER", proto::VarDesc::READER);
 }
 void BindOpDesc(py::module &m) {

--- a/paddle/scripts/docker/build.sh
+++ b/paddle/scripts/docker/build.sh
@@ -117,7 +117,7 @@ EOF
            -DWITH_STYLE_CHECK=OFF
        make -j `nproc` gen_proto_py
        make -j `nproc` paddle_python
-        make -j `nproc` paddle_docs paddle_docs_cn
+        make -j `nproc` paddle_docs paddle_docs_cn paddle_api_docs
        make -j `nproc` print_operators_doc
        paddle/pybind/print_operators_doc > doc/en/html/operators.json
        popd

--- a/paddle/scripts/travis/build_doc.sh
+++ b/paddle/scripts/travis/build_doc.sh
@@ -9,13 +9,14 @@ cd $TRAVIS_BUILD_DIR/build
 cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
 make -j `nproc` gen_proto_py
 make -j `nproc` paddle_python
-make -j `nproc` paddle_docs paddle_docs_cn
+make -j `nproc` paddle_docs paddle_docs_cn paddle_api_docs
 make -j `nproc` print_operators_doc
 paddle/pybind/print_operators_doc > doc/en/html/operators.json
 # check websites for broken links
 linkchecker doc/en/html/index.html
 linkchecker doc/cn/html/index.html
+linkchecker doc/api/en/html/index.html
 # Parse Github URL
 REPO=`git config remote.origin.url`
@@ -54,10 +55,11 @@ function deploy_docs() {
  mkdir -p ${DIR}
  # remove old docs. mv new docs.
  set +e
-  rm -rf ${DIR}/doc ${DIR}/doc_cn
+  rm -rf ${DIR}/doc ${DIR}/doc_cn ${DIR}/api_doc
  set -e
  cp -r ../doc/cn/html ${DIR}/doc_cn
  cp -r ../doc/en/html ${DIR}/doc
+  cp -r ../doc/api/en/html ${DIR}/api_doc
  git add .
 }

--- a/python/paddle/v2/fluid/executor.py
+++ b/python/paddle/v2/fluid/executor.py
@@ -17,7 +17,9 @@ import contextlib
 from framework import Program, default_main_program
 from . import core
-__all__ = ['Executor', 'global_scope', 'scope_guard', 'switch_scope']
+__all__ = [
+    'Executor', 'global_scope', 'scope_guard', 'switch_scope', 'fetch_var'
+]
 g_scope = core.Scope()
@@ -146,6 +148,35 @@ def has_fetch_operators(block, fetch_targets, fetch_holder_name):
    return fetch_count > 0
+def fetch_var(name, scope=None, return_numpy=True):
+    """
+    Fetch the value of the variable with the given name from the given scope
+    Args:
+        name(str): name of the variable. Typically, only persistable variables
+            can be found in the scope used for running the program.
+        scope(core.Scope|None): scope object. It should be the scope where
+            you pass to Executor.run() when running your program.
+            If None, global_scope() will be used.
+        return_numpy(bool): whether convert the tensor to numpy.ndarray
+    Returns:
+       LodTensor|numpy.ndarray
+    """
+    assert isinstance(name, str)
+    if scope is None:
+        scope = global_scope()
+    assert isinstance(scope, core.Scope)
+    var = global_scope().find_var(name)
+    assert var is not None, (
+        "Cannot find " + name + " in scope. Perhaps you need to make the"
+        " variable persistable by using var.persistable = True in your"
+        " program.")
+    tensor = var.get_tensor()
+    if return_numpy:
+        tensor = as_numpy(tensor)
+    return tensor
 class Executor(object):
    def __init__(self, places):
        if not isinstance(places, list) and not isinstance(places, tuple):

--- a/python/paddle/v2/fluid/framework.py
+++ b/python/paddle/v2/fluid/framework.py
@@ -31,6 +31,7 @@ __all__ = [
    'program_guard',
    'switch_startup_program',
    'switch_main_program',
+    'get_var',
 ]
 EMPTY_VAR_NAME = core.kEmptyVarName()
@@ -1123,3 +1124,22 @@ def program_guard(main_program, startup_program=None):
    switch_main_program(main_program)
    if startup_program is not None:
        switch_startup_program(startup_program)
+def get_var(name, program=None):
+    """
+    Get a variable by name from the global block of a program
+    Args:
+        name(str): name of the variable
+        program(Program|None): program object.
+             If None, default_global_program() will be used.
+    Returns:
+        Variable
+    """
+    if program is None:
+        program = default_main_program()
+    assert isinstance(name, str)
+    assert isinstance(name, Program)
+    return program.global_block().var(name)
--- a/python/paddle/v2/fluid/layers/tensor.py
+++ b/python/paddle/v2/fluid/layers/tensor.py
@@ -35,13 +35,15 @@ __all__ = [
 ]
-def create_tensor(dtype, name=None):
+def create_tensor(dtype, name=None, persistable=False):
    helper = LayerHelper("create_tensor", **locals())
-    return helper.create_variable(name=helper.name, dtype=dtype)
+    return helper.create_variable(
+        name=helper.name, dtype=dtype, persistable=persistable)
 def create_parameter(shape,
                     dtype,
+                     name=None,
                     attr=None,
                     is_bias=False,
                     default_initializer=None):
@@ -62,7 +64,7 @@ def create_parameter(shape,
    """
    helper = LayerHelper("create_parameter", **locals())
    if attr is None:
-        attr = ParamAttr()
+        attr = ParamAttr(name=name)
    return helper.create_parameter(attr, shape, dtype, is_bias,
                                   default_initializer)

--- a/python/paddle/v2/fluid/tests/book/test_recognize_digits.py
+++ b/python/paddle/v2/fluid/tests/book/test_recognize_digits.py
@@ -166,7 +166,9 @@ def infer(use_cuda, save_dirname=None):
     fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
    # The input's dimension of conv should be 4-D or 5-D.
-    tensor_img = numpy.random.rand(1, 1, 28, 28).astype("float32")
+    # Use normilized image pixels as input data, which should be in the range [-1.0, 1.0].
+    tensor_img = numpy.random.uniform(-1.0, 1.0,
+                                      [1, 1, 28, 28]).astype("float32")
    # Construct feed as a dictionary of {feed_target_name: feed_target_data}
    # and results will contain a list of data corresponding to fetch_targets.

--- a/python/paddle/v2/fluid/tests/test_fetch_var.py
+++ b/python/paddle/v2/fluid/tests/test_fetch_var.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle.v2.fluid as fluid
+import paddle.v2.fluid.layers as layers
+import op_test
+import numpy
+import unittest
+class TestFetchVar(op_test.OpTest):
+    def test_fetch_var(self):
+        val = numpy.array([1, 3, 5]).astype(numpy.int32)
+        x = layers.create_tensor(dtype="int32", persistable=True, name="x")
+        layers.assign(input=val, output=x)
+        exe = fluid.Executor(fluid.CPUPlace())
+        exe.run(fluid.default_main_program(), feed={}, fetch_list=[])
+        fetched_x = fluid.fetch_var("x")
+        self.assertTrue(
+            numpy.array_equal(fetched_x, val),
+            "fetch_x=%s val=%s" % (fetched_x, val))
+        self.assertEqual(fetched_x.dtype, val.dtype)
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/v2/fluid/tests/test_protobuf_descs.py
+++ b/python/paddle/v2/fluid/tests/test_protobuf_descs.py
@@ -115,6 +115,18 @@ class TestVarDesc(unittest.TestCase):
        self.assertEqual(src_shape, res_shape)
        self.assertEqual(core.VarDesc.VarType.SELECTED_ROWS, var.type())
+    def test_multiple_shape(self):
+        program_desc = core.ProgramDesc()
+        block = program_desc.block(0)
+        var = block.var('my_reader')
+        var.set_type(core.VarDesc.VarType.READER)
+        var.set_tensor_num(3)
+        src_shapes = [[2, 3, 3], [4, 5], [6, 7, 8, 9]]
+        var.set_shapes(src_shapes)
+        res_shapes = var.shapes()
+        self.assertEqual(src_shapes, res_shapes)
+        self.assertEqual(core.VarDesc.VarType.READER, var.type())
    def test_dtype(self):
        program_desc = core.ProgramDesc()
        block = program_desc.block(0)
@@ -124,6 +136,30 @@ class TestVarDesc(unittest.TestCase):
        self.assertEqual(core.DataType.INT32, var.dtype())
        self.assertEqual(core.VarDesc.VarType.LOD_TENSOR, var.type())
+    def test_multiple_dtype(self):
+        program_desc = core.ProgramDesc()
+        block = program_desc.block(0)
+        var = block.var('my_reader')
+        var.set_type(core.VarDesc.VarType.READER)
+        var.set_tensor_num(3)
+        src_types = [
+            core.DataType.INT32, core.DataType.FP64, core.DataType.FP32
+        ]
+        var.set_dtypes(src_types)
+        self.assertEqual(src_types, var.dtypes())
+        self.assertEqual(core.VarDesc.VarType.READER, var.type())
+    def test_multiple_lod_level(self):
+        program_desc = core.ProgramDesc()
+        block = program_desc.block(0)
+        var = block.var('my_reader')
+        var.set_type(core.VarDesc.VarType.READER)
+        var.set_tensor_num(3)
+        src_types = [3, 1, 2]
+        var.set_lod_levels(src_types)
+        self.assertEqual(src_types, var.lod_levels())
+        self.assertEqual(core.VarDesc.VarType.READER, var.type())
 class TestBlockDesc(unittest.TestCase):
    def test_add_var(self):