Unverified commit 812cf151, authored by fengjiayi, committed by GitHub

Merge pull request #8009 from JiayiFeng/dev_reader

Fundamental Data Reading in C++
......@@ -24,6 +24,8 @@ cc_library(lod_tensor SRCS lod_tensor.cc DEPS ddim place tensor framework_proto)
cc_test(lod_tensor_test SRCS lod_tensor_test.cc DEPS lod_tensor paddle_memory)
nv_test(lod_tensor_gpu_test SRCS lod_tensor_test.cu DEPS lod_tensor init)
cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
cc_test(variable_test SRCS variable_test.cc)
cc_library(threadpool SRCS threadpool.cc DEPS enforce)
......
......@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h"
#include "paddle/framework/reader.h"
#include "paddle/platform/place.h"
#include "paddle/platform/profiler.h"
......@@ -52,11 +53,13 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
var->GetMutable<LoDTensorArray>();
} else if (var_type == proto::VarDesc::PLACE_LIST) {
var->GetMutable<platform::PlaceList>();
} else if (var_type == proto::VarDesc::READER) {
var->GetMutable<ReaderHolder>();
} else {
PADDLE_THROW(
"Variable type %d is not in "
"[LoDTensor, SelectedRows, FEED_MINIBATCH, FETCH_LIST, LOD_RANK_TABLE,"
" PLACE_LIST]",
"[LOD_TENSOR, SELECTED_ROWS, FEED_MINIBATCH, FETCH_LIST, "
"LOD_RANK_TABLE, PLACE_LIST, READER]",
var_type);
}
}
......
......@@ -116,7 +116,7 @@ message LoDTensorArrayDesc {
optional int32 lod_level = 2 [ default = 0 ];
}
message Reader { repeated LoDTensorDesc lod_tensor = 1; }
message ReaderDesc { repeated LoDTensorDesc lod_tensor = 1; }
message VarDesc {
enum VarType {
......@@ -136,7 +136,7 @@ message VarDesc {
optional LoDTensorDesc lod_tensor = 4;
optional TensorDesc selected_rows = 5;
optional LoDTensorArrayDesc tensor_array = 6;
optional Reader reader = 7;
optional ReaderDesc reader = 7;
}
message BlockDesc {
......
......@@ -72,6 +72,11 @@ class CompileTimeInferShapeContext : public InferShapeContext {
void SetDim(const std::string &name, const DDim &dim) override;
std::vector<DDim> GetRepeatedDims(const std::string &name) const override;
void SetRepeatedDims(const std::string &name,
const std::vector<DDim> &dims) override;
const OpDesc &op_;
const BlockDesc &block_;
};
......@@ -457,23 +462,48 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
auto var = block_.FindVarRecursive(name);
PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name);
DDim res;
try {
auto shape = var->GetShape();
if (shape.empty()) {
return framework::make_ddim({0UL});
} else {
return framework::make_ddim(var->GetShape());
}
res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape);
} catch (...) {
VLOG(5) << "GetDim of variable " << name << " error";
std::rethrow_exception(std::current_exception());
}
return res;
}
std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims(
const std::string &name) const {
auto var = block_.FindVarRecursive(name);
PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name);
std::vector<DDim> res;
try {
auto shapes = var->GetShapes();
for (const auto &s : shapes) {
res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s));
}
} catch (...) {
VLOG(5) << "GetRepeatedDim of variable " << name << " error.";
std::rethrow_exception(std::current_exception());
}
return res;
}
void CompileTimeInferShapeContext::SetDim(const std::string &name,
const DDim &dim) {
block_.FindVarRecursive(name)->SetShape(framework::vectorize(dim));
block_.FindVarRecursive(name)->SetShape(vectorize(dim));
}
void CompileTimeInferShapeContext::SetRepeatedDims(
const std::string &name, const std::vector<DDim> &dims) {
auto var = block_.FindVarRecursive(name);
PADDLE_ENFORCE(var != nullptr, "Cannot find variable %s", name);
std::vector<std::vector<int64_t>> dim_vec(dims.size());
std::transform(dims.begin(), dims.end(), dim_vec.begin(), vectorize);
var->SetShapes(dim_vec);
}
bool CompileTimeInferShapeContext::IsRuntime() const { return false; }
proto::VarDesc::VarType CompileTimeInferShapeContext::GetVarType(
......
......@@ -320,8 +320,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL, "Input %s should have more than one inputs",
name);
PADDLE_ENFORCE_EQ(length, 1UL,
"Input %s should not have more than one inputs", name);
auto ipt = ins[0];
auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
return var != nullptr;
......@@ -333,8 +333,8 @@ class RuntimeInferShapeContext : public InferShapeContext {
if (length == 0) {
return false;
}
PADDLE_ENFORCE_EQ(length, 1UL, "Output %s should have more than one inputs",
name);
PADDLE_ENFORCE_EQ(length, 1UL,
"Output %s should not have more than one inputs", name);
auto ipt = outs[0];
auto* var = ipt == kEmptyVarName ? nullptr : scope_.FindVar(ipt);
return var != nullptr;
......@@ -421,8 +421,22 @@ class RuntimeInferShapeContext : public InferShapeContext {
} else if (var->IsType<SelectedRows>()) {
return var->Get<SelectedRows>().GetCompleteDims();
} else {
PADDLE_THROW("Variable %s type_id %s, expect LoDTensor/SelectedRows.",
name, var->Type().name());
PADDLE_THROW(
"Only LoDTensor/SelectedRows support 'GetDim', but Variable %s's "
"type_id is %s.",
name, var->Type().name());
}
}
std::vector<DDim> GetRepeatedDims(const std::string& name) const override {
Variable* var = scope_.FindVar(name);
if (var->IsType<ReaderHolder>()) {
return var->Get<ReaderHolder>().shapes();
} else {
PADDLE_THROW(
"Only ReaderHolder support 'GetRepeatedDims', but Variable %s's "
"type_id is %s.",
name, var->Type().name());
}
}
......@@ -438,6 +452,19 @@ class RuntimeInferShapeContext : public InferShapeContext {
}
}
void SetRepeatedDims(const std::string& name,
const std::vector<DDim>& dims) override {
Variable* var = scope_.FindVar(name);
if (var->IsType<ReaderHolder>()) {
var->GetMutable<ReaderHolder>()->set_shapes(dims);
} else {
PADDLE_THROW(
"Only ReaderHolder support 'SetRepeatedDims', but Variable %s's "
"type_id is %s.",
name, var->Type().name());
}
}
proto::VarDesc::VarType GetVarType(const std::string& name) const override {
auto* var = scope_.FindVar(name);
return ToVarType(var->Type());
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/framework/reader.h"
namespace paddle {
namespace framework {
DDim ReaderBase::shape(size_t idx) const {
PADDLE_ENFORCE_LT(
idx, shapes_.size(),
"Cannot get the %d'th shape, 'shapes_' only has %d elements.", idx,
shapes_.size());
return shapes_[idx];
}
void ShuffleReader::ReadNext(std::vector<LoDTensor>* out) {
if (iteration_pos_ >= buffer_.size()) {
// Reload buffer with new data
buffer_.clear();
buffer_.reserve(buffer_size_);
for (int i = 0; i < buffer_size_; ++i) {
if (reader_->HasNext()) {
buffer_.push_back(std::vector<LoDTensor>());
reader_->ReadNext(&buffer_.back());
} else {
break;
}
}
// TODO(fengjiayi): 'std::random_shuffle' can be very slow. It needs to be
// optimized.
std::random_shuffle(buffer_.begin(), buffer_.end());
iteration_pos_ = 0;
}
out->clear();
if (!buffer_.empty()) {
std::swap(*out, buffer_[iteration_pos_++]);
}
// If buffer_ is empty, 'out' will be returned as an empty vector.
}
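// Illustrative behavior (not from the original patch, derived from the
// code above): with buffer_size_ = 3 and an underlying reader yielding
// A, B, C, D, E, the first three calls return a random permutation of
// {A, B, C}, the next two return the elements of {D, E} in random
// order, and every later call returns an empty vector.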
void BatchReader::ReadNext(std::vector<LoDTensor>* out) {
buffer_.clear();
buffer_.reserve(batch_size_);
for (int i = 0; i < batch_size_; ++i) {
if (reader_->HasNext()) {
buffer_.push_back(std::vector<LoDTensor>());
reader_->ReadNext(&buffer_.back());
} else {
break;
}
}
// Concat instances
out->clear();
if (buffer_.empty()) {
// If buffer_ is empty, 'out' will be returned as an empty vector.
return;
}
int out_num = buffer_[0].size();
out->reserve(out_num);
for (int j = 0; j < out_num; ++j) {
// Merge shapes and check data types
std::type_index batch_type = buffer_[0][j].type();
DDim batch_shape = buffer_[0][j].dims();
for (size_t i = 1; i < buffer_.size(); ++i) {
std::type_index ins_type = buffer_[i][j].type();
DDim ins_shape = buffer_[i][j].dims();
PADDLE_ENFORCE_EQ(batch_type, ins_type);
PADDLE_ENFORCE_EQ(slice_ddim(batch_shape, 1, batch_shape.size()),
slice_ddim(ins_shape, 1, ins_shape.size()));
PADDLE_ENFORCE_GT(ins_shape[0], 0);
batch_shape[0] += ins_shape[0];
}
LoDTensor out_tensor;
out_tensor.Resize(batch_shape);
out_tensor.mutable_data(platform::CPUPlace(), batch_type);
int64_t dst_offset = 0;
// Merge lod and data
LoD batch_lod;
std::vector<size_t> top_level_lod({0});
for (size_t i = 0; i < buffer_.size(); ++i) {
DDim ins_shape = buffer_[i][j].dims();
LoD ins_lod = buffer_[i][j].lod();
if (i == 0) {
batch_lod = ins_lod;
} else {
PADDLE_ENFORCE_EQ(batch_lod.size(), ins_lod.size());
for (size_t level_idx = 0; level_idx < batch_lod.size(); ++level_idx) {
auto& lod_level = batch_lod[level_idx];
for (size_t k = 1; k < ins_lod[level_idx].size(); ++k) {
lod_level.push_back(ins_lod[level_idx][k] + lod_level.back());
}
}
}
top_level_lod.push_back(
top_level_lod.back() +
(ins_lod.empty() ? ins_shape[0] : (ins_lod[0].size() - 1)));
Tensor dst = out_tensor.Slice(dst_offset, dst_offset + ins_shape[0]);
Copy(buffer_[i][j], platform::CPUPlace(), &dst);
dst_offset += ins_shape[0];
}
batch_lod.insert(batch_lod.begin(), top_level_lod);
out_tensor.set_lod(batch_lod);
out->push_back(out_tensor);
}
}
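// Worked example of the merging above (derived from the code, not part
// of the patch): batching two instances with shapes [2, 3] and [4, 3]
// and LoDs {{0, 2}} and {{0, 3, 4}} produces a tensor of shape [6, 3]
// with LoD {{0, 1, 3}, {0, 2, 5, 6}}. The inserted top level counts
// the sequences each instance contributes, while the original level is
// shifted by the accumulated row offsets.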
} // namespace framework
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/framework/ddim.h"
#include "paddle/framework/lod_tensor_array.h"
namespace paddle {
namespace framework {
class ReaderBase {
public:
explicit ReaderBase(const std::vector<DDim>& shapes) : shapes_(shapes) {
PADDLE_ENFORCE(!shapes_.empty());
}
virtual void ReadNext(std::vector<LoDTensor>* out) = 0;
virtual bool HasNext() const = 0;
virtual void ReInit() = 0;
DDim shape(size_t idx) const;
std::vector<DDim> shapes() const { return shapes_; }
void set_shapes(const std::vector<DDim>& shapes) { shapes_ = shapes; }
virtual ~ReaderBase() {}
protected:
std::vector<DDim> shapes_;
};
class FileReader : public ReaderBase {
public:
explicit FileReader(const std::vector<DDim>& shapes) : ReaderBase(shapes) {}
};
class DecoratedReader : public ReaderBase {
public:
explicit DecoratedReader(ReaderBase* reader)
: ReaderBase(reader->shapes()), reader_(reader) {
PADDLE_ENFORCE_NOT_NULL(reader_);
}
bool HasNext() const override { return reader_->HasNext(); }
void ReInit() override { reader_->ReInit(); }
protected:
ReaderBase* reader_;
};
// file readers
template <typename T>
class RandomDataGenerator : public FileReader {
public:
RandomDataGenerator(const std::vector<DDim>& shapes, float min, float max)
: FileReader(shapes), min_(min), max_(max) {
PADDLE_ENFORCE_LE(
min, max, "'min' shouldn't be greater than 'max'. (%f vs %f)", min, max);
unsigned int seed = std::random_device()();
engine_.seed(seed);
dist_ = std::uniform_real_distribution<float>(min_, max_);
}
void ReadNext(std::vector<LoDTensor>* out) override {
out->clear();
out->reserve(shapes_.size());
for (const DDim& shape : shapes_) {
PADDLE_ENFORCE_GE(
shape.size(), 2,
"The rank of reader's output data should be 2 at least.(Now it's %d)",
shape.size());
LoDTensor out_tensor;
out_tensor.Resize(shape);
T* data = out_tensor.mutable_data<T>(platform::CPUPlace());
int64_t numel = product(shape);
for (int64_t i = 0; i < numel; ++i) {
data[i] = dist_(engine_);
}
out->push_back(out_tensor);
}
}
bool HasNext() const override { return true; }
void ReInit() override { return; }
private:
float min_;
float max_;
std::minstd_rand engine_;
std::uniform_real_distribution<float> dist_;
};
// decorated readers
class ShuffleReader : public DecoratedReader {
public:
ShuffleReader(ReaderBase* reader, int buffer_size)
: DecoratedReader(reader), buffer_size_(buffer_size), iteration_pos_(0) {
buffer_.reserve(buffer_size);
}
void ReadNext(std::vector<LoDTensor>* out) override;
private:
int buffer_size_;
std::vector<std::vector<LoDTensor>> buffer_;
size_t iteration_pos_;
};
class BatchReader : public DecoratedReader {
public:
BatchReader(ReaderBase* reader, int batch_size)
: DecoratedReader(reader), batch_size_(batch_size) {
buffer_.reserve(batch_size_);
}
void ReadNext(std::vector<LoDTensor>* out) override;
private:
int batch_size_;
std::vector<std::vector<LoDTensor>> buffer_;
};
// The ReaderHolder is used as a unified wrapper for readers,
// making it easier to access different types of readers in Variables.
class ReaderHolder {
public:
void Reset(ReaderBase* reader) { reader_.reset(reader); }
ReaderBase* Get() const { return reader_.get(); }
void ReadNext(std::vector<LoDTensor>* out) { reader_->ReadNext(out); }
bool HasNext() const { return reader_->HasNext(); }
void ReInit() { reader_->ReInit(); }
DDim shape(size_t idx) const { return reader_->shape(idx); }
std::vector<DDim> shapes() const { return reader_->shapes(); }
void set_shapes(const std::vector<DDim>& shapes) {
reader_->set_shapes(shapes);
}
private:
std::unique_ptr<ReaderBase> reader_;
};
} // namespace framework
} // namespace paddle
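A minimal composition sketch (a hedged example added for illustration, not part of the original commit; it assumes the declarations above and runs inside namespace paddle::framework):
// Chain a random file reader with shuffle and batch decorators and
// hold the outermost reader in a ReaderHolder, mirroring what the
// create_*_reader operators do at runtime. As in those operators, the
// inner readers are passed as raw pointers and are not owned here.
ReaderHolder holder;
auto* random_reader = new RandomDataGenerator<float>(
    {make_ddim({2, 3}), make_ddim({4, 5})}, /*min=*/0.0f, /*max=*/1.0f);
auto* shuffle_reader = new ShuffleReader(random_reader, /*buffer_size=*/100);
holder.Reset(new BatchReader(shuffle_reader, /*batch_size=*/32));
std::vector<LoDTensor> batch;
if (holder.HasNext()) {
  holder.ReadNext(&batch);  // Two tensors: shapes [64, 3] and [128, 5].
}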
......@@ -32,6 +32,16 @@ std::vector<DDim> InferShapeContext::GetInputsDim(
return GetDims(arg_names);
}
std::vector<DDim> InferShapeContext::GetReaderDims(
const std::string &name) const {
const std::vector<std::string> &arg_names = Inputs(name);
PADDLE_ENFORCE_EQ(
arg_names.size(), 1UL,
"Reader input '%s' should hold one element, but now it holds %d", name,
arg_names.size());
return this->GetRepeatedDims(arg_names[0]);
}
DDim InferShapeContext::GetInputsElementDim(const std::string &name,
int idx) const {
const std::vector<std::string> &names = Inputs(name);
......@@ -52,6 +62,16 @@ void InferShapeContext::SetOutputsDim(const std::string &name,
SetDims(names, dims);
}
void InferShapeContext::SetReaderDims(const std::string &name,
const std::vector<DDim> &dims) {
const std::vector<std::string> &arg_names = Outputs(name);
PADDLE_ENFORCE_EQ(
arg_names.size(), 1UL,
"Reader output '%s' should hold one element, but now it holds %d", name,
arg_names.size());
return this->SetRepeatedDims(arg_names[0], dims);
}
std::vector<DDim> InferShapeContext::GetDims(
const std::vector<std::string> &names) const {
std::vector<DDim> ret;
......@@ -61,6 +81,7 @@ std::vector<DDim> InferShapeContext::GetDims(
[this](const std::string &name) { return this->GetDim(name); });
return ret;
}
void InferShapeContext::SetDims(const std::vector<std::string> &names,
const std::vector<DDim> &dims) {
size_t length = names.size();
......@@ -72,14 +93,17 @@ void InferShapeContext::SetDims(const std::vector<std::string> &names,
SetDim(names[i], dims[i]);
}
}
std::vector<proto::VarDesc::VarType> InferShapeContext::GetInputsVarType(
const std::string &name) const {
return GetVarTypes(Inputs(name));
}
std::vector<proto::VarDesc::VarType> InferShapeContext::GetOutputsVarType(
const std::string &name) const {
return GetVarTypes(Outputs(name));
}
std::vector<proto::VarDesc::VarType> InferShapeContext::GetVarTypes(
const std::vector<std::string> &names) const {
std::vector<proto::VarDesc::VarType> retv;
......
......@@ -36,12 +36,13 @@ class InferShapeContext {
virtual bool HasOutputs(const std::string &name) const = 0;
DDim GetInputDim(const std::string &name) const;
std::vector<DDim> GetInputsDim(const std::string &name) const;
std::vector<DDim> GetReaderDims(const std::string &name) const;
DDim GetInputsElementDim(const std::string &name, int idx) const;
void SetOutputDim(const std::string &name, const DDim &dim);
void SetOutputsDim(const std::string &name, const std::vector<DDim> &dims);
void SetReaderDims(const std::string &name, const std::vector<DDim> &dims);
virtual AttrReader Attrs() const = 0;
virtual const std::vector<std::string> &Inputs(
......@@ -61,6 +62,9 @@ class InferShapeContext {
protected:
virtual DDim GetDim(const std::string &name) const = 0;
virtual void SetDim(const std::string &name, const DDim &dim) = 0;
virtual std::vector<DDim> GetRepeatedDims(const std::string &name) const = 0;
virtual void SetRepeatedDims(const std::string &name,
const std::vector<DDim> &dims) = 0;
std::vector<DDim> GetDims(const std::vector<std::string> &names) const;
std::vector<proto::VarDesc::VarType> GetVarTypes(
......
......@@ -57,10 +57,13 @@ size_t VarDesc::GetTensorDescNum() const {
void VarDesc::SetShapes(
const std::vector<std::vector<int64_t>> &multiple_dims) {
PADDLE_ENFORCE_EQ(multiple_dims.size(), GetTensorDescNum(),
"The number of given shapes(%d) doesn't equal to the "
"number of sub tensor.",
multiple_dims.size(), GetTensorDescNum());
if (multiple_dims.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given shapes(" << multiple_dims.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_dims.size());
}
std::vector<proto::TensorDesc *> tensors = mutable_tensor_descs();
for (size_t i = 0; i < multiple_dims.size(); ++i) {
VectorToRepeated(multiple_dims[i], tensors[i]->mutable_dims());
......@@ -87,10 +90,14 @@ void VarDesc::SetDataType(proto::DataType data_type) {
void VarDesc::SetDataTypes(
const std::vector<proto::DataType> &multiple_data_type) {
PADDLE_ENFORCE_EQ(multiple_data_type.size(), GetTensorDescNum(),
"The number of given data types(%d) doesn't equal to the "
"number of sub tensor.",
multiple_data_type.size(), GetTensorDescNum());
if (multiple_data_type.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given data types("
<< multiple_data_type.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_data_type.size());
}
std::vector<proto::TensorDesc *> tensor_descs = mutable_tensor_descs();
for (size_t i = 0; i < multiple_data_type.size(); ++i) {
tensor_descs[i]->set_data_type(multiple_data_type[i]);
......@@ -127,10 +134,14 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
}
void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
PADDLE_ENFORCE_EQ(multiple_lod_level.size(), GetTensorDescNum(),
"The number of given data types(%d) doesn't equal to the "
"number of sub tensor.",
multiple_lod_level.size(), GetTensorDescNum());
if (multiple_lod_level.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given lod_levels("
<< multiple_lod_level.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_lod_level.size());
}
switch (desc_.type()) {
case proto::VarDesc::READER: {
size_t i = 0;
......
......@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/reader.h"
#include "paddle/framework/selected_rows.h"
#include "paddle/framework/variable.h"
......@@ -31,6 +32,8 @@ inline proto::VarDesc::VarType ToVarType(std::type_index type) {
return proto::VarDesc_VarType_LOD_TENSOR_ARRAY;
} else if (type.hash_code() == typeid(SelectedRows).hash_code()) {
return proto::VarDesc_VarType_SELECTED_ROWS;
} else if (type.hash_code() == typeid(ReaderHolder).hash_code()) {
return proto::VarDesc_VarType_READER;
} else {
PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
}
......@@ -40,7 +43,7 @@ template <typename Visitor>
inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
switch (ToVarType(var.Type())) {
case proto::VarDesc_VarType_LOD_TENSOR:
visitor(var.Get<framework::LoDTensor>());
visitor(var.Get<LoDTensor>());
return;
case proto::VarDesc_VarType_LOD_RANK_TABLE:
visitor(var.Get<LoDRankTable>());
......@@ -51,6 +54,9 @@ inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
case proto::VarDesc_VarType_SELECTED_ROWS:
visitor(var.Get<SelectedRows>());
return;
case proto::VarDesc_VarType_READER:
visitor(var.Get<ReaderHolder>());
return;
default:
PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
}
......
......@@ -62,7 +62,7 @@ function(op_library TARGET)
endif()
# Define operators that don't need pybind here.
foreach(manual_pybind_op "net_op" "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op")
foreach(manual_pybind_op "net_op" "compare_op" "logical_op" "nccl_op" "tensor_array_read_write_op" "create_reader_op")
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
set(pybind_flag 1)
endif()
......@@ -155,6 +155,7 @@ op_library(recurrent_op DEPS executor)
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale math_function)
op_library(cos_sim_op DEPS cos_sim_functor)
op_library(parallel_do_op DEPS executor)
op_library(create_reader_op DEPS reader)
# Regist multiple Kernel to pybind
if (WITH_GPU)
......@@ -185,7 +186,7 @@ list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS})
op_library(${src})
endforeach()
file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\n")
file(APPEND ${pybind_file} "USE_OP(less_than);\nUSE_OP(logical_and);\nUSE_NO_KERNEL_OP(read_from_array);\nUSE_NO_KERNEL_OP(create_random_data_generator);\n")
set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library")
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/framework/op_registry.h"
#include "paddle/framework/reader.h"
namespace paddle {
namespace operators {
static std::vector<framework::DDim> RestoreShapes(
const std::vector<int>& shape_concat, const std::vector<int>& ranks) {
std::vector<framework::DDim> res;
int offset = 0;
for (int len : ranks) {
auto start_it = shape_concat.begin() + offset;
auto end_it = start_it + len;
res.push_back(framework::make_ddim(std::vector<int>(start_it, end_it)));
offset += len;
}
return res;
}
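// For example, shape_concat = {2, 3, 4, 5, 6} with ranks = {3, 2}
// restores two DDims: [2, 3, 4] and [5, 6].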
// general infershape for file readers
class CreateFileReaderInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"The output file reader should not be null.");
const auto shape_concat =
ctx->Attrs().Get<std::vector<int>>("shape_concat");
const auto ranks = ctx->Attrs().Get<std::vector<int>>("ranks");
std::vector<framework::DDim> shapes = RestoreShapes(shape_concat, ranks);
ctx->SetReaderDims("Out", shapes);
}
};
// general infershape for decorated readers
class CreateDecoratedReaderInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("UnderlyingReader"),
"Input(UnderlyingReader) should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"The output decorated reader should not be null.");
ctx->SetReaderDims("Out", ctx->GetReaderDims("UnderlyingReader"));
}
};
// general var type inference for all readers
class CreateReaderInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDesc& op_desc,
framework::BlockDesc* block) const override {
std::string reader_name = op_desc.Output("Out")[0];
framework::VarDesc* reader = block->FindVarRecursive(reader_name);
reader->SetType(framework::proto::VarDesc::READER);
}
};
template <typename T>
class CreateRandomDataGeneratorOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;
void Run(const framework::Scope& scope,
const platform::Place& dev_place) const override {
const auto& shape_concat = Attr<std::vector<int>>("shape_concat");
const auto& ranks = Attr<std::vector<int>>("ranks");
PADDLE_ENFORCE(!shape_concat.empty() && !ranks.empty());
PADDLE_ENFORCE_EQ(std::accumulate(ranks.begin(), ranks.end(), 0),
int(shape_concat.size()),
"The accumulate of all ranks should be equal to the "
"shape concat's length.");
std::vector<framework::DDim> shapes = RestoreShapes(shape_concat, ranks);
auto* out = scope.FindVar(Output("Out"))
->template GetMutable<framework::ReaderHolder>();
out->Reset(new framework::RandomDataGenerator<T>(shapes, Attr<float>("min"),
Attr<float>("max")));
}
};
class CreateRandomDataGeneratorOpMaker
: public framework::OpProtoAndCheckerMaker {
public:
CreateRandomDataGeneratorOpMaker(OpProto* op_proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(op_proto, op_checker) {
AddOutput("Out", "(ReaderHolder) The created random reader.");
AddAttr<std::vector<int>>("shape_concat",
"The concat of all data's shapes.");
AddAttr<std::vector<int>>(
"ranks",
"The ranks of each data."
"e.g."
"shape_concat = [2,3,4,5,6]"
"ranks = [3,2]"
"It means the reader will generate two data each time,"
"whose shapes are [2,3,4] and [5,6] respectively.");
AddAttr<float>("min", "The lower bound of reader's uniform distribution.");
AddAttr<float>("max", "The upper bound of reader's uniform distribution.");
AddComment(R"DOC(
CreateRandomDataGenerator Operator
This Op creates a random reader.
The reader generates random data instead of actually reading from files.
The generated data follow a uniform distribution between 'min' and 'max'.
)DOC");
}
};
class CreateShuffleReaderOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;
void Run(const framework::Scope& scope,
const platform::Place& dev_place) const override {
const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader"))
->Get<framework::ReaderHolder>();
auto* out = scope.FindVar(Output("Out"))
->template GetMutable<framework::ReaderHolder>();
out->Reset(new framework::ShuffleReader(underlying_reader.Get(),
Attr<int>("buffer_size")));
}
};
class CreateShuffleReaderOpMaker : public framework::OpProtoAndCheckerMaker {
public:
CreateShuffleReaderOpMaker(OpProto* op_proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(op_proto, op_checker) {
AddInput(
"UnderlyingReader",
"(ReaderHolder) The underlying reader for creating a shuffle reader.");
AddOutput("Out", "(ReaderHolder) The created shuffle reader.");
AddAttr<int>("buffer_size", "The shuffle buffer size.").GreaterThan(0);
AddComment(R"DOC(
CreateShuffleReader Operator
A shuffle reader takes another reader as its 'underlying reader'
and yields the underlying reader's outputs in a shuffled order.
)DOC");
}
};
class CreateBatchReaderOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;
void Run(const framework::Scope& scope,
const platform::Place& dev_place) const override {
const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader"))
->Get<framework::ReaderHolder>();
auto* out = scope.FindVar(Output("Out"))
->template GetMutable<framework::ReaderHolder>();
out->Reset(new framework::BatchReader(underlying_reader.Get(),
Attr<int>("batch_size")));
}
};
class CreateBatchReaderOpMaker : public framework::OpProtoAndCheckerMaker {
public:
CreateBatchReaderOpMaker(OpProto* op_proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(op_proto, op_checker) {
AddInput(
"UnderlyingReader",
"(ReaderHolder) The underlying reader for creating a batch reader.");
AddOutput("Out", "(ReaderHolder) The created batch reader.");
AddAttr<int>("batch_size",
"How many instances the batch reader yields each time.")
.GreaterThan(0);
AddComment(R"DOC(
CreateBatchReader Operator
A batch reader takes another reader as its 'underlying reader',
gathers the underlying reader's outputs and then yields them in batches.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(create_random_data_generator,
ops::CreateRandomDataGeneratorOp<float>,
ops::CreateFileReaderInferShape,
ops::CreateRandomDataGeneratorOpMaker,
paddle::framework::EmptyGradOpMaker,
ops::CreateReaderInferVarType);
REGISTER_OPERATOR(create_shuffle_reader, ops::CreateShuffleReaderOp,
ops::CreateDecoratedReaderInferShape,
ops::CreateShuffleReaderOpMaker,
paddle::framework::EmptyGradOpMaker,
ops::CreateReaderInferVarType);
REGISTER_OPERATOR(create_batch_reader, ops::CreateBatchReaderOp,
ops::CreateDecoratedReaderInferShape,
ops::CreateBatchReaderOpMaker,
paddle::framework::EmptyGradOpMaker,
ops::CreateReaderInferVarType);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/framework/op_registry.h"
#include "paddle/framework/reader.h"
namespace paddle {
namespace operators {
class ReadInferShape : public framework::InferShapeBase {
public:
void operator()(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Reader"),
"The ReadOp must take a reader as input.");
PADDLE_ENFORCE(ctx->HasOutputs("Out"),
"The ReadOp should be assigned with output.");
std::vector<framework::DDim> reader_dims = ctx->GetReaderDims("Reader");
std::vector<std::string> out_names = ctx->Outputs("Out");
PADDLE_ENFORCE_EQ(
reader_dims.size(), out_names.size(),
"The reader's dim number doesn't match the output number.");
ctx->SetOutputsDim("Out", reader_dims);
}
};
class ReadInferVarType : public framework::VarTypeInference {
public:
void operator()(const framework::OpDesc& op_desc,
framework::BlockDesc* block) const override {
std::string reader_name = op_desc.Input("Reader")[0];
std::vector<std::string> out_names = op_desc.Output("Out");
framework::VarDesc* reader = block->FindVarRecursive(reader_name);
auto dtypes = reader->GetDataTypes();
PADDLE_ENFORCE_EQ(dtypes.size(), out_names.size());
for (size_t i = 0; i < dtypes.size(); ++i) {
framework::VarDesc& out = block->FindRecursiveOrCreateVar(out_names[i]);
out.SetType(framework::proto::VarDesc::LOD_TENSOR);
out.SetDataType(dtypes[i]);
}
}
};
class ReadOp : public framework::OperatorBase {
public:
using framework::OperatorBase::OperatorBase;
void Run(const framework::Scope& scope,
const platform::Place& dev_place) const override {
framework::ReaderHolder* reader =
scope.FindVar(Input("Reader"))->GetMutable<framework::ReaderHolder>();
if (!reader->HasNext()) {
reader->ReInit();
PADDLE_ENFORCE(
reader->HasNext(),
"Reader can not read the next data even it has been re-initialized.");
}
std::vector<std::string> out_arg_names = Outputs("Out");
std::vector<framework::LoDTensor> ins;
reader->ReadNext(&ins);
PADDLE_ENFORCE_EQ(ins.size(), out_arg_names.size());
for (size_t i = 0; i < ins.size(); ++i) {
auto* out =
scope.FindVar(out_arg_names[i])->GetMutable<framework::LoDTensor>();
out->ShareDataWith(ins[i]);
out->set_lod(ins[i].lod());
}
}
};
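// Note on the flow above: when the underlying reader is exhausted,
// Run() re-initializes it once before reading, so a 'read' op inside a
// loop keeps yielding data across epochs.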
class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
public:
ReadOpMaker(OpProto* op_proto, OpAttrChecker* op_checker)
: OpProtoAndCheckerMaker(op_proto, op_checker) {
AddInput("Reader", "(ReaderHolder) The executed reader.");
AddOutput("Out", "(LoDTensor) The output data.").AsDuplicable();
AddComment(R"DOC(
Read Operator
Execute a given reader once and output data.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(read, ops::ReadOp, ops::ReadInferShape, ops::ReadOpMaker,
paddle::framework::EmptyGradOpMaker, ops::ReadInferVarType);
......@@ -217,8 +217,6 @@ void BindVarDsec(py::module &m) {
.def("set_shapes", &VarDesc::SetShapes)
.def("set_dtype", &VarDesc::SetDataType)
.def("set_dtypes", &VarDesc::SetDataTypes)
.def("set_tensor_num", &VarDesc::SetTensorDescNum)
.def("tensor_num", &VarDesc::GetTensorDescNum)
.def("shape", &VarDesc::GetShape, py::return_value_policy::reference)
.def("shapes", &VarDesc::GetShapes, py::return_value_policy::reference)
.def("dtype", &VarDesc::GetDataType, py::return_value_policy::reference)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.v2 as paddle
import paddle.v2.fluid as fluid
import numpy as np
prog = fluid.framework.Program()
block = prog.current_block()
random_reader = block.create_var(
type=fluid.core.VarDesc.VarType.READER, name="RandomDataGenerator")
random_reader.desc.set_lod_levels([0, 0])
create_random_data_generator_op = block.append_op(
type="create_random_data_generator",
outputs={"Out": random_reader},
attrs={
"shape_concat": [1, 2, 1, 1],
"ranks": [2, 2],
"min": 0.0,
"max": 1.0
})
out1 = block.create_var(
type=fluid.core.VarDesc.VarType.LOD_TENSOR,
name="Out1",
shape=[10, 2],
dtype="float32",
lod_level=1)
out2 = block.create_var(
type=fluid.core.VarDesc.VarType.LOD_TENSOR,
name="Out2",
shape=[10, 1],
dtype="float32",
lod_level=1)
read_op = block.append_op(
type="read",
inputs={"Reader": random_reader},
outputs={"Out": [out1, out2]})
place = fluid.CPUPlace()
exe = fluid.Executor(place)
[res1, res2] = exe.run(prog, fetch_list=[out1, out2])
if len(res1) == 0 or len(res2) == 0:
exit(1)
exit(0)
......@@ -120,7 +120,6 @@ class TestVarDesc(unittest.TestCase):
block = program_desc.block(0)
var = block.var('my_reader')
var.set_type(core.VarDesc.VarType.READER)
var.set_tensor_num(3)
src_shapes = [[2, 3, 3], [4, 5], [6, 7, 8, 9]]
var.set_shapes(src_shapes)
res_shapes = var.shapes()
......@@ -141,7 +140,6 @@ class TestVarDesc(unittest.TestCase):
block = program_desc.block(0)
var = block.var('my_reader')
var.set_type(core.VarDesc.VarType.READER)
var.set_tensor_num(3)
src_types = [
core.DataType.INT32, core.DataType.FP64, core.DataType.FP32
]
......@@ -154,7 +152,6 @@ class TestVarDesc(unittest.TestCase):
block = program_desc.block(0)
var = block.var('my_reader')
var.set_type(core.VarDesc.VarType.READER)
var.set_tensor_num(3)
src_types = [3, 1, 2]
var.set_lod_levels(src_types)
self.assertEqual(src_types, var.lod_levels())
......