reader.h 5.1 KB
Newer Older
F
fengjiayi 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

Y
Yi Wang 已提交
17 18
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
F
fengjiayi 已提交
19
#include "paddle/fluid/framework/threadpool.h"
F
fengjiayi 已提交
20 21 22 23

namespace paddle {
namespace framework {

F
fengjiayi 已提交
24 25
// Number of slots DoubleBufferReader allocates for its internal `buffer_`
// (the buffer vector is constructed with this size).
static constexpr size_t kDoubleBufferSize = 3;

F
fengjiayi 已提交
26
class ReaderBase {
F
fengjiayi 已提交
27
 public:
F
fengjiayi 已提交
28 29 30 31
  explicit ReaderBase(const std::vector<DDim>& shapes) : shapes_(shapes) {
    PADDLE_ENFORCE(!shapes_.empty());
  }
  virtual void ReadNext(std::vector<LoDTensor>* out) = 0;
F
fengjiayi 已提交
32 33
  virtual bool HasNext() const = 0;

F
fengjiayi 已提交
34 35
  virtual void ReInit() = 0;

F
fengjiayi 已提交
36 37 38
  DDim shape(size_t idx) const;
  std::vector<DDim> shapes() const { return shapes_; }
  void set_shapes(const std::vector<DDim>& shapes) { shapes_ = shapes; }
F
fengjiayi 已提交
39

F
fengjiayi 已提交
40
  virtual ~ReaderBase() {}
F
fengjiayi 已提交
41 42 43

 protected:
  std::vector<DDim> shapes_;
F
fengjiayi 已提交
44
};
F
fengjiayi 已提交
45

F
fengjiayi 已提交
46 47
class FileReader : public ReaderBase {
 public:
F
fengjiayi 已提交
48
  explicit FileReader(const std::vector<DDim>& shapes) : ReaderBase(shapes) {}
F
fengjiayi 已提交
49 50
};

F
fengjiayi 已提交
51
class DecoratedReader : public ReaderBase {
F
fengjiayi 已提交
52
 public:
F
fengjiayi 已提交
53 54
  explicit DecoratedReader(ReaderBase* reader)
      : ReaderBase(reader->shapes()), reader_(reader) {
F
fengjiayi 已提交
55 56 57
    PADDLE_ENFORCE_NOT_NULL(reader_);
  }

F
fengjiayi 已提交
58 59
  bool HasNext() const override { return reader_->HasNext(); }

F
fengjiayi 已提交
60 61
  void ReInit() override { reader_->ReInit(); }

F
fengjiayi 已提交
62 63 64 65
 protected:
  ReaderBase* reader_;
};

F
fengjiayi 已提交
66 67
// file readers

F
fengjiayi 已提交
68
template <typename T>
F
fengjiayi 已提交
69
class RandomDataGenerator : public FileReader {
F
fengjiayi 已提交
70
 public:
F
fengjiayi 已提交
71
  RandomDataGenerator(const std::vector<DDim>& shapes, float min, float max)
F
fengjiayi 已提交
72
      : FileReader(shapes), min_(min), max_(max) {
F
fengjiayi 已提交
73 74
    PADDLE_ENFORCE_LE(
        min, max, "'min' shouldn't be greater than 'max'.(%f vs %f)", min, max);
F
fengjiayi 已提交
75 76 77
    unsigned int seed = std::random_device()();
    engine_.seed(seed);
    dist_ = std::uniform_real_distribution<float>(min_, max_);
F
fengjiayi 已提交
78 79
  }

F
fengjiayi 已提交
80 81 82
  void ReadNext(std::vector<LoDTensor>* out) override {
    out->clear();
    out->reserve(shapes_.size());
F
fengjiayi 已提交
83
    for (const DDim& shape : shapes_) {
F
fengjiayi 已提交
84 85
      PADDLE_ENFORCE_GE(
          shape.size(), 2,
F
fengjiayi 已提交
86
          "The rank of reader's output data should be 2 at least.(Now it's %d)",
F
fengjiayi 已提交
87
          shape.size());
F
fengjiayi 已提交
88 89 90
      LoDTensor out_tensor;
      out_tensor.Resize(shape);
      T* data = out_tensor.mutable_data<T>(platform::CPUPlace());
F
fengjiayi 已提交
91 92
      int64_t numel = product(shape);
      for (int64_t i = 0; i < numel; ++i) {
F
fengjiayi 已提交
93
        data[i] = dist_(engine_);
F
fengjiayi 已提交
94
      }
F
fengjiayi 已提交
95
      out->push_back(out_tensor);
F
fengjiayi 已提交
96 97 98 99
    }
  }

  bool HasNext() const override { return true; }
F
fengjiayi 已提交
100

F
fengjiayi 已提交
101 102
  void ReInit() override { return; }

F
fengjiayi 已提交
103 104 105
 private:
  float min_;
  float max_;
F
fengjiayi 已提交
106 107
  std::minstd_rand engine_;
  std::uniform_real_distribution<float> dist_;
F
fengjiayi 已提交
108 109
};

F
fengjiayi 已提交
110
// decorated readers
F
fengjiayi 已提交
111

F
fengjiayi 已提交
112
class ShuffleReader : public DecoratedReader {
F
fengjiayi 已提交
113
 public:
F
fengjiayi 已提交
114
  ShuffleReader(ReaderBase* reader, int buffer_size)
F
fengjiayi 已提交
115
      : DecoratedReader(reader), buffer_size_(buffer_size), iteration_pos_(0) {
F
fengjiayi 已提交
116 117
    buffer_.reserve(buffer_size);
  }
F
fengjiayi 已提交
118

F
fengjiayi 已提交
119
  void ReadNext(std::vector<LoDTensor>* out) override;
F
fengjiayi 已提交
120 121

 private:
F
fengjiayi 已提交
122
  int buffer_size_;
F
fengjiayi 已提交
123
  std::vector<std::vector<LoDTensor>> buffer_;
F
fengjiayi 已提交
124
  size_t iteration_pos_;
F
fengjiayi 已提交
125 126
};

F
fengjiayi 已提交
127
class BatchReader : public DecoratedReader {
F
fengjiayi 已提交
128
 public:
F
fengjiayi 已提交
129
  BatchReader(ReaderBase* reader, int batch_size)
F
fengjiayi 已提交
130
      : DecoratedReader(reader), batch_size_(batch_size) {
F
fengjiayi 已提交
131 132 133
    buffer_.reserve(batch_size_);
  }

F
fengjiayi 已提交
134
  void ReadNext(std::vector<LoDTensor>* out) override;
F
fengjiayi 已提交
135 136

 private:
F
fengjiayi 已提交
137
  int batch_size_;
F
fengjiayi 已提交
138
  std::vector<std::vector<LoDTensor>> buffer_;
F
fengjiayi 已提交
139
};
F
fengjiayi 已提交
140

F
fengjiayi 已提交
141 142
class DoubleBufferReader : public DecoratedReader {
 public:
F
fengjiayi 已提交
143
  explicit DoubleBufferReader(ReaderBase* reader)
F
fengjiayi 已提交
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
      : DecoratedReader(reader), buffer_(kDoubleBufferSize) {
    framework::Async(std::bind(&DoubleBufferReader::ProducerThreadFunc, this));
  }

  void ReadNext(std::vector<LoDTensor>* out) override;
  bool HasNext() const override;

 private:
  void ProducerThreadFunc();

  std::vector<std::vector<LoDTensor>> buffer_;
  size_t write_pos_;
  size_t read_pos_;

  std::mutex mtx_;
  std::condition_variable buffer_not_full_;
  std::condition_variable buffer_not_empty_;
};

F
fengjiayi 已提交
163 164
// The ReaderHolder is used as readers' unified wrapper,
// making it easier to access different type readers in Variables.
F
fengjiayi 已提交
165 166 167 168 169 170
class ReaderHolder {
 public:
  void Reset(ReaderBase* reader) { reader_.reset(reader); }

  ReaderBase* Get() const { return reader_.get(); }

F
fengjiayi 已提交
171
  void ReadNext(std::vector<LoDTensor>* out) { reader_->ReadNext(out); }
F
fengjiayi 已提交
172
  bool HasNext() const { return reader_->HasNext(); }
F
fengjiayi 已提交
173
  void ReInit() { reader_->ReInit(); }
F
fengjiayi 已提交
174 175 176

  DDim shape(size_t idx) const { return reader_->shape(idx); }
  std::vector<DDim> shapes() const { return reader_->shapes(); }
F
fengjiayi 已提交
177 178 179
  void set_shapes(const std::vector<DDim>& shapes) {
    reader_->set_shapes(shapes);
  }
F
fengjiayi 已提交
180 181 182 183 184

 private:
  std::unique_ptr<ReaderBase> reader_;
};

F
fengjiayi 已提交
185 186
}  // namespace framework
}  // namespace paddle