read_op.cc 7.7 KB
Newer Older
1
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
F
fengjiayi 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15
#include "paddle/fluid/framework/convert_utils.h"
Y
Yi Wang 已提交
16 17
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
18
#include "paddle/fluid/platform/profiler/event_tracing.h"
F
fengjiayi 已提交
19 20 21 22

namespace paddle {
namespace operators {

23 24 25
// Returns true if the two dimensions are compatible.
// Two dimensions are compatible if:
// 1. They have the same length (number of axes).
// 2. Every pair of corresponding non-negative entries is equal.
// 3. A negative entry means "unknown", so it is compatible with any number.
bool DimensionIsCompatibleWith(const framework::DDim& first,
                               const framework::DDim& second) {
  // Dimensions of different length can never match.
  const int rank = first.size();
  if (rank != second.size()) {
    return false;
  }

  for (int axis = 0; axis < rank; ++axis) {
    const auto lhs = first[axis];
    const auto rhs = second[axis];
    // A negative entry on either side acts as a wildcard for this axis.
    if (lhs < 0 || rhs < 0) {
      continue;
    }
    // Both entries are known, so they have to agree exactly.
    if (lhs != rhs) {
      return false;
    }
  }
  return true;
}

F
fengjiayi 已提交
43 44 45
// Shape inference for the `read` op.
//
// Always verifies that the "Reader" input and the "Out" outputs are present.
// Outside of runtime, and only when the "infer_out" attribute is true, it
// additionally copies the reader's dims and LoD levels onto the outputs.
class ReadInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasInput("Reader"), "Input", "Reader", "read");
    OP_INOUT_CHECK(ctx->HasOutputs("Out"), "Output", "Out", "read");

    // Nothing else to do at runtime or when output inference is switched off.
    if (ctx->IsRuntime() || !ctx->Attrs().Get<bool>("infer_out")) {
      return;
    }

    // The reader must describe exactly one dim per output before the dims
    // are forwarded.
    const std::vector<framework::DDim> reader_dims =
        ctx->GetReaderDims("Reader");
    const std::vector<std::string> out_names = ctx->Outputs("Out");
    PADDLE_ENFORCE_EQ(
        reader_dims.size(),
        out_names.size(),
        platform::errors::InvalidArgument(
            "The reader's dim number doesn't match the output number."));
    ctx->SetOutputsDim("Out", reader_dims);

    // LoD levels are read from the first (only) "Reader" variable descriptor.
    framework::VarDesc* in_desc =
        PADDLE_GET(framework::VarDesc*, ctx->GetInputVarPtrs("Reader")[0]);
    const auto in_lod_levels = in_desc->GetLoDLevels();
    auto out_var_ptrs = ctx->GetOutputVarPtrs("Out");
    PADDLE_ENFORCE_EQ(
        in_lod_levels.size(),
        out_var_ptrs.size(),
        platform::errors::InvalidArgument(
            "LoDLevels of Input(Reader) must be the same as the "
            "number of Outputs(Out)."));

    // Propagate the i-th LoD level to the i-th output descriptor.
    for (size_t i = 0; i < out_var_ptrs.size(); ++i) {
      framework::VarDesc* out_desc =
          PADDLE_GET(framework::VarDesc*, out_var_ptrs[i]);
      out_desc->SetLoDLevel(in_lod_levels[i]);
    }
  }
};

75
class ReadInferVarType : public framework::StaticGraphVarTypeInference {
F
fengjiayi 已提交
76
 public:
M
minqiyang 已提交
77
  void operator()(framework::InferVarTypeContext* ctx) const override {
R
Ruibiao Chen 已提交
78
    bool infer_out = PADDLE_GET_CONST(bool, ctx->GetAttr("infer_out"));
Q
Qiao Longfei 已提交
79
    if (infer_out) {
80 81 82
      std::string reader_name = Input(ctx, "Reader")[0];
      auto& out_names = Output(ctx, "Out");
      auto dtypes = GetDataTypes(ctx, reader_name);
83 84
      PADDLE_ENFORCE_EQ(dtypes.size(),
                        out_names.size(),
85 86 87
                        platform::errors::InvalidArgument(
                            "The number of input reader's dtypes do not match "
                            "the output variable number."));
Q
Qiao Longfei 已提交
88
      for (size_t i = 0; i < dtypes.size(); ++i) {
89 90
        SetType(ctx, out_names[i], framework::proto::VarType::LOD_TENSOR);
        SetDataType(ctx, out_names[i], dtypes[i]);
Q
Qiao Longfei 已提交
91
      }
F
fengjiayi 已提交
92 93 94 95 96 97 98
    }
  }
};

class ReadOp : public framework::OperatorBase {
 public:
  using framework::OperatorBase::OperatorBase;
99 100 101 102

 private:
  void RunImpl(const framework::Scope& scope,
               const platform::Place& dev_place) const override {
Q
Qiao Longfei 已提交
103
    VLOG(3) << "read op in";
F
fengjiayi 已提交
104
    framework::ReaderHolder* reader =
105 106
        GET_DATA_SAFELY(
            scope.FindVar(Input("Reader")), "Input", "Reader", "Read")
Y
Yu Yang 已提交
107
            .GetMutable<framework::ReaderHolder>();
F
fengjiayi 已提交
108
    std::vector<std::string> out_arg_names = Outputs("Out");
109
    paddle::framework::LoDTensorArray ins;
W
Wu Yi 已提交
110 111

    // For profiling
112 113
    platform::RecordEvent record_event(
        Type().c_str(), platform::TracerEventType::UserDefined, 1);
W
Wu Yi 已提交
114

F
fengjiayi 已提交
115
    reader->ReadNext(&ins);
116
    if (ins.empty()) {
C
chengduo 已提交
117 118
      VLOG(3) << "throw_eof_exp";
      PADDLE_THROW_EOF();
119
    }
120
    PADDLE_ENFORCE_EQ(
121 122
        ins.size(),
        out_arg_names.size(),
123 124
        platform::errors::InvalidArgument("input data number and output data "
                                          "number of read_op do not match"));
125 126 127 128 129

    const std::vector<framework::DDim>& shapes = reader->Shapes();
    const std::vector<framework::proto::VarType::Type>& var_types =
        reader->VarTypes();
    const std::vector<bool>& need_check_feed = reader->NeedCheckFeed();
130
    PADDLE_ENFORCE_EQ(
131 132
        out_arg_names.size(),
        need_check_feed.size(),
133 134 135 136
        platform::errors::InvalidArgument(
            "Output size of read_op and the number of fed "
            "variables of reader do not match. Received size of output is %d, "
            "number of fed variables of reader is %d",
137 138
            out_arg_names.size(),
            need_check_feed.size()));
139

140
    for (size_t i = 0; i < out_arg_names.size(); ++i) {
F
fengjiayi 已提交
141 142
      auto* out =
          scope.FindVar(out_arg_names[i])->GetMutable<framework::LoDTensor>();
143 144
      if (need_check_feed[i]) {
        auto in_dims = ins[i].dims();
145
        PADDLE_ENFORCE_EQ(
146 147
            DimensionIsCompatibleWith(shapes[i], in_dims),
            true,
148 149 150
            platform::errors::InvalidArgument(
                "The fed Variable %s should have dimensions = %d, "
                "shape = [%s], but received fed shape [%s]",
151 152 153 154
                out_arg_names[i],
                shapes[i].size(),
                shapes[i],
                in_dims));
155
        PADDLE_ENFORCE_EQ(
156 157
            framework::TransToProtoVarType(ins[i].dtype()),
            var_types[i],
158 159
            platform::errors::InvalidArgument(
                "The data type of fed Variable %s must be %s, but received %s",
160 161 162
                out_arg_names[i],
                var_types[i],
                ins[i].type()));
163
      }
F
fengjiayi 已提交
164 165 166 167 168 169 170 171
      out->ShareDataWith(ins[i]);
      out->set_lod(ins[i].lod());
    }
  }
};

// Declares the inputs, outputs, attributes and description of the `read` op.
class ReadOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // Input / output slots; "Out" may hold several variables.
    AddInput("Reader", "(ReaderHolder) The executed reader.");
    AddOutput("Out", "(LoDTensor) The output data.").AsDuplicable();

    // Long attribute descriptions are named first so each registration below
    // stays on a single line.
    const char* const throw_eof_exp_doc =
        "If set true, an exception will be thrown when the Reader "
        "yields empty (which means there is no next data).\n"
        "NOTES: This flag must be true always. It will be set to false"
        " only when the data-balance is enabled in ParallelExecutor"
        " and it is set by ParallelExecutor instance, not users.";
    const char* const drop_last_doc =
        "Whether to drop last batches whose number is less than "
        "actual used device number.";

    AddAttr<bool>("throw_eof_exp", throw_eof_exp_doc).SetDefault(true);
    // "infer_out" is registered with an empty description. In this file it is
    // read by ReadInferShape and ReadInferVarType to decide whether the
    // outputs' dims, LoD levels and types are inferred from the reader.
    AddAttr<bool>("infer_out", "").SetDefault(true);
    AddAttr<bool>("drop_last", drop_last_doc).SetDefault(true);

    AddComment(R"DOC(
      Read Operator

      Execute a given reader once and output data.
    )DOC");
  }
};

}  // namespace operators
F
fengjiayi 已提交
197 198 199
}  // namespace paddle

namespace ops = paddle::operators;
H
hong 已提交
200
REGISTER_OPERATOR(
201 202 203 204
    read,
    ops::ReadOp,
    ops::ReadInferShape,
    ops::ReadOpMaker,
H
hong 已提交
205 206 207
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
    ops::ReadInferVarType);