// create_ctr_reader_op.cc (committed by Qiao Longfei)
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/operators/reader/ctr_reader.h"

#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/operators/reader/reader_op_registry.h"

namespace paddle {
namespace operators {
namespace reader {

// Operator that constructs a CTRReader and installs it into the ReaderHolder
// stored in the output variable "Out".
//
// Reads from the scope:
//   - Output "Out": variable used as a framework::ReaderHolder.
//   - Input "blocking_queue": variable used as a LoDTensorBlockingQueueHolder.
// Reads the attributes thread_num, batch_size, file_type, file_format,
// file_list, dense_slot_index, sparse_slot_index and sparse_slots, and
// forwards them to DataDesc / CTRReader.
class CreateCTRReaderOp : public framework::OperatorBase {
 public:
  using framework::OperatorBase::OperatorBase;

 private:
  // Builds the reader. Does nothing if "Out" already holds a reader.
  //
  // Raises a PreconditionNotMet error (via PADDLE_ENFORCE_NOT_NULL) when
  // either the output variable or the blocking-queue variable is missing from
  // `scope`. `dev_place` is not used by this operator.
  void RunImpl(const framework::Scope& scope,
               const platform::Place& dev_place) const override {
    // Guard the output lookup the same way the queue lookup below is guarded,
    // instead of dereferencing a possibly-null Variable pointer.
    const std::string& out_name = Output("Out");
    auto* out_var = scope.FindVar(out_name);
    PADDLE_ENFORCE_NOT_NULL(
        out_var,
        platform::errors::PreconditionNotMet(
            "No ReaderHolder variable with name %s found", out_name));
    auto* out = out_var->GetMutable<framework::ReaderHolder>();
    if (out->Get() != nullptr) return;  // Reader already created.

    const std::string& queue_name = Input("blocking_queue");
    auto* queue_holder_var = scope.FindVar(queue_name);
    PADDLE_ENFORCE_NOT_NULL(
        queue_holder_var,
        platform::errors::PreconditionNotMet(
            "No LoDTensorBlockingQueueHolder variable with name %s found",
            queue_name));
    auto* queue_holder =
        queue_holder_var->GetMutable<LoDTensorBlockingQueueHolder>();

    const int thread_num = Attr<int>("thread_num");
    const int batch_size = Attr<int>("batch_size");

    // The remaining attributes are only read here, so bind them by const
    // reference rather than copying each string/vector. The binding is valid
    // whether Attr<T>() returns a reference or a temporary.
    const auto& sparse_slots = Attr<std::vector<std::string>>("sparse_slots");
    const auto& dense_slot_index = Attr<std::vector<int>>("dense_slot_index");
    const auto& sparse_slot_index = Attr<std::vector<int>>("sparse_slot_index");
    const auto& file_type = Attr<std::string>("file_type");
    const auto& file_format = Attr<std::string>("file_format");
    const auto& file_list = Attr<std::vector<std::string>>("file_list");

    DataDesc data_desc(batch_size, file_list, file_type, file_format,
                       dense_slot_index, sparse_slot_index, sparse_slots);
    VLOG(1) << data_desc;

    out->Reset(std::make_shared<CTRReader>(queue_holder->GetQueue(), thread_num,
                                           data_desc));
  }
};

class CreateCTRReaderOpMaker : public FileReaderMakerBase {
 protected:
  void Apply() override {
    AddInput("blocking_queue",
             "Name of the `LoDTensorBlockingQueueHolder` variable");
Q
Qiao Longfei 已提交
66 67
    AddAttr<int>("thread_num", "the thread num to read data");
    AddAttr<int>("batch_size", "the batch size of read data");
Q
Qiao Longfei 已提交
68 69
    AddAttr<std::string>("file_type", "plain or gzip").SetDefault("plain");
    AddAttr<std::string>("file_format", "svm or csv").SetDefault("csv");
Q
Qiao Longfei 已提交
70 71
    AddAttr<std::vector<std::string>>("file_list",
                                      "The list of files that need to read");
Q
Qiao Longfei 已提交
72 73
    AddAttr<std::vector<int>>(
        "dense_slot_index",
Q
Qiao Longfei 已提交
74
        "the dense slots id that should be extract from file")
Q
Qiao Longfei 已提交
75
        .SetDefault({});
Q
Qiao Longfei 已提交
76
    AddAttr<std::vector<int>>(
Q
Qiao Longfei 已提交
77
        "sparse_slot_index",
Q
Qiao Longfei 已提交
78 79 80 81 82 83
        "the sparse slots id that should be extract from file")
        .SetDefault({});
    AddAttr<std::vector<std::string>>("sparse_slots",
                                      "the sparse slots id that should be "
                                      "extract from file, used when file "
                                      "format is svm");
Q
Qiao Longfei 已提交
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98

    AddComment(R"DOC(
			Create CTRReader to support read ctr data with cpp.
      )DOC");
  }
};

}  // namespace reader
}  // namespace operators
}  // namespace paddle

// Short alias so the registration below can name the classes concisely.
namespace reader = ::paddle::operators::reader;

// Registers the operator under the name `create_ctr_reader`, passing the
// operator class and its maker to the file-reader registration macro.
// NOTE(review): the macro is defined outside this file (presumably in
// reader_op_registry.h) — confirm its exact expansion there.
REGISTER_FILE_READER_OPERATOR(create_ctr_reader, reader::CreateCTRReaderOp,
                              reader::CreateCTRReaderOpMaker);