From d9868b08392d831d7f4bd1a1a098217cc4573c8f Mon Sep 17 00:00:00 2001 From: fengjiayi Date: Mon, 19 Mar 2018 19:06:38 +0800 Subject: [PATCH] Add multi_pass_reader --- paddle/fluid/operators/reader/CMakeLists.txt | 1 + .../reader/create_multi_pass_reader_op.cc | 101 ++++++++++++++++++ 2 files changed, 102 insertions(+) create mode 100644 paddle/fluid/operators/reader/create_multi_pass_reader_op.cc diff --git a/paddle/fluid/operators/reader/CMakeLists.txt b/paddle/fluid/operators/reader/CMakeLists.txt index 744bd3b7ef7..fc7ef227f02 100644 --- a/paddle/fluid/operators/reader/CMakeLists.txt +++ b/paddle/fluid/operators/reader/CMakeLists.txt @@ -20,5 +20,6 @@ reader_library(create_shuffle_reader_op SRCS create_shuffle_reader_op.cc) reader_library(create_batch_reader_op SRCS create_batch_reader_op.cc) reader_library(create_recordio_file_reader_op SRCS create_recordio_file_reader_op.cc) reader_library(create_double_buffer_reader_op SRCS create_double_buffer_reader_op.cc) +reader_library(create_multi_pass_reader_op SRCS create_multi_pass_reader_op.cc) # Export local libraries to parent set(READER_LIBRARY ${LOCAL_READER_LIBS} PARENT_SCOPE) diff --git a/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc new file mode 100644 index 00000000000..4d4e9fb909e --- /dev/null +++ b/paddle/fluid/operators/reader/create_multi_pass_reader_op.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/detail/safe_ref.h" +#include "paddle/fluid/operators/reader/reader_op_registry.h" + +namespace paddle { +namespace operators { +namespace reader { + +class MultiPassReader : public framework::DecoratedReader { + public: + MultiPassReader(ReaderBase* reader, int pass_num) + : DecoratedReader(reader), pass_num_(pass_num), pass_count_(0) {} + + void ReadNext(std::vector* out) override { + if (!HasNext()) { + PADDLE_THROW("There is no next data!"); + } + reader_->ReadNext(out); + } + + bool HasNext() const override { + if (reader_->HasNext()) { + return true; + } else { + ++pass_count_; + if (pass_count_ >= pass_num_) { + return false; + } else { + reader_->ReInit(); + return true; + } + } + } + + void ReInit() override { + pass_count_ = 0; + reader_->ReInit(); + } + + private: + int pass_num_; + mutable int pass_count_; +}; + +class CreateMultiPassReaderOp : public framework::OperatorBase { + public: + using framework::OperatorBase::OperatorBase; + + private: + void RunImpl(const framework::Scope& scope, + const platform::Place& dev_place) const override { + const auto& underlying_reader = scope.FindVar(Input("UnderlyingReader")) + ->Get(); + auto& out = detail::Ref(scope.FindVar(Output("Out"))); + int pass_num = Attr("pass_num"); + out.GetMutable()->Reset( + new MultiPassReader(underlying_reader.Get(), pass_num)); + } +}; + +class CreateMultiPassReaderOpMaker : public DecoratedReaderMakerBase { + public: + CreateMultiPassReaderOpMaker(OpProto* op_proto, OpAttrChecker* op_checker) + : DecoratedReaderMakerBase(op_proto, op_checker) { + AddAttr("pass_num", "The number of pass to run.").GreaterThan(0); + AddComment(R"DOC( + CreateMultiPassReader Operator + + This operator creates a multi-pass reader. A multi-pass reader + is used to yield data for several pass training continuously. + It takes the the number of pass to run as one of its attributes + ('pass_num'), and maintains a pass counter to record how many + passes it has completed. When the underlying reader reach the EOF, + the multi-pass reader checks whether it has completed training + of the given number of pass. If not, the underlying reader will + be re-initialized and starts a new pass automatically. + )DOC"); + } +}; + +} // namespace reader +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators::reader; +REGISTER_DECORATED_READER_OPERATOR(create_multi_pass_reader, + ops::CreateMultiPassReaderOp, + ops::CreateMultiPassReaderOpMaker); -- GitLab