diff --git a/doc/fluid/dev/api_doc_std_cn.md b/doc/fluid/dev/api_doc_std_cn.md index b50f18f21df0787b9761bf0935ed7f4384ff0f98..7d39b8de1e6dc502ffea5f7882bd6a42b1ed6549 100644 --- a/doc/fluid/dev/api_doc_std_cn.md +++ b/doc/fluid/dev/api_doc_std_cn.md @@ -1,8 +1,9 @@ # API注释撰写标准 -- [API注释模块](#API注释模块) -- [格式及示例](#格式及示例) -- [完整示例](#完整示例) +- [API注释撰写标准](#api) + - [API注释模块](#api) + - [格式及示例](#) + - [完整示例](#) ## API注释模块 @@ -217,4 +218,4 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接 ## 完整示例 -fc 的完整注释见[示例](src/fc.py)。 +fc 的完整注释见[示例](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/dev/src/fc.py)。 diff --git a/doc/fluid/dev/api_doc_std_en.md b/doc/fluid/dev/api_doc_std_en.md index e57072d52fd162e92a3482aef33f99ab9394c532..f175b219750d1c765a6a111c2ec3aa732fa46175 100644 --- a/doc/fluid/dev/api_doc_std_en.md +++ b/doc/fluid/dev/api_doc_std_en.md @@ -1,8 +1,9 @@ # API Doc Standard -- [API Doc Structure](#API Doc Structure) -- [Format and Examples](#Format and Examples) -- [Complete Example](#Complete Example) +- [API Doc Standard](#api-doc-standard) + - [API Doc Structure](#api-doc-structure) + - [Format and Examples](#format-and-examples) + - [Complete Example](#complete-example) ## API Doc Structure @@ -223,4 +224,4 @@ Format and examples of each part of API documantation are as follows: (take fc f ## Complete Example -Complete Example of fc please see [here](src/fc.py)。 +Complete Example of fc please see [here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/dev/src/fc.py)。 diff --git a/paddle/contrib/inference/test_paddle_inference_api_impl.cc b/paddle/contrib/inference/test_paddle_inference_api_impl.cc index 4b6cb7b051d1ad2c63e895017c7faf1245c22612..5d843010e02b09087e6b328428e80fb40eb5bb97 100644 --- a/paddle/contrib/inference/test_paddle_inference_api_impl.cc +++ b/paddle/contrib/inference/test_paddle_inference_api_impl.cc @@ -109,7 +109,6 @@ void MainWord2Vec(bool use_gpu) { void MainImageClassification(bool use_gpu) { int batch_size = 2; - bool use_mkldnn = false; bool repeat = false; NativeConfig config = GetConfig(); config.use_gpu = use_gpu; @@ -134,12 +133,8 @@ void MainImageClassification(bool use_gpu) { std::vector cpu_fetchs1; cpu_fetchs1.push_back(&output1); - TestInference(config.model_dir, - cpu_feeds, - cpu_fetchs1, - repeat, - is_combined, - use_mkldnn); + TestInference( + config.model_dir, cpu_feeds, cpu_fetchs1, repeat, is_combined); auto predictor = CreatePaddlePredictor(config); std::vector paddle_tensor_feeds; diff --git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc index b6b93cf422a60c1d8e9cb8b477efd562f9fe4758..60382faffb8e53870658b2d1ff83abc4008cb4cf 100644 --- a/paddle/fluid/framework/data_type.cc +++ b/paddle/fluid/framework/data_type.cc @@ -28,6 +28,9 @@ struct DataTypeMap { }; static DataTypeMap* InitDataTypeMap(); +// C++11 removes the need for manual locking. Concurrent execution shall wait if +// a static local variable is already being initialized. +// https://stackoverflow.com/questions/11711920/how-to-implement-multithread-safe-singleton-in-c11-without-using-mutex static DataTypeMap& gDataTypeMap() { static DataTypeMap* g_data_type_map_ = InitDataTypeMap(); return *g_data_type_map_; diff --git a/paddle/fluid/framework/details/fuse_vars_op_handle.cc b/paddle/fluid/framework/details/fuse_vars_op_handle.cc index 32415c192f0be51bf0850fe533c212c635779a30..018c9bff71e553d8a3641f06f10b350453676b24 100644 --- a/paddle/fluid/framework/details/fuse_vars_op_handle.cc +++ b/paddle/fluid/framework/details/fuse_vars_op_handle.cc @@ -42,7 +42,7 @@ void FuseVarsOpHandle::RunImpl() { out_t->ShareDataWith(out_tensor->Slice(s, s + numel)); s += numel; } - this->RunAndRecordEvent([this] {}); + this->RunAndRecordEvent([] {}); } std::string FuseVarsOpHandle::Name() const { return "fuse vars"; } diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 3d68c5fb870d5b575f97eeb286528544402b8ed9..d4d6c34108b9f1e457d8eb0c36d10339b03330bd 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" DECLARE_bool(benchmark); +DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run"); namespace paddle { namespace framework { @@ -115,6 +116,7 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope, void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, bool create_local_scope, bool create_vars) { platform::RecordBlock b(block_id); + if (FLAGS_use_mkldnn) EnableMKLDNN(pdesc); auto ctx = Prepare(pdesc, block_id); RunPreparedContext(ctx.get(), scope, create_local_scope, create_vars); } @@ -214,6 +216,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, const std::string& feed_holder_name, const std::string& fetch_holder_name) { platform::RecordBlock b(kProgramId); + if (FLAGS_use_mkldnn) EnableMKLDNN(program); bool has_feed_ops = has_feed_operators(program.Block(0), *feed_targets, feed_holder_name); bool has_fetch_ops = @@ -225,7 +228,6 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, unique_ptr_of_copy_program.reset(new ProgramDesc(program)); copy_program = unique_ptr_of_copy_program.get(); } - auto* global_block = copy_program->MutableBlock(0); if (!has_feed_ops) { @@ -378,5 +380,19 @@ void Executor::RunPreparedContext( } } +void Executor::EnableMKLDNN(const ProgramDesc& program) { +#ifdef PADDLE_WITH_MKLDNN + VLOG(3) << "use_mkldnn=True"; + for (size_t bid = 0; bid < program.Size(); ++bid) { + auto* block = const_cast(program).MutableBlock(bid); + for (auto* op : block->AllOps()) { + if (op->HasAttr("use_mkldnn")) { + op->SetAttr("use_mkldnn", true); + } + } + } +#endif +} + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h index 0c3c23611d95e0da67cabfb8fb2755a4a52c991b..e6f9c3d31c18f762ef2de269977e0642a79fb174 100644 --- a/paddle/fluid/framework/executor.h +++ b/paddle/fluid/framework/executor.h @@ -81,6 +81,8 @@ class Executor { const std::string& feed_holder_name = "feed", const std::string& fetch_holder_name = "fetch"); + void EnableMKLDNN(const ProgramDesc& program); + private: const platform::Place place_; }; diff --git a/paddle/fluid/inference/tests/book/test_inference_image_classification.cc b/paddle/fluid/inference/tests/book/test_inference_image_classification.cc index 987da18116cc6f4902bd66ae317f2470a8bc5057..60c761c5281e2f535aab0200c93fb738addcdb87 100644 --- a/paddle/fluid/inference/tests/book/test_inference_image_classification.cc +++ b/paddle/fluid/inference/tests/book/test_inference_image_classification.cc @@ -21,7 +21,6 @@ DEFINE_string(fp16_dirname, "", "Directory of the float16 inference model."); DEFINE_int32(batch_size, 1, "Batch size of input data"); DEFINE_int32(repeat, 1, "Running the inference program repeat times"); DEFINE_bool(skip_cpu, false, "Skip the cpu test"); -DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference"); TEST(inference, image_classification) { if (FLAGS_dirname.empty() || FLAGS_batch_size < 1 || FLAGS_repeat < 1) { @@ -59,10 +58,8 @@ TEST(inference, image_classification) { // Run inference on CPU LOG(INFO) << "--- CPU Runs: ---"; LOG(INFO) << "Batch size is " << FLAGS_batch_size; - LOG(INFO) << "FLAGS_use_mkldnn: " << FLAGS_use_mkldnn; TestInference( - dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined, - FLAGS_use_mkldnn); + dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined); LOG(INFO) << output1.dims(); } diff --git a/paddle/fluid/inference/tests/book/test_inference_nlp.cc b/paddle/fluid/inference/tests/book/test_inference_nlp.cc index a0e83a17058a4edcb8f23f23ce155e644ae0cf3b..9dcd79c3bb9ed713ff0f12024969cc5798750988 100644 --- a/paddle/fluid/inference/tests/book/test_inference_nlp.cc +++ b/paddle/fluid/inference/tests/book/test_inference_nlp.cc @@ -27,7 +27,6 @@ limitations under the License. */ DEFINE_string(model_path, "", "Directory of the inference model."); DEFINE_string(data_file, "", "File of input index data."); DEFINE_int32(repeat, 100, "Running the inference program repeat times"); -DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_int32(num_threads, 1, "Number of threads should be used"); @@ -190,9 +189,6 @@ TEST(inference, nlp) { std::unique_ptr inference_program; inference_program = InitProgram(&executor, scope.get(), FLAGS_model_path, /*model combined*/ false); - if (FLAGS_use_mkldnn) { - EnableMKLDNN(inference_program); - } // always prepare context std::unique_ptr ctx; ctx = executor.Prepare(*inference_program, 0); diff --git a/paddle/fluid/inference/tests/test_helper.h b/paddle/fluid/inference/tests/test_helper.h index 01b8dc0be662da22fe15a79cd9abfe5fa92c9577..44c36b1683b037832a218df02184e7cd2ba143e9 100644 --- a/paddle/fluid/inference/tests/test_helper.h +++ b/paddle/fluid/inference/tests/test_helper.h @@ -22,6 +22,8 @@ limitations under the License. */ #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/profiler.h" +DECLARE_bool(use_mkldnn); + template void SetupTensor(paddle::framework::LoDTensor* input, paddle::framework::DDim dims, T lower, T upper) { @@ -133,24 +135,11 @@ std::vector> GetFeedTargetShapes( return feed_target_shapes; } -void EnableMKLDNN( - const std::unique_ptr& program) { - for (size_t bid = 0; bid < program->Size(); ++bid) { - auto* block = program->MutableBlock(bid); - for (auto* op : block->AllOps()) { - if (op->HasAttr("use_mkldnn")) { - op->SetAttr("use_mkldnn", true); - } - } - } -} - template void TestInference(const std::string& dirname, const std::vector& cpu_feeds, const std::vector& cpu_fetchs, - const int repeat = 1, const bool is_combined = false, - const bool use_mkldnn = false) { + const int repeat = 1, const bool is_combined = false) { // 1. Define place, executor, scope auto place = Place(); auto executor = paddle::framework::Executor(place); @@ -182,9 +171,6 @@ void TestInference(const std::string& dirname, "init_program", paddle::platform::DeviceContextPool::Instance().Get(place)); inference_program = InitProgram(&executor, scope, dirname, is_combined); - if (use_mkldnn) { - EnableMKLDNN(inference_program); - } } // Disable the profiler and print the timing information paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault, @@ -210,7 +196,10 @@ void TestInference(const std::string& dirname, fetch_targets[fetch_target_names[i]] = cpu_fetchs[i]; } - // 6. Run the inference program + // 6. If export Flags_use_mkldnn=True, use mkldnn related ops. + if (FLAGS_use_mkldnn) executor.EnableMKLDNN(*inference_program); + + // 7. Run the inference program { if (!CreateVars) { // If users don't want to create and destroy variables every time they diff --git a/paddle/fluid/operators/arg_max_op.cc b/paddle/fluid/operators/arg_max_op.cc index 859cccd1b2dfcc8b2278e11157468095fd6a396d..8174d3735859b1fac40cd4c07545f34874d31ab7 100644 --- a/paddle/fluid/operators/arg_max_op.cc +++ b/paddle/fluid/operators/arg_max_op.cc @@ -12,24 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/arg_max_op.h" +#include "paddle/fluid/operators/arg_min_max_op_base.h" -REGISTER_OPERATOR(arg_max, paddle::operators::ArgMaxOp, +REGISTER_OPERATOR(arg_max, paddle::operators::ArgMinMaxOp, paddle::operators::ArgMaxOpMaker, paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL( - arg_max, paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, + paddle::operators::ArgMaxKernel, + paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel); + paddle::operators::ArgMaxKernel, + paddle::operators::ArgMaxKernel, + paddle::operators::ArgMaxKernel, + paddle::operators::ArgMaxKernel); diff --git a/paddle/fluid/operators/arg_max_op.cu b/paddle/fluid/operators/arg_max_op.cu index c9c102bdccf32b02b055cdfd800957c6c3723183..a147d77a9e9c577984028e1a6ed9582dda622069 100644 --- a/paddle/fluid/operators/arg_max_op.cu +++ b/paddle/fluid/operators/arg_max_op.cu @@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/arg_max_op.h" +#include "paddle/fluid/operators/arg_min_max_op_base.h" REGISTER_OP_CUDA_KERNEL( arg_max, - paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, + paddle::operators::ArgMaxKernel, + paddle::operators::ArgMaxKernel, paddle::operators::ArgMaxKernel, + int32_t>, paddle::operators::ArgMaxKernel, + int16_t>, paddle::operators::ArgMaxKernel, - paddle::operators::ArgMaxKernel, + size_t>, paddle::operators::ArgMaxKernel); + uint8_t>); diff --git a/paddle/fluid/operators/arg_max_op.h b/paddle/fluid/operators/arg_max_op.h deleted file mode 100644 index d232a856992fb8841a7be4ed6e1a881a1e4de6cd..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/arg_max_op.h +++ /dev/null @@ -1,16 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/operators/arg_min_max_op_base.h" diff --git a/paddle/fluid/operators/arg_min_max_op_base.h b/paddle/fluid/operators/arg_min_max_op_base.h index 8c20461a345b88f7c7ff3722d970a23daf444041..6cbdaefeda099c36a864289ef8195c20d09c55e6 100644 --- a/paddle/fluid/operators/arg_min_max_op_base.h +++ b/paddle/fluid/operators/arg_min_max_op_base.h @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include #include "paddle/fluid/framework/ddim.h" @@ -37,9 +38,9 @@ struct ArgMinMaxFunctor {}; struct ArgMinMaxFunctor { \ void operator()(const DeviceContext& ctx, const framework::LoDTensor& in, \ - framework::LoDTensor& out, int64_t axis) { \ + framework::LoDTensor* out, int64_t axis) { \ auto in_eigen = framework::EigenTensor::From(in); \ - auto out_eigen = framework::EigenTensor::From(out); \ + auto out_eigen = framework::EigenTensor::From(*out); \ out_eigen.device(*(ctx.eigen_device())) = \ in_eigen.eigen_op_type(axis).template cast(); \ } \ @@ -62,7 +63,7 @@ class ArgMinMaxKernel : public framework::OpKernel { #define CALL_ARG_MINMAX_FUNCTOR(rank) \ ArgMinMaxFunctor \ functor##rank; \ - functor##rank(dev_ctx, x, out, axis) + functor##rank(dev_ctx, x, &out, axis) switch (x.dims().size()) { case 1: @@ -89,19 +90,20 @@ class ArgMinMaxKernel : public framework::OpKernel { "than 6.", (EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax")); break; +#undef CALL_ARG_MINMAX_FUNCTOR } } }; -template +template using ArgMinKernel = - ArgMinMaxKernel; + ArgMinMaxKernel; -template +template using ArgMaxKernel = - ArgMinMaxKernel; + ArgMinMaxKernel; -typedef class BaseArgMinMaxOp : public framework::OperatorWithKernel { +class ArgMinMaxOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -121,7 +123,7 @@ typedef class BaseArgMinMaxOp : public framework::OperatorWithKernel { for (int64_t i = axis + 1; i < x_rank; i++) vec.push_back(x_dims[i]); ctx->SetOutputDim("Out", framework::make_ddim(vec)); } -} ArgMinOp, ArgMaxOp; +}; class BaseArgMinMaxOpMaker : public framework::OpProtoAndCheckerMaker { protected: @@ -133,12 +135,13 @@ class BaseArgMinMaxOpMaker : public framework::OpProtoAndCheckerMaker { AddInput("X", "Input tensor."); AddOutput("Out", "Output tensor."); AddAttr("axis", "The axis in which to compute the arg indics."); - AddComment(::paddle::string::Sprintf(R"DOC( - %s Operator. + AddComment(string::Sprintf(R"DOC( + %s Operator. - Computes the indices of the %s elements of the input tensor's element along the provided axis. + Computes the indices of the %s elements of the input tensor's element + along the provided axis. )DOC", - OpName(), Name())); + OpName(), Name())); } }; diff --git a/paddle/fluid/operators/arg_min_op.cc b/paddle/fluid/operators/arg_min_op.cc index 18c0884a042604d35978d2bde57877a018668511..41f188029f17dbe8717afc0ca0760a39edc24b54 100644 --- a/paddle/fluid/operators/arg_min_op.cc +++ b/paddle/fluid/operators/arg_min_op.cc @@ -12,24 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/arg_min_op.h" +#include "paddle/fluid/operators/arg_min_max_op_base.h" -REGISTER_OPERATOR(arg_min, paddle::operators::ArgMinOp, +REGISTER_OPERATOR(arg_min, paddle::operators::ArgMinMaxOp, paddle::operators::ArgMinOpMaker, paddle::framework::EmptyGradOpMaker); REGISTER_OP_CPU_KERNEL( - arg_min, paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, + paddle::operators::ArgMinKernel, + paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel); + paddle::operators::ArgMinKernel, + paddle::operators::ArgMinKernel, + paddle::operators::ArgMinKernel, + paddle::operators::ArgMinKernel); diff --git a/paddle/fluid/operators/arg_min_op.cu b/paddle/fluid/operators/arg_min_op.cu index 6d5aaa9596101f6cbd3e8177d516e24a43223ec8..4d020508505a6ebac8be41ce1e4f99d436b67ab5 100644 --- a/paddle/fluid/operators/arg_min_op.cu +++ b/paddle/fluid/operators/arg_min_op.cu @@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/arg_min_op.h" +#include "paddle/fluid/operators/arg_min_max_op_base.h" REGISTER_OP_CUDA_KERNEL( arg_min, - paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, + paddle::operators::ArgMinKernel, + paddle::operators::ArgMinKernel, paddle::operators::ArgMinKernel, + int32_t>, paddle::operators::ArgMinKernel, + int16_t>, paddle::operators::ArgMinKernel, - paddle::operators::ArgMinKernel, + size_t>, paddle::operators::ArgMinKernel); + uint8_t>); diff --git a/paddle/fluid/operators/arg_min_op.h b/paddle/fluid/operators/arg_min_op.h deleted file mode 100644 index d232a856992fb8841a7be4ed6e1a881a1e4de6cd..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/arg_min_op.h +++ /dev/null @@ -1,16 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once -#include "paddle/fluid/operators/arg_min_max_op_base.h" diff --git a/paddle/fluid/operators/batch_size_like.h b/paddle/fluid/operators/batch_size_like.h index 483c9f8c2191fa4eb98b91112f9d6753e2fbddc3..fc15d56891cf7af10a91ca22a09c84fa2e52d465 100644 --- a/paddle/fluid/operators/batch_size_like.h +++ b/paddle/fluid/operators/batch_size_like.h @@ -54,18 +54,18 @@ class BatchSizeLikeOp : public framework::OperatorWithKernel { class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() final { - AddInput("Input", - "(Tensor) Tensor " - "whose input_dim_idx'th dimension specifies the batch_size"); + AddInput( + "Input", + "Tensor whose input_dim_idx'th dimension specifies the batch_size"); AddOutput("Out", - "(Tensor) Tensor of specified shape will be filled " + "Tensor of specified shape will be filled " "with the specified value"); - AddAttr>("shape", "(vector) The shape of the output"); + AddAttr>("shape", "The shape of the output"); AddAttr("input_dim_idx", - "(int, default 0) The index of input's batch size dimension") + "default 0. The index of input's batch size dimension") .SetDefault(0); AddAttr("output_dim_idx", - "(int, default 0) The index of output's batch size dimension") + "default 0. The index of output's batch size dimension") .SetDefault(0); Apply(); } diff --git a/paddle/fluid/operators/bilinear_interp_op.cc b/paddle/fluid/operators/bilinear_interp_op.cc index 3321adf2743c28f6eeca8b5cc91ef89beed6b97c..2572e813d656353a2187c29da89266733a32f3ce 100644 --- a/paddle/fluid/operators/bilinear_interp_op.cc +++ b/paddle/fluid/operators/bilinear_interp_op.cc @@ -56,17 +56,16 @@ class BilinearInterpOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("X", - "(Tensor) The input tensor of bilinear interpolation, " + "The input tensor of bilinear interpolation, " "This is a 4-D tensor with shape of (N x C x h x w)"); AddInput("OutSize", - "(Tensor) This is a 1-D tensor with two number. " + "This is a 1-D tensor with two number. " "The first number is height and the second number is width.") .AsDispensable(); - AddOutput("Out", - "(Tensor) The dimension of output is (N x C x out_h x out_w]"); + AddOutput("Out", "The dimension of output is (N x C x out_h x out_w)"); - AddAttr("out_h", "(int) output height of bilinear interpolation op."); - AddAttr("out_w", "(int) output width of bilinear interpolation op."); + AddAttr("out_h", "output height of bilinear interpolation op."); + AddAttr("out_w", "output width of bilinear interpolation op."); AddComment(R"DOC( Bilinear interpolation is an extension of linear interpolation for interpolating functions of two variables (e.g. H-direction and diff --git a/paddle/fluid/operators/detail/request_handler_impl.cc b/paddle/fluid/operators/detail/request_handler_impl.cc index b5ee3ab51ec5e685b41057ba60d6701d61cbc09c..9473dce55029f2a4e0987ab8f6f5e7205d7fff47 100644 --- a/paddle/fluid/operators/detail/request_handler_impl.cc +++ b/paddle/fluid/operators/detail/request_handler_impl.cc @@ -64,13 +64,21 @@ bool RequestSendHandler::Handle(const std::string& varname, return false; } if (invar->IsType()) { - rpc_server_->RecordSparseVar(invar); + std::unique_lock lock(mutex_sparse_vars_); + sparse_vars_.push_back(invar); } } - return true; } +void RequestSendHandler::ResetSparseVarRecorder() { + std::unique_lock lock(mutex_sparse_vars_); + for (auto* var : sparse_vars_) { + var->GetMutable()->mutable_rows()->clear(); + } + sparse_vars_.clear(); +} + bool RequestGetHandler::Handle(const std::string& varname, framework::Scope* scope, framework::Variable* invar, diff --git a/paddle/fluid/operators/detail/request_handler_impl.h b/paddle/fluid/operators/detail/request_handler_impl.h index 8d0c62232b68ad6c05e751c25103802ee12db57e..443d951914dd0f40e8831abc637848363d9fef16 100644 --- a/paddle/fluid/operators/detail/request_handler_impl.h +++ b/paddle/fluid/operators/detail/request_handler_impl.h @@ -41,6 +41,11 @@ class RequestSendHandler final : public RequestHandler { virtual ~RequestSendHandler() {} bool Handle(const std::string& varname, framework::Scope* scope, framework::Variable* var, framework::Variable** outvar) override; + void ResetSparseVarRecorder(); + + private: + std::mutex mutex_sparse_vars_; + std::vector sparse_vars_; }; class RequestGetHandler final : public RequestHandler { diff --git a/paddle/fluid/operators/detail/rpc_server.cc b/paddle/fluid/operators/detail/rpc_server.cc index 7feddbeca89ee97769f5598b0111e50a3f572ce8..448763372a8c224cc68319a4a444915896b68234 100644 --- a/paddle/fluid/operators/detail/rpc_server.cc +++ b/paddle/fluid/operators/detail/rpc_server.cc @@ -73,19 +73,6 @@ void RPCServer::ResetBarrierCounter() { t.second = 0; } } -void RPCServer::RecordSparseVar(framework::Variable* sparse_var) { - std::unique_lock lock(mutex_sparse_var_recorder_); - sparse_vars_.push_back(sparse_var); -} - -void RPCServer::ResetSparseVarsRecorder() { - VLOG(3) << "RPCServer reset sparse vars recorder."; - std::unique_lock lock(mutex_sparse_var_recorder_); - for (auto* var : sparse_vars_) { - var->GetMutable()->mutable_rows()->clear(); - } - sparse_vars_.clear(); -} void RPCServer::RegisterRPC(const std::string& rpc_name, RequestHandler* handler, int thread_num) { diff --git a/paddle/fluid/operators/detail/rpc_server.h b/paddle/fluid/operators/detail/rpc_server.h index 94a21ef8d04e0fc8f4e22bf00cf89dc5f41f294b..f809c13c726ac2f1c60e8cf84848c4138f631b44 100644 --- a/paddle/fluid/operators/detail/rpc_server.h +++ b/paddle/fluid/operators/detail/rpc_server.h @@ -62,8 +62,6 @@ class RPCServer { void IncreaseBatchBarrier(const std::string rpc_name); void ResetBarrierCounter(); - void RecordSparseVar(framework::Variable* sparse_var); - void ResetSparseVarsRecorder(); protected: virtual void ShutDownImpl() = 0; @@ -77,9 +75,6 @@ class RPCServer { std::atomic cur_cond_; std::condition_variable rpc_cond_; - std::vector sparse_vars_; - std::mutex mutex_sparse_var_recorder_; - protected: std::string bind_address_; std::atomic exit_flag_; diff --git a/paddle/fluid/operators/fill_constant_batch_size_like_op.cc b/paddle/fluid/operators/fill_constant_batch_size_like_op.cc index 1ae78675a0cac8a72aeaef1227b631a41e4a10b2..453a1b32a0171a2ca88879ab3287e89c4d3c7759 100644 --- a/paddle/fluid/operators/fill_constant_batch_size_like_op.cc +++ b/paddle/fluid/operators/fill_constant_batch_size_like_op.cc @@ -32,16 +32,16 @@ class FillConstantBatchSizeLikeOp : public BatchSizeLikeOp { class FillConstantBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { protected: void Apply() override { - AddAttr("dtype", - "(int, default 5 (FP32)) " - "Output data type") + AddAttr( + "dtype", + "It could be numpy.dtype. Output data type. Default is float32") .SetDefault(framework::proto::VarType::FP32); - AddAttr("value", "(float, default 0) The value to be filled") + AddAttr("value", "default 0. The value to be filled") .SetDefault(0.0f); AddComment(R"DOC( -FillConstantBatchSizeLike Operator. - -Fill up a variable with specified constant value. +This function creates a tensor of specified *shape*, *dtype* and batch size, +and initializes this with a constant supplied in *value*. The batch size is +obtained from the `input` tensor. )DOC"); } diff --git a/paddle/fluid/operators/gather_test.cc b/paddle/fluid/operators/gather_test.cc index 9c0561b016fdbfa8e48535eaa673a3f85bc936e5..f6b156eb30dae154395b34dcfc26319cd89edbca 100644 --- a/paddle/fluid/operators/gather_test.cc +++ b/paddle/fluid/operators/gather_test.cc @@ -43,7 +43,8 @@ TEST(Gather, GatherData) { auto* cpu_place = new paddle::platform::CPUPlace(); paddle::platform::CPUDeviceContext ctx(*cpu_place); paddle::operators::CPUGather(ctx, *src, *index, output); - + delete cpu_place; + cpu_place = NULL; for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); diff --git a/paddle/fluid/operators/linear_chain_crf_op.cc b/paddle/fluid/operators/linear_chain_crf_op.cc index e38525cd7f44de020f364ffd16e71a439048347f..a711da362771353891f900f544d97e64510dc0ba 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.cc +++ b/paddle/fluid/operators/linear_chain_crf_op.cc @@ -67,8 +67,6 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { "mini-batch. Note: S is equal to the sequence number in a mini-batch. " "The output is no longer a LoDTensor."); AddComment(R"DOC( -LinearChainCRF Operator. - Conditional Random Field defines an undirected probabilistic graph with nodes denoting random variables and edges denoting dependencies between these variables. CRF learns the conditional probability $P(Y|X)$, where diff --git a/paddle/fluid/operators/listen_and_serv_op.cc b/paddle/fluid/operators/listen_and_serv_op.cc index ee7b01a54ca96e7685b96f58ebfd1a454b19a976..66d31c88951926a6dd9b7262942a69bb1564a416 100644 --- a/paddle/fluid/operators/listen_and_serv_op.cc +++ b/paddle/fluid/operators/listen_and_serv_op.cc @@ -146,7 +146,9 @@ void ListenAndServOp::RunSyncLoop(framework::Executor *executor, rpc_service_->SetCond(detail::kRequestGet); rpc_service_->WaitBarrier(detail::kRequestGet); rpc_service_->ResetBarrierCounter(); - rpc_service_->ResetSparseVarsRecorder(); + // reset received sparse vars to avoid reuse it in the next mini-batch + dynamic_cast(request_send_handler_.get()) + ->ResetSparseVarRecorder(); } // while(true) } diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc index 93f45cff8a26201b1fbb1c44141e125a67c44037..8f4b5049271c9592d2db268ea7ff2f5c8abc28b6 100644 --- a/paddle/fluid/operators/load_op.cc +++ b/paddle/fluid/operators/load_op.cc @@ -74,25 +74,18 @@ class LoadOp : public framework::OperatorBase { class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddOutput("Out", "(Tensor) The tensor need to be loaded"); + AddOutput("Out", "The tensor need to be loaded"); AddAttr( "load_as_fp16", - "(boolean, default false)" "If true, the tensor will be first loaded and then " "converted to float16 data type. Otherwise, the tensor will be " - "directly loaded without data type conversion.") + "directly loaded without data type conversion. Default is false.") .SetDefault(false); AddAttr("file_path", - "(string) " - "Variable will be loaded from \"file_path\".") + R"(Variable will be loaded from "file_path")") .AddCustomChecker( [](const std::string &path) { return !path.empty(); }); - AddComment(R"DOC( -Load Operator. - -Load operator will load a tensor variable from disk file. - -)DOC"); + AddComment("Load operator will load a tensor variable from disk file."); } }; } // namespace operators diff --git a/paddle/fluid/operators/math/math_function_test.cc b/paddle/fluid/operators/math/math_function_test.cc index 3719a264e90ea7d1a99eb9589ce4fd0d8e074781..b545671b43d3a453ab03e4774427179617f62db0 100644 --- a/paddle/fluid/operators/math/math_function_test.cc +++ b/paddle/fluid/operators/math/math_function_test.cc @@ -77,6 +77,8 @@ TEST(math_function, gemm_trans_clbas) { paddle::platform::CPUDeviceContext context(*cpu_place); GetBlas(context).GEMM(false, true, m, n, k, 1, input1_ptr, 3, input2_ptr + 3, 3, 1, input3_ptr + 1, 4); + delete cpu_place; + cpu_place = NULL; EXPECT_EQ(input3_ptr[0], 0); EXPECT_EQ(input3_ptr[1], 24); diff --git a/paddle/fluid/operators/max_sequence_len_op.cc b/paddle/fluid/operators/max_sequence_len_op.cc index 8e508b68eeab69a4595904dcc3ea0a541d9ab6e6..b1e69f375d3274aade3184af02f7f914dba5db71 100644 --- a/paddle/fluid/operators/max_sequence_len_op.cc +++ b/paddle/fluid/operators/max_sequence_len_op.cc @@ -42,10 +42,15 @@ class MaxSeqenceLenOp : public framework::OperatorBase { class MaxSeqenceLenOpProtoMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput("RankTable", "The lod_rank_table."); - AddOutput("Out", "The max sequence length."); - AddComment( - R"DOC(Calculate the max sequence length through lod_rank_table.)DOC"); + AddInput("RankTable", "Input variable which is a LoDRankTable object"); + AddOutput("Out", "The max sequence length"); + AddComment(R"DOC( + Given a LoDRankTable object, this layer returns the max length of + a batch of sequences. In fact, a LoDRankTable object contains a list of + tuples() and the list is already sorted by + sequence length in descending order, so the operator just returns the + sequence length of the first tuple element +)DOC"); } }; diff --git a/paddle/testing/paddle_gtest_main.cc b/paddle/testing/paddle_gtest_main.cc index 586ec48477f085a14d2f15b265a95d596705694f..507479c8622c8d33722e08bba018ad1ba5452e15 100644 --- a/paddle/testing/paddle_gtest_main.cc +++ b/paddle/testing/paddle_gtest_main.cc @@ -30,7 +30,7 @@ int main(int argc, char** argv) { new_argv.push_back( strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); #else - new_argv.push_back(strdup("--tryfromenv=use_pinned_memory")); + new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn")); #endif int new_argc = static_cast(new_argv.size()); char** new_argv_address = new_argv.data(); diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index c4fad620f0c49bb6b0ad3be22a564c16619efb0b..bd985ad733aa8eece2f8374d033f452a0175a011 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -26,6 +26,7 @@ from trainer import BeginEpochEvent from trainer import EndEpochEvent from trainer import BeginStepEvent from trainer import EndStepEvent +from trainer import CheckpointConfig import inferencer from inferencer import Inferencer @@ -116,7 +117,7 @@ def __bootstrap__(): read_env_flags = [ 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', - 'eager_delete_scope' + 'eager_delete_scope', 'use_mkldnn' ] if core.is_compiled_with_cuda(): read_env_flags += [ diff --git a/python/paddle/fluid/inferencer.py b/python/paddle/fluid/inferencer.py index 9f242cf29a56573349f192307a68e135a409a4be..6baac00905713594acd59bb3819038576fab0674 100644 --- a/python/paddle/fluid/inferencer.py +++ b/python/paddle/fluid/inferencer.py @@ -56,6 +56,8 @@ class Inferencer(object): else: self.exe = executor.Executor(self.place) + self.inference_program = self.inference_program.clone(for_test=True) + def infer(self, inputs, return_numpy=True): """ :param inputs: a map of {"input_name": input_var} that will be feed into the inference program diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 8e58e5eb794e1bb507ab05394a1f7b57a1d2ed42..6323c9899e0080b436a52f852c647466b8f94bc1 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -24,7 +24,8 @@ __all__ = [ 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', 'load_persistables', 'save_inference_model', 'load_inference_model', 'get_inference_program', 'save_checkpoint', 'load_checkpoint', - 'clean_checkpoint' + 'clean_checkpoint', 'load_persist_vars_without_grad', + 'save_persist_vars_without_grad', 'get_latest_checkpoint_serial' ] @@ -457,95 +458,161 @@ def get_parameter_value_by_name(name, executor, program=None): SUCCESS_MARK_FILENAME = "_SUCCESS" CHECKPOINT_PREFIX = "checkpoint" +MODEL_DIR = "__model__" +TRAINER_PREFIX = "trainer" CHECKPOINT_SEPARATOR = "_" def save_checkpoint(executor, - checkpoint_dir=None, - max_num_checkpoints=3, - save_interval_secs=600, - main_program=None): + checkpoint_dir, + trainer_id, + trainer_args=None, + main_program=None, + max_num_checkpoints=3): """ Save Checkpoint will save persistable LodTensor variables from main_program in checkpoint directory, the directory named by serial number from 0 to (n -1), save_checkpoint use LRU strategy to keep numbers of checkpoint directory, the numbers of checkpoint directory are max_num_checkpoints at most, The interval between two saved checkpoints must greater than save_interval_secs. - :param executor - :param checkpoint_dir - :param max_num_checkpoints - :param save_interval_secs - :param main_program + :param executor executor for save the value + :param checkpoint_dir the checkpoint directory + :param trainer_id currect trainer id, if id is equal to 0, the trainer is chief + :param main_program will save all variables in program + :param max_num_checkpoints will keep numbers of checkpoint serials not bigger than max_num_checkpoints """ if checkpoint_dir is None: - checkpoint_dir = os.getcwd() + raise ValueError("'checkpoint_dir' should not be None") + + if trainer_args: + assert isinstance(trainer_args, dict) if not os.path.isdir(checkpoint_dir): os.makedirs(checkpoint_dir) - serial = _get_lastest_checkpoint_dir(checkpoint_dir) - if serial >= 0 and not _interval_secs_exceed( - _get_serial_dir(serial, checkpoint_dir), save_interval_secs): - return + serial = get_latest_checkpoint_serial(checkpoint_dir) + 1 + cur_dir = _get_serial_dir(checkpoint_dir, serial) - serial += 1 - cur_dir = _get_serial_dir(serial, checkpoint_dir) + save_trainer_args(cur_dir, trainer_id, trainer_args) - save_vars( - executor, - dirname=cur_dir, - main_program=main_program, - vars=None, - predicate=_is_checkpoint_var, - filename=None) - _write_success(cur_dir) - _lru_delete(checkpoint_dir, max_num_checkpoints) + if trainer_id == 0: + save_persist_vars_without_grad(executor, cur_dir, main_program) + + _scroll_delete(checkpoint_dir, max_num_checkpoints) -def load_checkpoint(executor, checkpoint_dir=None, main_program=None): +def load_checkpoint(executor, checkpoint_dir, serial, main_program): """ Load checkpoint from a directory by executor, it will find the most recent saved checkpoint file and load it auto. - :param executor - :param checkpoint_dir - :param main_program + :param executor executor for load the value + :param checkpoint_dir the checkpoint directory + :param serial the serial folder in checkpoint directory will be load + :param main_program will load all variables in program """ if checkpoint_dir is None: - checkpoint_dir = os.getcwd() + raise ValueError("'checkpoint_dir' should not be None") - serial = _get_lastest_checkpoint_dir(checkpoint_dir) + if serial is None or serial < 0: + raise ValueError("'serial' should not be None or <0 ") - if serial < 0: - return + if main_program is None: + raise ValueError('main_program should not be None.') - cur_dir = _get_serial_dir(serial, checkpoint_dir) - - load_vars( - executor, - dirname=cur_dir, - main_program=main_program, - predicate=_is_checkpoint_var, - filename=None) + cur_dir = _get_serial_dir(checkpoint_dir, serial) + load_persist_vars_without_grad(executor, cur_dir, main_program, True) def clean_checkpoint(checkpoint_dir, delete_dir=False): """ clean the checkpoint dir, when the train exits normally, the trainer will call clean_checkpoint to delete checkpoint directory saved before. delete_dir only works when the directory is empty, otherwise, OSError is raised. + + :param checkpoint_dir + :param delete_dir """ + if checkpoint_dir is None: - checkpoint_dir = os.getcwd() - _lru_delete(checkpoint_dir, max_num_checkpoints=0) + raise ValueError("'checkpoint_dir' should not be None") + _scroll_delete(checkpoint_dir, max_num_checkpoints=0) if delete_dir and not os.listdir(checkpoint_dir): os.rmdir(checkpoint_dir) -def _get_serial_dir(serial, checkpoint_dir): - serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial) - return os.path.join(checkpoint_dir, serial_folder) +def load_persist_vars_without_grad(executor, + dirname, + program, + has_model_dir=False): + """ + load_persist_vars_without_grad will load variables from a directory by an executor, + the variable named end with "@GRAD" will not be loaded. + + :param executor executor for load the value + :param dirname the checkpoint directory + :param program will load all variables in program + :param has_model_dir if has_model_dir is True, will load variables from sub directory named __model__ + """ + + if has_model_dir: + dirname = _get_model_dir(dirname) + + load_vars( + executor, + dirname=dirname, + main_program=program, + predicate=_is_checkpoint_var, + filename=None) + + +def save_persist_vars_without_grad(executor, dirname, program): + """ + save_persist_vars_without_grad will save variables to a directory by an executor, + the variable named end with "@GRAD" will not be saved. + + :param executor executor for load the value + :param dirname the checkpoint directory + :param program will load all variables in program + """ + cur_dir = _get_model_dir(dirname) + save_vars( + executor, + dirname=cur_dir, + main_program=program, + vars=None, + predicate=_is_checkpoint_var, + filename=None) + _write_success(cur_dir) + + +def save_trainer_args(dirname, trainer_id, trainer_args): + assert isinstance(trainer_args, dict) + + cur_dir = _get_trainer_dir(dirname, trainer_id) + + for name, value in trainer_args.iteritems(): + args_file = os.path.join(cur_dir, name) + with open(args_file, 'w') as f: + f.write(str(value)) + _write_success(cur_dir) + + +def load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args): + assert isinstance(trainer_args, list) + + cur_dir = _get_serial_dir(checkpoint_dir, serial) + cur_dir = _get_trainer_dir(cur_dir, trainer_id) + + ret_values = [] + + for arg in trainer_args: + cur_file = os.path.join(cur_dir, arg) + with open(cur_file, 'r') as f: + contents = f.read() + ret_values.append(contents.strip()) + return ret_values def _is_checkpoint_var(var): @@ -559,36 +626,74 @@ def _is_checkpoint_var(var): var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \ var.desc.type() == core.VarDesc.VarType.RAW: return False + # @GRAD are named for gradient variables, checkpoint will not save it. + if "@GRAD" in var.name: + return False + # .trainer_ are named for distribute train variables, checkpoint will not save it. + if ".trainer_" in var.name: + return False - if var.name.endswith("@GRAD"): + # .block is named for distribute train variables, checkpoint will not save it. + if ".block" in var.name: return False return var.persistable -def _interval_secs_exceed(dirname, save_interval_secs): - dir_time = os.path.getmtime(dirname) - if save_interval_secs > (time.time() - dir_time): - return False - return True +def _get_dir_serial(dirname): + _, serial = dirname.split(CHECKPOINT_SEPARATOR) + + try: + serial_num = int(serial) + except ValueError: + serial_num = -1 + return serial_num + + +def _get_serial_dir(dirname, serial): + serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial) + serial_dir = os.path.join(dirname, serial_folder) + + if not os.path.isdir(serial_dir): + os.makedirs(serial_dir) + + return serial_dir + +def _get_model_dir(dirname): + model_dir = os.path.join(dirname, MODEL_DIR) -def _lru_delete(dirname, max_num_checkpoints=3): + if not os.path.isdir(model_dir): + os.makedirs(model_dir) + + return model_dir + + +def _get_trainer_dir(dirname, trainer_id): + trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id) + trainer_dir = os.path.join(dirname, trainer_folder) + + if not os.path.isdir(trainer_dir): + os.makedirs(trainer_dir) + + return trainer_dir + + +def _scroll_delete(dirname, max_num_checkpoints=3): dirs = os.listdir(dirname) - serials = [] + serial_map = {} for serial in dirs: - try: - serials.append(int(serial)) - except ValueError: - continue + serial_num = _get_dir_serial(serial) + serial_map[serial_num] = serial - if len(serials) <= max_num_checkpoints: + if len(serial_map.keys()) <= max_num_checkpoints: return + serials = serial_map.keys() serials.sort(reverse=True) serials = serials[max_num_checkpoints:] for serial in serials: - cur_dir = os.path.join(dirname, str(serial)) + cur_dir = _get_serial_dir(dirname, serial) shutil.rmtree(cur_dir) @@ -604,33 +709,30 @@ def _write_success(dirname): f.write(now) -def _get_lastest_checkpoint_dir(checkpoint_dir): +def get_latest_checkpoint_serial(checkpoint_dir): """ get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory :param checkpoint_dir """ - if not checkpoint_dir.strip(): + if not checkpoint_dir: return -1 def has_success(checkpoint_dir, cur_dir): """ is _SUCCESS in this dir """ - _, serial = cur_dir.split(CHECKPOINT_SEPARATOR) - - try: - int(serial) - except ValueError: - return -1 - if not os.path.isdir(os.path.join(checkpoint_dir, cur_dir)): + serial = _get_dir_serial(cur_dir) + if serial == -1 or not os.path.isdir( + os.path.join(checkpoint_dir, cur_dir)): return -1 success_path = os.path.join( - _get_serial_dir(serial, checkpoint_dir), SUCCESS_MARK_FILENAME) + _get_serial_dir(checkpoint_dir, serial), MODEL_DIR, + SUCCESS_MARK_FILENAME) if os.path.isfile(success_path): - return int(serial) + return serial if not os.path.isdir(checkpoint_dir): return -1 diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py index d1ea9f148566d20988a43f4c9d421c4452697ef1..80e8ff484a4c04df1b41bbca284d7c604962934c 100644 --- a/python/paddle/fluid/layers/control_flow.py +++ b/python/paddle/fluid/layers/control_flow.py @@ -13,7 +13,7 @@ # limitations under the License. import contextlib -from layer_function_generator import autodoc +from layer_function_generator import autodoc, templatedoc from tensor import assign, fill_constant from .. import core from ..framework import Program, Variable, Operator @@ -721,26 +721,22 @@ def lod_rank_table(x, level=0): return table +@templatedoc() def max_sequence_len(rank_table): - """Max Sequence Len Operator. Given a LoDRankTable object, this layer - returns the max length of a batch of sequences. In fact, a LoDRankTable - object contains a list of tuples() and - the list is already sorted by sequence length in descending order, so the - operator just returns the sequence length of the first tuple element. + """ + ${comment} + + >>> import paddle.fluid as fluid + >>> x = fluid.layers.data(name='x', shape=[10], dtype='float32', + >>> lod_level=1) + >>> rank_table = layers.lod_rank_table(x=x, level=0) + >>> max_seq_len = layers.max_sequence_len(rank_table) Args: - rank_table (Variable): Input variable which is a LoDRankTable object. + rank_table(${rank_table_type}): ${rank_table_comment}. Returns: - Variable: The max length of sequence. - - Examples: - .. code-block:: python - - x = fluid.layers.data(name='x', shape=[10], - dtype='float32', lod_level=1) - rank_table = layers.lod_rank_table(x=x, level=0) - max_seq_len = layers.max_sequence_len(rank_table) + ${out_comment}. """ helper = LayerHelper("max_seqence_len", **locals()) res = helper.create_tmp_variable(dtype="int64") diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index a56f3ea9db6b9fabf9d78f102d394a0817a44a98..9de88e2c3205ace74beff43df7ae8956897d965a 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -19,11 +19,12 @@ from ..unique_name import generate as unique_name from control_flow import BlockGuard from ..layer_helper import LayerHelper from ..executor import global_scope +from layer_function_generator import generate_layer_fn, templatedoc __all__ = [ 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file', 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer', - 'random_data_generator', 'Preprocessor' + 'random_data_generator', 'Preprocessor', 'load' ] @@ -662,3 +663,29 @@ class Preprocessor(object): "sink_var_names": self.sink_var_names }) return monkey_patch_reader_methods(self.reader) + + +@templatedoc() +def load(out, file_path, load_as_fp16=None): + """ + ${comment} + + >>> import paddle.fluid as fluid + >>> tmp_tensor = fluid.layers.create_tensor(dtype='float32') + >>> fluid.layers.load(tmp_tensor, "./tmp_tensor.bin") + + Args: + out(${out_type}): ${out_comment}. + + file_path(${file_path_type}): ${file_path_comment}. + + load_as_fp16(${load_as_fp16_type}): ${load_as_fp16_comment}. + + Returns: + None + """ + helper = LayerHelper("load", **locals()) + attrs = {"file_path": file_path} + if load_as_fp16 is not None: + attrs['load_as_fp16'] = load_as_fp16 + helper.append_op(type="load", inputs={}, output={"Out": out}, args=attrs) diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index 904413cc11b50f80d3c4730bf66ec359f9285ae6..cb60a3aec9a5a69f1eed281eb017384a621c66a8 100644 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -224,7 +224,10 @@ def autodoc(comment=""): return __impl__ -def templatedoc(): +_inline_math_single_dollar = re.compile(r"\$([^\$]+)\$") + + +def templatedoc(op_type=None): """ Decorator of layer function. It will use the docstring from the layer function as the template. The template arguments are: @@ -238,32 +241,47 @@ def templatedoc(): Decorated function. """ + def trim_ending_dot(msg): + return msg.rstrip('.') + + def escape_inline_math(msg): + return _inline_math_single_dollar.sub(repl=r':math:`\1`', string=msg) + def __impl__(func): - op_proto = OpProtoHolder.instance().get_op_proto(func.__name__) + if op_type is None: + op_type_name = func.__name__ + else: + op_type_name = op_type + op_proto = OpProtoHolder.instance().get_op_proto(op_type_name) tmpl = string.Template(func.__doc__) comment_lines = op_proto.comment.split("\n") comment = "" for line in comment_lines: - line = line.lstrip() - comment += line - comment += "\n" - - args = {"comment": comment} + line = line.strip() + if len(line) != 0: + comment += escape_inline_math(line) + comment += " " + elif len(comment) != 0: + comment += "\n \n " + + args = {"comment": trim_ending_dot(comment)} for each_input in op_proto.inputs: input_name = _convert_(each_input.name) - args["{0}_comment".format(input_name)] = each_input.comment + args["{0}_comment".format(input_name)] = trim_ending_dot( + each_input.comment) args["{0}_type".format(input_name)] = "Variable" for each_attr in op_proto.attrs: input_name = _convert_(each_attr.name) - args["{0}_comment".format(input_name)] = each_attr.comment + args["{0}_comment".format(input_name)] = trim_ending_dot( + each_attr.comment) args["{0}_type".format(input_name)] = _type_to_str_(each_attr.type) for each_opt in op_proto.outputs: output_name = _convert_(each_opt.name) - args["{0}_comment".format(output_name)] = each_opt.comment + args["{0}_comment".format(output_name)] = trim_ending_dot( + each_opt.comment) args["{0}_type".format(output_name)] = "Variable" - func.__doc__ = tmpl.substitute(args) return func diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index d13c54daa5a985e2e1bf9357630fe29d24a17bb4..716cc7824eff0c56cc55a055310fa8b1913ac5e6 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -11,6 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +""" +When training a model, it's often useful to decay the +learning rate during training process, this is called +learning_rate_decay. There are many strategies to do +this, this module will provide some classical method. +User can also implement their own learning_rate_decay +strategy according to this module. +""" import control_flow import nn @@ -22,14 +30,6 @@ __all__ = [ 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'polynomial_decay', 'piecewise_decay', 'noam_decay' ] -""" -When training a model, it's often useful to decay the -learning rate during training process, this is called -learning_rate_decay. There are many strategies to do -this, this module will provide some classical method. -User can also implement their own learning_rate_decay -strategy according to this module. -""" def _decay_step_counter(begin=0): @@ -41,18 +41,20 @@ def _decay_step_counter(begin=0): def noam_decay(d_model, warmup_steps): - """Apply decay to learning rate. - ```python - lr_value = np.power(d_model, -0.5) * np.min([ - np.power(current_steps, -0.5), - np.power(warmup_steps, -1.5) * current_steps - ]) - ``` + """ + Noam decay method. The numpy implementation of noam decay as follows. + + >>> import numpy as np + >>> lr_value = np.power(d_model, -0.5) * np.min([ + >>> np.power(current_steps, -0.5), + >>> np.power(warmup_steps, -1.5) * current_steps]) + + Please reference `attention is all you need + `_. Args: d_model(Variable): The dimensionality of input and output of model. - Reference: attention is all you need - https://arxiv.org/pdf/1706.03762.pdf + warmup_steps(Variable): A super parameter. Returns: diff --git a/python/paddle/fluid/layers/metric.py b/python/paddle/fluid/layers/metric.py index cab2eb55510542bdd4dd7eca7667601697759181..a1c64ce2771526cbd0baa944f97d01e7878b3ac1 100644 --- a/python/paddle/fluid/layers/metric.py +++ b/python/paddle/fluid/layers/metric.py @@ -64,10 +64,6 @@ def auc(input, label, curve='ROC', num_thresholds=200): topk_indices = helper.create_tmp_variable(dtype="int64") topk_out, topk_indices = nn.topk(input, k=k) auc_out = helper.create_tmp_variable(dtype="float32") - if correct is None: - correct = helper.create_tmp_variable(dtype="int64") - if total is None: - total = helper.create_tmp_variable(dtype="int64") helper.append_op( type="accuracy", inputs={ diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index ddaeb415af4320c233aa7d01130fe1da2cdcbfa8..b9ea74fc81e0eb9b52e9cd1e9af8cba005a10f21 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -4037,18 +4037,25 @@ def image_resize(input, return out +@templatedoc(op_type="bilinear_interp") def resize_bilinear(input, out_shape=None, scale=None, name=None): """ - This is an alias of layer 'image_resize' with bilinear interpolation. + ${comment} + + Args: + input(${x_type}): ${x_comment}. + + out_shape(${out_size_type}): ${out_size_comment}. - The mathematical meaning of resize bilinear layer is - Bilinear interpolation. - Bilinear interpolation is an extension of linear interpolation for - interpolating functions of two variables (e.g. H-direction and - W-direction in this layer) on a rectilinear 2D grid. + scale(float|None): The multiplier for the input height or width. At + least one of out_shape or scale must be set. And out_shape has + a higher priority than scale. Default: None. + + name(str|None): The output variable name. + + Returns: - For details, please refer to Wikipedia: - https://en.wikipedia.org/wiki/Bilinear_interpolation + ${out_comment}. """ return image_resize(input, out_shape, scale, name, 'BILINEAR') diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 69cfde852dd087bb9192da1f7582f925582dbce4..3260f81e9edcd9ed83e98a681c43a5d9dbfd1312 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -73,6 +73,7 @@ __all__ = [ 'sum', 'polygon_box_transform', 'shape', + 'maxout', ] + __activations__ for _OP in set(__all__): diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 3dfacfff6acf24329e710e005e3d56c4b8df97b0..62b01d595a812ee8fc094e40b6dfb5c3f56cd012 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -18,6 +18,7 @@ from ..framework import convert_np_dtype_to_dtype_ from ..framework import Variable from ..initializer import Constant, force_init_on_cpu from ..core import VarDesc +from layer_function_generator import templatedoc import numpy __all__ = [ @@ -268,6 +269,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None): return out +@templatedoc() def fill_constant_batch_size_like(input, shape, dtype, @@ -275,30 +277,28 @@ def fill_constant_batch_size_like(input, input_dim_idx=0, output_dim_idx=0): """ - **fill_constant_batch_size_like** - - This function creates a tensor of specified *shape*, *dtype* and batch size, - and initializes this with a constant supplied in *value*. The batch size is - obtained from the `input` tensor. + ${comment} It also sets *stop_gradient* to True. + >>> data = fluid.layers.fill_constant_batch_size_like( + >>> input=like, shape=[1], value=0, dtype='int64') + Args: - input(Variable): Tensor whose dimensions will be used to get batch size - shape(tuple|list|None): Shape of output tensor - dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor - value(float): Constant value to initialize the output tensor - input_dim_idx(int): Index of input's batch size dimension - output_dim_idx(int): Index of output's batch size dimension + input(${input_type}): ${input_comment}. - Returns: - Variable: The tensor variable storing the output + shape(${shape_type}): ${shape_comment}. - Examples: - .. code-block:: python + dtype(${dtype_type}): ${dtype_comment}. + + value(${value_type}): ${value_comment}. - data = fluid.layers.fill_constant_batch_size_like( - input=like, shape=[1], value=0, dtype='int64') + input_dim_idx(${input_dim_idx_type}): ${input_dim_idx_comment}. + + output_dim_idx(${output_dim_idx_type}): ${output_dim_idx_comment}. + + Returns: + ${out_comment}. """ helper = LayerHelper("fill_constant_batch_size_like", **locals()) out = helper.create_tmp_variable(dtype=dtype) @@ -501,22 +501,6 @@ def save_combine(x, file_path, overwrite=True): "overwrite": overwrite}) -def load(out, file_path): - """ - Loads a variable from a given file. - - Args: - out(variable): The variable to be read from the disk file. - file_path(str): The path of the disk file. - """ - helper = LayerHelper("load", **locals()) - helper.append_op( - type="load", - inputs={}, - output={"Out": out}, - args={"file_path": file_path}) - - def load_combine(out, file_path): """ Loads a list of vairables from a single file. diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py index b3117cf2e5e0513089e5e1146d49702fcc8b7ba6..ad28c9eff560507e5b326451159be3949353f58f 100644 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py @@ -38,7 +38,7 @@ def inference_program(): return y_predict -def linear(): +def train_program(): y = fluid.layers.data(name='y', shape=[1], dtype='float32') y_predict = inference_program() @@ -104,7 +104,7 @@ def main(use_cuda): # Directory for saving the trained model params_dirname = "fit_a_line.inference.model" - train(use_cuda, linear, params_dirname) + train(use_cuda, train_program, params_dirname) infer(use_cuda, inference_program, params_dirname) diff --git a/python/paddle/fluid/tests/unittests/test_checkpoint.py b/python/paddle/fluid/tests/unittests/test_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..e22400a045ced16c46b0bf005155f621f249d263 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_checkpoint.py @@ -0,0 +1,75 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle.fluid as fluid +import unittest +import os +import tempfile + + +class TestCheckpoint(unittest.TestCase): + def setUp(self): + self.dirname = tempfile.mktemp() + self.max_num_checkpoints = 3 + self.epoch_interval = 1 + self.step_interval = 1 + self.trainer_id = 0 + self.chief = self.trainer_id == 0 + self.place = fluid.CPUPlace() + self.epoch_id = 100 + self.step_id = 20 + + def test_checkpoint(self): + self.save_checkpoint() + serial = fluid.io.get_latest_checkpoint_serial(self.dirname) + self.assertTrue(serial >= 0) + trainer_args = ["epoch_id", "step_id"] + epoch_id, step_id = fluid.io.load_trainer_args( + self.dirname, serial, self.trainer_id, trainer_args) + self.assertEqual(self.step_id, int(step_id)) + self.assertEqual(self.epoch_id, int(epoch_id)) + + program = fluid.Program() + with fluid.program_guard(program): + exe = fluid.Executor(self.place) + fluid.io.load_checkpoint(exe, self.dirname, serial, program) + + fluid.io.clean_checkpoint(self.dirname, delete_dir=True) + self.assertFalse(os.path.isdir(self.dirname)) + + def save_checkpoint(self): + config = fluid.CheckpointConfig(self.dirname, self.max_num_checkpoints, + self.epoch_interval, self.step_interval) + + trainer_args = {} + trainer_args["epoch_id"] = self.epoch_id + trainer_args["step_id"] = self.step_id + + program = fluid.Program() + with fluid.program_guard(program): + program.global_block().create_var( + name="scale_0", + psersistable=True, + dtype="float32", + shape=[32, 32]) + + exe = fluid.Executor(self.place) + for i in xrange(10): + fluid.io.save_checkpoint(exe, config.checkpoint_dir, + self.trainer_id, trainer_args, program, + config.max_num_checkpoints) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py b/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py index 22329390754d8d010dced0d1aca35617140cd097..95af51f1b2f8cd9492baa9cb14fe31ffa586f2fc 100644 --- a/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py +++ b/python/paddle/fluid/tests/unittests/test_dynrnn_gradient_check.py @@ -30,9 +30,6 @@ class Memory(object): assert val.dtype == self.ex.dtype self.cur = val - def ex(self): - return self.ex - def next(self): self.ex = self.cur self.cur = None diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 621a450fa6a6a8f47e3f1c1de609614b2359c33b..8b0ebe3cf52bf5b4514eacbd5d1bdd7c7a9b8b67 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -387,6 +387,14 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(output) print(str(program)) + def test_maxout(self): + program = Program() + with program_guard(program): + data = layers.data(name='x', shape=[8, 6, 6], dtype="float32") + output = layers.maxout(x=data, groups=2) + self.assertIsNotNone(output) + print(str(program)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index cdacb419863518cc0606903ed8eb79f0d2bc9e40..efc28d899304b01a3085891f3ae9396d57c589a1 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -27,11 +27,8 @@ import parallel_executor from transpiler import distribute_transpiler __all__ = [ - 'Trainer', - 'BeginEpochEvent', - 'EndEpochEvent', - 'BeginStepEvent', - 'EndStepEvent', + 'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent', + 'EndStepEvent', 'CheckpointConfig' ] @@ -59,6 +56,35 @@ class EndStepEvent(object): self.metrics = metrics +class CheckpointConfig(object): + def __init__(self, + checkpoint_dir=None, + max_num_checkpoints=3, + epoch_interval=1, + step_interval=10): + if checkpoint_dir is None: + self.checkpoint_dir = os.getcwd() + else: + self.checkpoint_dir = checkpoint_dir + + self.max_num_checkpoints = max_num_checkpoints + + if epoch_interval < 1: + self.epoch_interval = 1 + else: + self.epoch_interval = epoch_interval + + if step_interval < 1: + self.step_interval = 10 + else: + self.step_interval = step_interval + + self.epoch_id = 0 + self.step_id = 0 + self.load_serial = None + self.is_pserver = False + + def check_and_get_place(place): """ Check the type of place or get the default place @@ -99,13 +125,24 @@ class Trainer(object): optimizer_func, param_path=None, place=None, - parallel=False): + parallel=False, + checkpoint_config=None): self.__stop = False self.parallel = parallel # 1. we need to generate a framework.Program by calling # program_func. Reference: fluid.program_guard in # test_word2vec.py + # config for checkpoint + # only chief worker will save variables + self.trainer_id = 0 + self.checkpoint_cfg = checkpoint_config + if self.checkpoint_cfg: + assert isinstance(self.checkpoint_cfg, CheckpointConfig) + serial = io.get_latest_checkpoint_serial( + self.checkpoint_cfg.checkpoint_dir) + self.checkpoint_cfg.load_serial = serial if serial >= 0 else None + self.scope = core.Scope() self.startup_program = framework.Program() @@ -115,9 +152,9 @@ class Trainer(object): program_func_outs = train_func() self.train_func_outputs = program_func_outs if isinstance( program_func_outs, list) else [program_func_outs] - self.test_program = self.train_program.clone() + self.test_program = self.train_program.clone(for_test=True) - # The fisrt element of program_func_outs is loss. + # The first element of program_func_outs is loss. loss = self.train_func_outputs[0] optimizer = optimizer_func() @@ -137,9 +174,25 @@ class Trainer(object): exe = executor.Executor(place) exe.run(self.startup_program) - if param_path: + if self.checkpoint_cfg and self.checkpoint_cfg.load_serial: + with self._prog_and_scope_guard(): + exe = executor.Executor(place) + io.load_checkpoint(exe, self.checkpoint_cfg.checkpoint_dir, + self.checkpoint_cfg.load_serial, + self.startup_program) + + if not self.checkpoint_cfg.is_pserver: + epoch_id, step_id = io.load_trainer_args( + self.checkpoint_cfg.checkpoint_dir, + self.checkpoint_cfg.load_serial, self.trainer_id, + self._get_checkpoint_load_args()) + self.checkpoint_cfg.epoch_id = int(epoch_id) + self.checkpoint_cfg.step_id = int(step_id) + + if param_path and os.path.isdir(param_path): # load params from param_path into scope - io.load_persistables(exe, dirname=param_path) + io.load_persist_vars_without_grad( + exe, dirname=param_path, program=self.startup_program) def _transpile_nccl2_dist(self): # PADDLE_TRAINER_IPS @@ -194,14 +247,18 @@ class Trainer(object): current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port # the unique trainer id, starting from 0, needed by trainer # only - trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) + self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) + # the role, should be either PSERVER or TRAINER training_role = os.getenv("PADDLE_TRAINING_ROLE") with self._prog_and_scope_guard(): t = distribute_transpiler.DistributeTranspiler() t.transpile( - trainer_id, pservers=pserver_endpoints, trainers=trainers) + self.trainer_id, pservers=pserver_endpoints, trainers=trainers) if training_role == "PSERVER": + if self.checkpoint_cfg: + self.is_pserver = True + self.train_program = t.get_pserver_program(current_endpoint) self.startup_program = t.get_startup_program(current_endpoint, self.train_program) @@ -294,11 +351,26 @@ class Trainer(object): self._train_by_any_executor(event_handler, exe, num_epochs, reader) def _train_by_any_executor(self, event_handler, exe, num_epochs, reader): - for epoch_id in range(num_epochs): + if self.checkpoint_cfg: + epochs = [ + epoch_id for epoch_id in range(num_epochs) + if epoch_id >= self.checkpoint_cfg.epoch_id + ] + else: + epochs = [epoch_id for epoch_id in range(num_epochs)] + + for epoch_id in epochs: event_handler(BeginEpochEvent(epoch_id)) for step_id, data in enumerate(reader()): if self.__stop: + if self.checkpoint_cfg: + self._clean_checkpoint() return + + if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \ + and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id: + continue + begin_event = BeginStepEvent(epoch_id, step_id) event_handler(begin_event) if begin_event.fetch_metrics: @@ -309,8 +381,13 @@ class Trainer(object): ]) else: metrics = exe.run(feed=data, fetch_list=[]) + + if self.checkpoint_cfg: + self._save_checkpoint(epoch_id, step_id) event_handler(EndStepEvent(epoch_id, step_id, metrics)) event_handler(EndEpochEvent(epoch_id)) + if self.checkpoint_cfg: + self._clean_checkpoint() def _test_by_executor(self, reader, feed_order, fetch_list): with executor.scope_guard(self.scope): @@ -349,6 +426,38 @@ class Trainer(object): loss_name=self.train_func_outputs[0].name) return self._get_parallel_executor() + def _clean_checkpoint(self): + assert self.checkpoint_cfg + io.clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir) + + def _get_checkpoint_load_args(self): + """ + epoch_id and step_id are runtime arguments, they are not variables, will load them independently. + """ + return ["epoch_id", "step_id"] + + def _get_checkpoint_save_args(self, epoch_id, step_id): + """ + epoch_id and step_id are runtime arguments, they are not variables, will save them independently. + """ + trainer_args = {} + trainer_args["epoch_id"] = epoch_id + trainer_args["step_id"] = step_id + return trainer_args + + def _save_checkpoint(self, epoch_id, step_id): + assert self.checkpoint_cfg + + if epoch_id % self.checkpoint_cfg.epoch_interval == 0 and step_id % self.checkpoint_cfg.step_interval == 0: + exe = executor.Executor(self.place) + io.save_checkpoint( + executor=exe, + checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, + trainer_id=self.trainer_id, + trainer_args=self._get_checkpoint_save_args(epoch_id, step_id), + main_program=self.train_program, + max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints) + def build_feed_var_list(program, feed_order): if not isinstance(program, framework.Program): diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 27992df462ffd00ddf445538cc508b4232712481..c7ab300e0f0704ad16c15fce6fa3703587ff7c9e 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -177,6 +177,7 @@ class DistributeTranspiler: dtype=table_grad_var.dtype) for index in range(len(self.pserver_endpoints)) ] + return param_list, grad_list def _init_splited_vars(self, slice_var_up): # update these mappings for further transpile: @@ -199,8 +200,8 @@ class DistributeTranspiler: grad_list.append(g) param_grad_set.add(g.name) - self._update_dist_lookup_table_vars(param_list, grad_list, - self.params_grads) + param_list, grad_list = self._update_dist_lookup_table_vars( + param_list, grad_list, self.params_grads) if slice_var_up: # when we slice var up into blocks, we will slice the var according to