提交 c2a00f01 编写于 作者: S sneaxiy

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into argmin_argmax

# API注释撰写标准 # API注释撰写标准
- [API注释模块](#API注释模块) - [API注释撰写标准](#api)
- [格式及示例](#格式及示例) - [API注释模块](#api)
- [完整示例](#完整示例) - [格式及示例](#)
- [完整示例](#)
## API注释模块 ## API注释模块
...@@ -217,4 +218,4 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接 ...@@ -217,4 +218,4 @@ API文档须使用reStructuredText格式撰写,该格式详情请参考[链接
## 完整示例 ## 完整示例
fc 的完整注释见[示例](src/fc.py) fc 的完整注释见[示例](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/dev/src/fc.py)
# API Doc Standard # API Doc Standard
- [API Doc Structure](#API Doc Structure) - [API Doc Standard](#api-doc-standard)
- [Format and Examples](#Format and Examples) - [API Doc Structure](#api-doc-structure)
- [Complete Example](#Complete Example) - [Format and Examples](#format-and-examples)
- [Complete Example](#complete-example)
## API Doc Structure ## API Doc Structure
...@@ -223,4 +224,4 @@ Format and examples of each part of API documantation are as follows: (take fc f ...@@ -223,4 +224,4 @@ Format and examples of each part of API documantation are as follows: (take fc f
## Complete Example ## Complete Example
Complete Example of fc please see [here](src/fc.py) Complete Example of fc please see [here](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/dev/src/fc.py)
...@@ -109,7 +109,6 @@ void MainWord2Vec(bool use_gpu) { ...@@ -109,7 +109,6 @@ void MainWord2Vec(bool use_gpu) {
void MainImageClassification(bool use_gpu) { void MainImageClassification(bool use_gpu) {
int batch_size = 2; int batch_size = 2;
bool use_mkldnn = false;
bool repeat = false; bool repeat = false;
NativeConfig config = GetConfig(); NativeConfig config = GetConfig();
config.use_gpu = use_gpu; config.use_gpu = use_gpu;
...@@ -134,12 +133,8 @@ void MainImageClassification(bool use_gpu) { ...@@ -134,12 +133,8 @@ void MainImageClassification(bool use_gpu) {
std::vector<framework::LoDTensor*> cpu_fetchs1; std::vector<framework::LoDTensor*> cpu_fetchs1;
cpu_fetchs1.push_back(&output1); cpu_fetchs1.push_back(&output1);
TestInference<platform::CPUPlace, false, true>(config.model_dir, TestInference<platform::CPUPlace, false, true>(
cpu_feeds, config.model_dir, cpu_feeds, cpu_fetchs1, repeat, is_combined);
cpu_fetchs1,
repeat,
is_combined,
use_mkldnn);
auto predictor = CreatePaddlePredictor(config); auto predictor = CreatePaddlePredictor(config);
std::vector<PaddleTensor> paddle_tensor_feeds; std::vector<PaddleTensor> paddle_tensor_feeds;
......
...@@ -28,6 +28,9 @@ struct DataTypeMap { ...@@ -28,6 +28,9 @@ struct DataTypeMap {
}; };
static DataTypeMap* InitDataTypeMap(); static DataTypeMap* InitDataTypeMap();
// C++11 removes the need for manual locking. Concurrent execution shall wait if
// a static local variable is already being initialized.
// https://stackoverflow.com/questions/11711920/how-to-implement-multithread-safe-singleton-in-c11-without-using-mutex
static DataTypeMap& gDataTypeMap() { static DataTypeMap& gDataTypeMap() {
static DataTypeMap* g_data_type_map_ = InitDataTypeMap(); static DataTypeMap* g_data_type_map_ = InitDataTypeMap();
return *g_data_type_map_; return *g_data_type_map_;
......
...@@ -42,7 +42,7 @@ void FuseVarsOpHandle::RunImpl() { ...@@ -42,7 +42,7 @@ void FuseVarsOpHandle::RunImpl() {
out_t->ShareDataWith(out_tensor->Slice(s, s + numel)); out_t->ShareDataWith(out_tensor->Slice(s, s + numel));
s += numel; s += numel;
} }
this->RunAndRecordEvent([this] {}); this->RunAndRecordEvent([] {});
} }
std::string FuseVarsOpHandle::Name() const { return "fuse vars"; } std::string FuseVarsOpHandle::Name() const { return "fuse vars"; }
......
...@@ -24,6 +24,7 @@ limitations under the License. */ ...@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DECLARE_bool(benchmark); DECLARE_bool(benchmark);
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -115,6 +116,7 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope, ...@@ -115,6 +116,7 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
bool create_local_scope, bool create_vars) { bool create_local_scope, bool create_vars) {
platform::RecordBlock b(block_id); platform::RecordBlock b(block_id);
if (FLAGS_use_mkldnn) EnableMKLDNN(pdesc);
auto ctx = Prepare(pdesc, block_id); auto ctx = Prepare(pdesc, block_id);
RunPreparedContext(ctx.get(), scope, create_local_scope, create_vars); RunPreparedContext(ctx.get(), scope, create_local_scope, create_vars);
} }
...@@ -214,6 +216,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, ...@@ -214,6 +216,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
const std::string& feed_holder_name, const std::string& feed_holder_name,
const std::string& fetch_holder_name) { const std::string& fetch_holder_name) {
platform::RecordBlock b(kProgramId); platform::RecordBlock b(kProgramId);
if (FLAGS_use_mkldnn) EnableMKLDNN(program);
bool has_feed_ops = bool has_feed_ops =
has_feed_operators(program.Block(0), *feed_targets, feed_holder_name); has_feed_operators(program.Block(0), *feed_targets, feed_holder_name);
bool has_fetch_ops = bool has_fetch_ops =
...@@ -225,7 +228,6 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, ...@@ -225,7 +228,6 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
unique_ptr_of_copy_program.reset(new ProgramDesc(program)); unique_ptr_of_copy_program.reset(new ProgramDesc(program));
copy_program = unique_ptr_of_copy_program.get(); copy_program = unique_ptr_of_copy_program.get();
} }
auto* global_block = copy_program->MutableBlock(0); auto* global_block = copy_program->MutableBlock(0);
if (!has_feed_ops) { if (!has_feed_ops) {
...@@ -378,5 +380,19 @@ void Executor::RunPreparedContext( ...@@ -378,5 +380,19 @@ void Executor::RunPreparedContext(
} }
} }
void Executor::EnableMKLDNN(const ProgramDesc& program) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(3) << "use_mkldnn=True";
for (size_t bid = 0; bid < program.Size(); ++bid) {
auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
for (auto* op : block->AllOps()) {
if (op->HasAttr("use_mkldnn")) {
op->SetAttr("use_mkldnn", true);
}
}
}
#endif
}
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -81,6 +81,8 @@ class Executor { ...@@ -81,6 +81,8 @@ class Executor {
const std::string& feed_holder_name = "feed", const std::string& feed_holder_name = "feed",
const std::string& fetch_holder_name = "fetch"); const std::string& fetch_holder_name = "fetch");
void EnableMKLDNN(const ProgramDesc& program);
private: private:
const platform::Place place_; const platform::Place place_;
}; };
......
...@@ -21,7 +21,6 @@ DEFINE_string(fp16_dirname, "", "Directory of the float16 inference model."); ...@@ -21,7 +21,6 @@ DEFINE_string(fp16_dirname, "", "Directory of the float16 inference model.");
DEFINE_int32(batch_size, 1, "Batch size of input data"); DEFINE_int32(batch_size, 1, "Batch size of input data");
DEFINE_int32(repeat, 1, "Running the inference program repeat times"); DEFINE_int32(repeat, 1, "Running the inference program repeat times");
DEFINE_bool(skip_cpu, false, "Skip the cpu test"); DEFINE_bool(skip_cpu, false, "Skip the cpu test");
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference");
TEST(inference, image_classification) { TEST(inference, image_classification) {
if (FLAGS_dirname.empty() || FLAGS_batch_size < 1 || FLAGS_repeat < 1) { if (FLAGS_dirname.empty() || FLAGS_batch_size < 1 || FLAGS_repeat < 1) {
...@@ -59,10 +58,8 @@ TEST(inference, image_classification) { ...@@ -59,10 +58,8 @@ TEST(inference, image_classification) {
// Run inference on CPU // Run inference on CPU
LOG(INFO) << "--- CPU Runs: ---"; LOG(INFO) << "--- CPU Runs: ---";
LOG(INFO) << "Batch size is " << FLAGS_batch_size; LOG(INFO) << "Batch size is " << FLAGS_batch_size;
LOG(INFO) << "FLAGS_use_mkldnn: " << FLAGS_use_mkldnn;
TestInference<paddle::platform::CPUPlace, false, true>( TestInference<paddle::platform::CPUPlace, false, true>(
dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined, dirname, cpu_feeds, cpu_fetchs1, FLAGS_repeat, is_combined);
FLAGS_use_mkldnn);
LOG(INFO) << output1.dims(); LOG(INFO) << output1.dims();
} }
......
...@@ -27,7 +27,6 @@ limitations under the License. */ ...@@ -27,7 +27,6 @@ limitations under the License. */
DEFINE_string(model_path, "", "Directory of the inference model."); DEFINE_string(model_path, "", "Directory of the inference model.");
DEFINE_string(data_file, "", "File of input index data."); DEFINE_string(data_file, "", "File of input index data.");
DEFINE_int32(repeat, 100, "Running the inference program repeat times"); DEFINE_int32(repeat, 100, "Running the inference program repeat times");
DEFINE_bool(use_mkldnn, false, "Use MKLDNN to run inference");
DEFINE_bool(prepare_vars, true, "Prepare variables before executor"); DEFINE_bool(prepare_vars, true, "Prepare variables before executor");
DEFINE_int32(num_threads, 1, "Number of threads should be used"); DEFINE_int32(num_threads, 1, "Number of threads should be used");
...@@ -190,9 +189,6 @@ TEST(inference, nlp) { ...@@ -190,9 +189,6 @@ TEST(inference, nlp) {
std::unique_ptr<paddle::framework::ProgramDesc> inference_program; std::unique_ptr<paddle::framework::ProgramDesc> inference_program;
inference_program = InitProgram(&executor, scope.get(), FLAGS_model_path, inference_program = InitProgram(&executor, scope.get(), FLAGS_model_path,
/*model combined*/ false); /*model combined*/ false);
if (FLAGS_use_mkldnn) {
EnableMKLDNN(inference_program);
}
// always prepare context // always prepare context
std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx; std::unique_ptr<paddle::framework::ExecutorPrepareContext> ctx;
ctx = executor.Prepare(*inference_program, 0); ctx = executor.Prepare(*inference_program, 0);
......
...@@ -22,6 +22,8 @@ limitations under the License. */ ...@@ -22,6 +22,8 @@ limitations under the License. */
#include "paddle/fluid/inference/io.h" #include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
DECLARE_bool(use_mkldnn);
template <typename T> template <typename T>
void SetupTensor(paddle::framework::LoDTensor* input, void SetupTensor(paddle::framework::LoDTensor* input,
paddle::framework::DDim dims, T lower, T upper) { paddle::framework::DDim dims, T lower, T upper) {
...@@ -133,24 +135,11 @@ std::vector<std::vector<int64_t>> GetFeedTargetShapes( ...@@ -133,24 +135,11 @@ std::vector<std::vector<int64_t>> GetFeedTargetShapes(
return feed_target_shapes; return feed_target_shapes;
} }
void EnableMKLDNN(
const std::unique_ptr<paddle::framework::ProgramDesc>& program) {
for (size_t bid = 0; bid < program->Size(); ++bid) {
auto* block = program->MutableBlock(bid);
for (auto* op : block->AllOps()) {
if (op->HasAttr("use_mkldnn")) {
op->SetAttr("use_mkldnn", true);
}
}
}
}
template <typename Place, bool CreateVars = true, bool PrepareContext = false> template <typename Place, bool CreateVars = true, bool PrepareContext = false>
void TestInference(const std::string& dirname, void TestInference(const std::string& dirname,
const std::vector<paddle::framework::LoDTensor*>& cpu_feeds, const std::vector<paddle::framework::LoDTensor*>& cpu_feeds,
const std::vector<paddle::framework::LoDTensor*>& cpu_fetchs, const std::vector<paddle::framework::LoDTensor*>& cpu_fetchs,
const int repeat = 1, const bool is_combined = false, const int repeat = 1, const bool is_combined = false) {
const bool use_mkldnn = false) {
// 1. Define place, executor, scope // 1. Define place, executor, scope
auto place = Place(); auto place = Place();
auto executor = paddle::framework::Executor(place); auto executor = paddle::framework::Executor(place);
...@@ -182,9 +171,6 @@ void TestInference(const std::string& dirname, ...@@ -182,9 +171,6 @@ void TestInference(const std::string& dirname,
"init_program", "init_program",
paddle::platform::DeviceContextPool::Instance().Get(place)); paddle::platform::DeviceContextPool::Instance().Get(place));
inference_program = InitProgram(&executor, scope, dirname, is_combined); inference_program = InitProgram(&executor, scope, dirname, is_combined);
if (use_mkldnn) {
EnableMKLDNN(inference_program);
}
} }
// Disable the profiler and print the timing information // Disable the profiler and print the timing information
paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault, paddle::platform::DisableProfiler(paddle::platform::EventSortingKey::kDefault,
...@@ -210,7 +196,10 @@ void TestInference(const std::string& dirname, ...@@ -210,7 +196,10 @@ void TestInference(const std::string& dirname,
fetch_targets[fetch_target_names[i]] = cpu_fetchs[i]; fetch_targets[fetch_target_names[i]] = cpu_fetchs[i];
} }
// 6. Run the inference program // 6. If export Flags_use_mkldnn=True, use mkldnn related ops.
if (FLAGS_use_mkldnn) executor.EnableMKLDNN(*inference_program);
// 7. Run the inference program
{ {
if (!CreateVars) { if (!CreateVars) {
// If users don't want to create and destroy variables every time they // If users don't want to create and destroy variables every time they
......
...@@ -12,24 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,24 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/arg_max_op.h" #include "paddle/fluid/operators/arg_min_max_op_base.h"
REGISTER_OPERATOR(arg_max, paddle::operators::ArgMaxOp, REGISTER_OPERATOR(arg_max, paddle::operators::ArgMinMaxOp,
paddle::operators::ArgMaxOpMaker, paddle::operators::ArgMaxOpMaker,
paddle::framework::EmptyGradOpMaker); paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
arg_max, paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, arg_max,
float, int64_t>, paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, float>,
paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, double, paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, double>,
paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext,
int64_t>, int64_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, int64_t, paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext,
int64_t>, int32_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, int32_t, paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext,
int64_t>, int16_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, int16_t, paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, size_t>,
int64_t>, paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext,
paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, size_t, uint8_t>);
int64_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CPUDeviceContext, uint8_t,
int64_t>);
...@@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/arg_max_op.h" #include "paddle/fluid/operators/arg_min_max_op_base.h"
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
arg_max, arg_max,
paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext, float, paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext, float>,
int64_t>, paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext,
paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext, double, double>,
paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext,
int64_t>, int64_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext, paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext,
int64_t, int64_t>, int32_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext, paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext,
int32_t, int64_t>, int16_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext, paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext,
int16_t, int64_t>, size_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext, size_t,
int64_t>,
paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext, paddle::operators::ArgMaxKernel<paddle::platform::CUDADeviceContext,
uint8_t, int64_t>); uint8_t>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/operators/arg_min_max_op_base.h"
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#pragma once #pragma once
#include <string>
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/ddim.h"
...@@ -37,9 +38,9 @@ struct ArgMinMaxFunctor {}; ...@@ -37,9 +38,9 @@ struct ArgMinMaxFunctor {};
struct ArgMinMaxFunctor<DeviceContext, T, Tout, Rank, \ struct ArgMinMaxFunctor<DeviceContext, T, Tout, Rank, \
enum_argminmax_value> { \ enum_argminmax_value> { \
void operator()(const DeviceContext& ctx, const framework::LoDTensor& in, \ void operator()(const DeviceContext& ctx, const framework::LoDTensor& in, \
framework::LoDTensor& out, int64_t axis) { \ framework::LoDTensor* out, int64_t axis) { \
auto in_eigen = framework::EigenTensor<T, Rank>::From(in); \ auto in_eigen = framework::EigenTensor<T, Rank>::From(in); \
auto out_eigen = framework::EigenTensor<Tout, Rank - 1>::From(out); \ auto out_eigen = framework::EigenTensor<Tout, Rank - 1>::From(*out); \
out_eigen.device(*(ctx.eigen_device())) = \ out_eigen.device(*(ctx.eigen_device())) = \
in_eigen.eigen_op_type(axis).template cast<Tout>(); \ in_eigen.eigen_op_type(axis).template cast<Tout>(); \
} \ } \
...@@ -62,7 +63,7 @@ class ArgMinMaxKernel : public framework::OpKernel<T> { ...@@ -62,7 +63,7 @@ class ArgMinMaxKernel : public framework::OpKernel<T> {
#define CALL_ARG_MINMAX_FUNCTOR(rank) \ #define CALL_ARG_MINMAX_FUNCTOR(rank) \
ArgMinMaxFunctor<DeviceContext, T, Tout, rank, EnumArgMinMaxValue> \ ArgMinMaxFunctor<DeviceContext, T, Tout, rank, EnumArgMinMaxValue> \
functor##rank; \ functor##rank; \
functor##rank(dev_ctx, x, out, axis) functor##rank(dev_ctx, x, &out, axis)
switch (x.dims().size()) { switch (x.dims().size()) {
case 1: case 1:
...@@ -89,19 +90,20 @@ class ArgMinMaxKernel : public framework::OpKernel<T> { ...@@ -89,19 +90,20 @@ class ArgMinMaxKernel : public framework::OpKernel<T> {
"than 6.", "than 6.",
(EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax")); (EnumArgMinMaxValue == kArgMin ? "argmin" : "argmax"));
break; break;
#undef CALL_ARG_MINMAX_FUNCTOR
} }
} }
}; };
template <typename DeviceContext, typename T, typename Tout> template <typename DeviceContext, typename T>
using ArgMinKernel = using ArgMinKernel =
ArgMinMaxKernel<DeviceContext, T, Tout, ArgMinMaxType::kArgMin>; ArgMinMaxKernel<DeviceContext, T, int64_t, ArgMinMaxType::kArgMin>;
template <typename DeviceContext, typename T, typename Tout> template <typename DeviceContext, typename T>
using ArgMaxKernel = using ArgMaxKernel =
ArgMinMaxKernel<DeviceContext, T, Tout, ArgMinMaxType::kArgMax>; ArgMinMaxKernel<DeviceContext, T, int64_t, ArgMinMaxType::kArgMax>;
typedef class BaseArgMinMaxOp : public framework::OperatorWithKernel { class ArgMinMaxOp : public framework::OperatorWithKernel {
public: public:
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
...@@ -121,7 +123,7 @@ typedef class BaseArgMinMaxOp : public framework::OperatorWithKernel { ...@@ -121,7 +123,7 @@ typedef class BaseArgMinMaxOp : public framework::OperatorWithKernel {
for (int64_t i = axis + 1; i < x_rank; i++) vec.push_back(x_dims[i]); for (int64_t i = axis + 1; i < x_rank; i++) vec.push_back(x_dims[i]);
ctx->SetOutputDim("Out", framework::make_ddim(vec)); ctx->SetOutputDim("Out", framework::make_ddim(vec));
} }
} ArgMinOp, ArgMaxOp; };
class BaseArgMinMaxOpMaker : public framework::OpProtoAndCheckerMaker { class BaseArgMinMaxOpMaker : public framework::OpProtoAndCheckerMaker {
protected: protected:
...@@ -133,12 +135,13 @@ class BaseArgMinMaxOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -133,12 +135,13 @@ class BaseArgMinMaxOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input tensor."); AddInput("X", "Input tensor.");
AddOutput("Out", "Output tensor."); AddOutput("Out", "Output tensor.");
AddAttr<int64_t>("axis", "The axis in which to compute the arg indics."); AddAttr<int64_t>("axis", "The axis in which to compute the arg indics.");
AddComment(::paddle::string::Sprintf(R"DOC( AddComment(string::Sprintf(R"DOC(
%s Operator. %s Operator.
Computes the indices of the %s elements of the input tensor's element along the provided axis. Computes the indices of the %s elements of the input tensor's element
along the provided axis.
)DOC", )DOC",
OpName(), Name())); OpName(), Name()));
} }
}; };
......
...@@ -12,24 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,24 +12,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/arg_min_op.h" #include "paddle/fluid/operators/arg_min_max_op_base.h"
REGISTER_OPERATOR(arg_min, paddle::operators::ArgMinOp, REGISTER_OPERATOR(arg_min, paddle::operators::ArgMinMaxOp,
paddle::operators::ArgMinOpMaker, paddle::operators::ArgMinOpMaker,
paddle::framework::EmptyGradOpMaker); paddle::framework::EmptyGradOpMaker);
REGISTER_OP_CPU_KERNEL( REGISTER_OP_CPU_KERNEL(
arg_min, paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, arg_min,
float, int64_t>, paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, float>,
paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, double, paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, double>,
paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext,
int64_t>, int64_t>,
paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, int64_t, paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext,
int64_t>, int32_t>,
paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, int32_t, paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext,
int64_t>, int16_t>,
paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, int16_t, paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, size_t>,
int64_t>, paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext,
paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, size_t, uint8_t>);
int64_t>,
paddle::operators::ArgMinKernel<paddle::platform::CPUDeviceContext, uint8_t,
int64_t>);
...@@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,21 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/operators/arg_min_op.h" #include "paddle/fluid/operators/arg_min_max_op_base.h"
REGISTER_OP_CUDA_KERNEL( REGISTER_OP_CUDA_KERNEL(
arg_min, arg_min,
paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext, float, paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext, float>,
int64_t>, paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext,
paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext, double, double>,
paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext,
int64_t>, int64_t>,
paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext, paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext,
int64_t, int64_t>, int32_t>,
paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext, paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext,
int32_t, int64_t>, int16_t>,
paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext, paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext,
int16_t, int64_t>, size_t>,
paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext, size_t,
int64_t>,
paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext, paddle::operators::ArgMinKernel<paddle::platform::CUDADeviceContext,
uint8_t, int64_t>); uint8_t>);
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/operators/arg_min_max_op_base.h"
...@@ -54,18 +54,18 @@ class BatchSizeLikeOp : public framework::OperatorWithKernel { ...@@ -54,18 +54,18 @@ class BatchSizeLikeOp : public framework::OperatorWithKernel {
class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker { class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() final { void Make() final {
AddInput("Input", AddInput(
"(Tensor) Tensor " "Input",
"whose input_dim_idx'th dimension specifies the batch_size"); "Tensor whose input_dim_idx'th dimension specifies the batch_size");
AddOutput("Out", AddOutput("Out",
"(Tensor) Tensor of specified shape will be filled " "Tensor of specified shape will be filled "
"with the specified value"); "with the specified value");
AddAttr<std::vector<int>>("shape", "(vector<int>) The shape of the output"); AddAttr<std::vector<int>>("shape", "The shape of the output");
AddAttr<int>("input_dim_idx", AddAttr<int>("input_dim_idx",
"(int, default 0) The index of input's batch size dimension") "default 0. The index of input's batch size dimension")
.SetDefault(0); .SetDefault(0);
AddAttr<int>("output_dim_idx", AddAttr<int>("output_dim_idx",
"(int, default 0) The index of output's batch size dimension") "default 0. The index of output's batch size dimension")
.SetDefault(0); .SetDefault(0);
Apply(); Apply();
} }
......
...@@ -56,17 +56,16 @@ class BilinearInterpOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -56,17 +56,16 @@ class BilinearInterpOpMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("X", AddInput("X",
"(Tensor) The input tensor of bilinear interpolation, " "The input tensor of bilinear interpolation, "
"This is a 4-D tensor with shape of (N x C x h x w)"); "This is a 4-D tensor with shape of (N x C x h x w)");
AddInput("OutSize", AddInput("OutSize",
"(Tensor) This is a 1-D tensor with two number. " "This is a 1-D tensor with two number. "
"The first number is height and the second number is width.") "The first number is height and the second number is width.")
.AsDispensable(); .AsDispensable();
AddOutput("Out", AddOutput("Out", "The dimension of output is (N x C x out_h x out_w)");
"(Tensor) The dimension of output is (N x C x out_h x out_w]");
AddAttr<int>("out_h", "(int) output height of bilinear interpolation op."); AddAttr<int>("out_h", "output height of bilinear interpolation op.");
AddAttr<int>("out_w", "(int) output width of bilinear interpolation op."); AddAttr<int>("out_w", "output width of bilinear interpolation op.");
AddComment(R"DOC( AddComment(R"DOC(
Bilinear interpolation is an extension of linear interpolation for Bilinear interpolation is an extension of linear interpolation for
interpolating functions of two variables (e.g. H-direction and interpolating functions of two variables (e.g. H-direction and
......
...@@ -64,13 +64,21 @@ bool RequestSendHandler::Handle(const std::string& varname, ...@@ -64,13 +64,21 @@ bool RequestSendHandler::Handle(const std::string& varname,
return false; return false;
} }
if (invar->IsType<framework::SelectedRows>()) { if (invar->IsType<framework::SelectedRows>()) {
rpc_server_->RecordSparseVar(invar); std::unique_lock<std::mutex> lock(mutex_sparse_vars_);
sparse_vars_.push_back(invar);
} }
} }
return true; return true;
} }
void RequestSendHandler::ResetSparseVarRecorder() {
std::unique_lock<std::mutex> lock(mutex_sparse_vars_);
for (auto* var : sparse_vars_) {
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
}
sparse_vars_.clear();
}
bool RequestGetHandler::Handle(const std::string& varname, bool RequestGetHandler::Handle(const std::string& varname,
framework::Scope* scope, framework::Scope* scope,
framework::Variable* invar, framework::Variable* invar,
......
...@@ -41,6 +41,11 @@ class RequestSendHandler final : public RequestHandler { ...@@ -41,6 +41,11 @@ class RequestSendHandler final : public RequestHandler {
virtual ~RequestSendHandler() {} virtual ~RequestSendHandler() {}
bool Handle(const std::string& varname, framework::Scope* scope, bool Handle(const std::string& varname, framework::Scope* scope,
framework::Variable* var, framework::Variable** outvar) override; framework::Variable* var, framework::Variable** outvar) override;
void ResetSparseVarRecorder();
private:
std::mutex mutex_sparse_vars_;
std::vector<framework::Variable*> sparse_vars_;
}; };
class RequestGetHandler final : public RequestHandler { class RequestGetHandler final : public RequestHandler {
......
...@@ -73,19 +73,6 @@ void RPCServer::ResetBarrierCounter() { ...@@ -73,19 +73,6 @@ void RPCServer::ResetBarrierCounter() {
t.second = 0; t.second = 0;
} }
} }
void RPCServer::RecordSparseVar(framework::Variable* sparse_var) {
std::unique_lock<std::mutex> lock(mutex_sparse_var_recorder_);
sparse_vars_.push_back(sparse_var);
}
void RPCServer::ResetSparseVarsRecorder() {
VLOG(3) << "RPCServer reset sparse vars recorder.";
std::unique_lock<std::mutex> lock(mutex_sparse_var_recorder_);
for (auto* var : sparse_vars_) {
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
}
sparse_vars_.clear();
}
void RPCServer::RegisterRPC(const std::string& rpc_name, void RPCServer::RegisterRPC(const std::string& rpc_name,
RequestHandler* handler, int thread_num) { RequestHandler* handler, int thread_num) {
......
...@@ -62,8 +62,6 @@ class RPCServer { ...@@ -62,8 +62,6 @@ class RPCServer {
void IncreaseBatchBarrier(const std::string rpc_name); void IncreaseBatchBarrier(const std::string rpc_name);
void ResetBarrierCounter(); void ResetBarrierCounter();
void RecordSparseVar(framework::Variable* sparse_var);
void ResetSparseVarsRecorder();
protected: protected:
virtual void ShutDownImpl() = 0; virtual void ShutDownImpl() = 0;
...@@ -77,9 +75,6 @@ class RPCServer { ...@@ -77,9 +75,6 @@ class RPCServer {
std::atomic<int> cur_cond_; std::atomic<int> cur_cond_;
std::condition_variable rpc_cond_; std::condition_variable rpc_cond_;
std::vector<framework::Variable*> sparse_vars_;
std::mutex mutex_sparse_var_recorder_;
protected: protected:
std::string bind_address_; std::string bind_address_;
std::atomic<int> exit_flag_; std::atomic<int> exit_flag_;
......
...@@ -32,16 +32,16 @@ class FillConstantBatchSizeLikeOp : public BatchSizeLikeOp { ...@@ -32,16 +32,16 @@ class FillConstantBatchSizeLikeOp : public BatchSizeLikeOp {
class FillConstantBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker { class FillConstantBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
protected: protected:
void Apply() override { void Apply() override {
AddAttr<int>("dtype", AddAttr<int>(
"(int, default 5 (FP32)) " "dtype",
"Output data type") "It could be numpy.dtype. Output data type. Default is float32")
.SetDefault(framework::proto::VarType::FP32); .SetDefault(framework::proto::VarType::FP32);
AddAttr<float>("value", "(float, default 0) The value to be filled") AddAttr<float>("value", "default 0. The value to be filled")
.SetDefault(0.0f); .SetDefault(0.0f);
AddComment(R"DOC( AddComment(R"DOC(
FillConstantBatchSizeLike Operator. This function creates a tensor of specified *shape*, *dtype* and batch size,
and initializes this with a constant supplied in *value*. The batch size is
Fill up a variable with specified constant value. obtained from the `input` tensor.
)DOC"); )DOC");
} }
......
...@@ -43,7 +43,8 @@ TEST(Gather, GatherData) { ...@@ -43,7 +43,8 @@ TEST(Gather, GatherData) {
auto* cpu_place = new paddle::platform::CPUPlace(); auto* cpu_place = new paddle::platform::CPUPlace();
paddle::platform::CPUDeviceContext ctx(*cpu_place); paddle::platform::CPUDeviceContext ctx(*cpu_place);
paddle::operators::CPUGather<int>(ctx, *src, *index, output); paddle::operators::CPUGather<int>(ctx, *src, *index, output);
delete cpu_place;
cpu_place = NULL;
for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4); for (int i = 0; i < 4; ++i) EXPECT_EQ(p_output[i], i + 4);
for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4); for (int i = 4; i < 8; ++i) EXPECT_EQ(p_output[i], i - 4);
......
...@@ -67,8 +67,6 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -67,8 +67,6 @@ class LinearChainCRFOpMaker : public framework::OpProtoAndCheckerMaker {
"mini-batch. Note: S is equal to the sequence number in a mini-batch. " "mini-batch. Note: S is equal to the sequence number in a mini-batch. "
"The output is no longer a LoDTensor."); "The output is no longer a LoDTensor.");
AddComment(R"DOC( AddComment(R"DOC(
LinearChainCRF Operator.
Conditional Random Field defines an undirected probabilistic graph with nodes Conditional Random Field defines an undirected probabilistic graph with nodes
denoting random variables and edges denoting dependencies between these denoting random variables and edges denoting dependencies between these
variables. CRF learns the conditional probability $P(Y|X)$, where variables. CRF learns the conditional probability $P(Y|X)$, where
......
...@@ -146,7 +146,9 @@ void ListenAndServOp::RunSyncLoop(framework::Executor *executor, ...@@ -146,7 +146,9 @@ void ListenAndServOp::RunSyncLoop(framework::Executor *executor,
rpc_service_->SetCond(detail::kRequestGet); rpc_service_->SetCond(detail::kRequestGet);
rpc_service_->WaitBarrier(detail::kRequestGet); rpc_service_->WaitBarrier(detail::kRequestGet);
rpc_service_->ResetBarrierCounter(); rpc_service_->ResetBarrierCounter();
rpc_service_->ResetSparseVarsRecorder(); // reset received sparse vars to avoid reuse it in the next mini-batch
dynamic_cast<detail::RequestSendHandler *>(request_send_handler_.get())
->ResetSparseVarRecorder();
} // while(true) } // while(true)
} }
......
...@@ -74,25 +74,18 @@ class LoadOp : public framework::OperatorBase { ...@@ -74,25 +74,18 @@ class LoadOp : public framework::OperatorBase {
class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker { class LoadOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddOutput("Out", "(Tensor) The tensor need to be loaded"); AddOutput("Out", "The tensor need to be loaded");
AddAttr<bool>( AddAttr<bool>(
"load_as_fp16", "load_as_fp16",
"(boolean, default false)"
"If true, the tensor will be first loaded and then " "If true, the tensor will be first loaded and then "
"converted to float16 data type. Otherwise, the tensor will be " "converted to float16 data type. Otherwise, the tensor will be "
"directly loaded without data type conversion.") "directly loaded without data type conversion. Default is false.")
.SetDefault(false); .SetDefault(false);
AddAttr<std::string>("file_path", AddAttr<std::string>("file_path",
"(string) " R"(Variable will be loaded from "file_path")")
"Variable will be loaded from \"file_path\".")
.AddCustomChecker( .AddCustomChecker(
[](const std::string &path) { return !path.empty(); }); [](const std::string &path) { return !path.empty(); });
AddComment(R"DOC( AddComment("Load operator will load a tensor variable from disk file.");
Load Operator.
Load operator will load a tensor variable from disk file.
)DOC");
} }
}; };
} // namespace operators } // namespace operators
......
...@@ -77,6 +77,8 @@ TEST(math_function, gemm_trans_clbas) { ...@@ -77,6 +77,8 @@ TEST(math_function, gemm_trans_clbas) {
paddle::platform::CPUDeviceContext context(*cpu_place); paddle::platform::CPUDeviceContext context(*cpu_place);
GetBlas<float>(context).GEMM(false, true, m, n, k, 1, input1_ptr, 3, GetBlas<float>(context).GEMM(false, true, m, n, k, 1, input1_ptr, 3,
input2_ptr + 3, 3, 1, input3_ptr + 1, 4); input2_ptr + 3, 3, 1, input3_ptr + 1, 4);
delete cpu_place;
cpu_place = NULL;
EXPECT_EQ(input3_ptr[0], 0); EXPECT_EQ(input3_ptr[0], 0);
EXPECT_EQ(input3_ptr[1], 24); EXPECT_EQ(input3_ptr[1], 24);
......
...@@ -42,10 +42,15 @@ class MaxSeqenceLenOp : public framework::OperatorBase { ...@@ -42,10 +42,15 @@ class MaxSeqenceLenOp : public framework::OperatorBase {
class MaxSeqenceLenOpProtoMaker : public framework::OpProtoAndCheckerMaker { class MaxSeqenceLenOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public: public:
void Make() override { void Make() override {
AddInput("RankTable", "The lod_rank_table."); AddInput("RankTable", "Input variable which is a LoDRankTable object");
AddOutput("Out", "The max sequence length."); AddOutput("Out", "The max sequence length");
AddComment( AddComment(R"DOC(
R"DOC(Calculate the max sequence length through lod_rank_table.)DOC"); Given a LoDRankTable object, this layer returns the max length of
a batch of sequences. In fact, a LoDRankTable object contains a list of
tuples(<sequence index, sequence length>) and the list is already sorted by
sequence length in descending order, so the operator just returns the
sequence length of the first tuple element
)DOC");
} }
}; };
......
...@@ -30,7 +30,7 @@ int main(int argc, char** argv) { ...@@ -30,7 +30,7 @@ int main(int argc, char** argv) {
new_argv.push_back( new_argv.push_back(
strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory")); strdup("--tryfromenv=fraction_of_gpu_memory_to_use,use_pinned_memory"));
#else #else
new_argv.push_back(strdup("--tryfromenv=use_pinned_memory")); new_argv.push_back(strdup("--tryfromenv=use_pinned_memory,use_mkldnn"));
#endif #endif
int new_argc = static_cast<int>(new_argv.size()); int new_argc = static_cast<int>(new_argv.size());
char** new_argv_address = new_argv.data(); char** new_argv_address = new_argv.data();
......
...@@ -26,6 +26,7 @@ from trainer import BeginEpochEvent ...@@ -26,6 +26,7 @@ from trainer import BeginEpochEvent
from trainer import EndEpochEvent from trainer import EndEpochEvent
from trainer import BeginStepEvent from trainer import BeginStepEvent
from trainer import EndStepEvent from trainer import EndStepEvent
from trainer import CheckpointConfig
import inferencer import inferencer
from inferencer import Inferencer from inferencer import Inferencer
...@@ -116,7 +117,7 @@ def __bootstrap__(): ...@@ -116,7 +117,7 @@ def __bootstrap__():
read_env_flags = [ read_env_flags = [
'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir', 'use_pinned_memory', 'check_nan_inf', 'benchmark', 'warpctc_dir',
'eager_delete_scope' 'eager_delete_scope', 'use_mkldnn'
] ]
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
read_env_flags += [ read_env_flags += [
......
...@@ -56,6 +56,8 @@ class Inferencer(object): ...@@ -56,6 +56,8 @@ class Inferencer(object):
else: else:
self.exe = executor.Executor(self.place) self.exe = executor.Executor(self.place)
self.inference_program = self.inference_program.clone(for_test=True)
def infer(self, inputs, return_numpy=True): def infer(self, inputs, return_numpy=True):
""" """
:param inputs: a map of {"input_name": input_var} that will be feed into the inference program :param inputs: a map of {"input_name": input_var} that will be feed into the inference program
......
...@@ -24,7 +24,8 @@ __all__ = [ ...@@ -24,7 +24,8 @@ __all__ = [
'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params', 'save_vars', 'save_params', 'save_persistables', 'load_vars', 'load_params',
'load_persistables', 'save_inference_model', 'load_inference_model', 'load_persistables', 'save_inference_model', 'load_inference_model',
'get_inference_program', 'save_checkpoint', 'load_checkpoint', 'get_inference_program', 'save_checkpoint', 'load_checkpoint',
'clean_checkpoint' 'clean_checkpoint', 'load_persist_vars_without_grad',
'save_persist_vars_without_grad', 'get_latest_checkpoint_serial'
] ]
...@@ -457,95 +458,161 @@ def get_parameter_value_by_name(name, executor, program=None): ...@@ -457,95 +458,161 @@ def get_parameter_value_by_name(name, executor, program=None):
SUCCESS_MARK_FILENAME = "_SUCCESS" SUCCESS_MARK_FILENAME = "_SUCCESS"
CHECKPOINT_PREFIX = "checkpoint" CHECKPOINT_PREFIX = "checkpoint"
MODEL_DIR = "__model__"
TRAINER_PREFIX = "trainer"
CHECKPOINT_SEPARATOR = "_" CHECKPOINT_SEPARATOR = "_"
def save_checkpoint(executor, def save_checkpoint(executor,
checkpoint_dir=None, checkpoint_dir,
max_num_checkpoints=3, trainer_id,
save_interval_secs=600, trainer_args=None,
main_program=None): main_program=None,
max_num_checkpoints=3):
""" """
Save Checkpoint will save persistable LodTensor variables from main_program in checkpoint directory, Save Checkpoint will save persistable LodTensor variables from main_program in checkpoint directory,
the directory named by serial number from 0 to (n -1), save_checkpoint use LRU strategy the directory named by serial number from 0 to (n -1), save_checkpoint use LRU strategy
to keep numbers of checkpoint directory, the numbers of checkpoint directory are max_num_checkpoints at most, to keep numbers of checkpoint directory, the numbers of checkpoint directory are max_num_checkpoints at most,
The interval between two saved checkpoints must greater than save_interval_secs. The interval between two saved checkpoints must greater than save_interval_secs.
:param executor :param executor executor for save the value
:param checkpoint_dir :param checkpoint_dir the checkpoint directory
:param max_num_checkpoints :param trainer_id currect trainer id, if id is equal to 0, the trainer is chief
:param save_interval_secs :param main_program will save all variables in program
:param main_program :param max_num_checkpoints will keep numbers of checkpoint serials not bigger than max_num_checkpoints
""" """
if checkpoint_dir is None: if checkpoint_dir is None:
checkpoint_dir = os.getcwd() raise ValueError("'checkpoint_dir' should not be None")
if trainer_args:
assert isinstance(trainer_args, dict)
if not os.path.isdir(checkpoint_dir): if not os.path.isdir(checkpoint_dir):
os.makedirs(checkpoint_dir) os.makedirs(checkpoint_dir)
serial = _get_lastest_checkpoint_dir(checkpoint_dir) serial = get_latest_checkpoint_serial(checkpoint_dir) + 1
if serial >= 0 and not _interval_secs_exceed( cur_dir = _get_serial_dir(checkpoint_dir, serial)
_get_serial_dir(serial, checkpoint_dir), save_interval_secs):
return
serial += 1 save_trainer_args(cur_dir, trainer_id, trainer_args)
cur_dir = _get_serial_dir(serial, checkpoint_dir)
save_vars( if trainer_id == 0:
executor, save_persist_vars_without_grad(executor, cur_dir, main_program)
dirname=cur_dir,
main_program=main_program, _scroll_delete(checkpoint_dir, max_num_checkpoints)
vars=None,
predicate=_is_checkpoint_var,
filename=None)
_write_success(cur_dir)
_lru_delete(checkpoint_dir, max_num_checkpoints)
def load_checkpoint(executor, checkpoint_dir=None, main_program=None): def load_checkpoint(executor, checkpoint_dir, serial, main_program):
""" """
Load checkpoint from a directory by executor, Load checkpoint from a directory by executor,
it will find the most recent saved checkpoint file and load it auto. it will find the most recent saved checkpoint file and load it auto.
:param executor :param executor executor for load the value
:param checkpoint_dir :param checkpoint_dir the checkpoint directory
:param main_program :param serial the serial folder in checkpoint directory will be load
:param main_program will load all variables in program
""" """
if checkpoint_dir is None: if checkpoint_dir is None:
checkpoint_dir = os.getcwd() raise ValueError("'checkpoint_dir' should not be None")
serial = _get_lastest_checkpoint_dir(checkpoint_dir) if serial is None or serial < 0:
raise ValueError("'serial' should not be None or <0 ")
if serial < 0: if main_program is None:
return raise ValueError('main_program should not be None.')
cur_dir = _get_serial_dir(serial, checkpoint_dir) cur_dir = _get_serial_dir(checkpoint_dir, serial)
load_persist_vars_without_grad(executor, cur_dir, main_program, True)
load_vars(
executor,
dirname=cur_dir,
main_program=main_program,
predicate=_is_checkpoint_var,
filename=None)
def clean_checkpoint(checkpoint_dir, delete_dir=False): def clean_checkpoint(checkpoint_dir, delete_dir=False):
""" """
clean the checkpoint dir, when the train exits normally, the trainer will call clean_checkpoint to delete checkpoint directory saved before. clean the checkpoint dir, when the train exits normally, the trainer will call clean_checkpoint to delete checkpoint directory saved before.
delete_dir only works when the directory is empty, otherwise, OSError is raised. delete_dir only works when the directory is empty, otherwise, OSError is raised.
:param checkpoint_dir
:param delete_dir
""" """
if checkpoint_dir is None: if checkpoint_dir is None:
checkpoint_dir = os.getcwd() raise ValueError("'checkpoint_dir' should not be None")
_lru_delete(checkpoint_dir, max_num_checkpoints=0) _scroll_delete(checkpoint_dir, max_num_checkpoints=0)
if delete_dir and not os.listdir(checkpoint_dir): if delete_dir and not os.listdir(checkpoint_dir):
os.rmdir(checkpoint_dir) os.rmdir(checkpoint_dir)
def _get_serial_dir(serial, checkpoint_dir): def load_persist_vars_without_grad(executor,
serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial) dirname,
return os.path.join(checkpoint_dir, serial_folder) program,
has_model_dir=False):
"""
load_persist_vars_without_grad will load variables from a directory by an executor,
the variable named end with "@GRAD" will not be loaded.
:param executor executor for load the value
:param dirname the checkpoint directory
:param program will load all variables in program
:param has_model_dir if has_model_dir is True, will load variables from sub directory named __model__
"""
if has_model_dir:
dirname = _get_model_dir(dirname)
load_vars(
executor,
dirname=dirname,
main_program=program,
predicate=_is_checkpoint_var,
filename=None)
def save_persist_vars_without_grad(executor, dirname, program):
"""
save_persist_vars_without_grad will save variables to a directory by an executor,
the variable named end with "@GRAD" will not be saved.
:param executor executor for load the value
:param dirname the checkpoint directory
:param program will load all variables in program
"""
cur_dir = _get_model_dir(dirname)
save_vars(
executor,
dirname=cur_dir,
main_program=program,
vars=None,
predicate=_is_checkpoint_var,
filename=None)
_write_success(cur_dir)
def save_trainer_args(dirname, trainer_id, trainer_args):
assert isinstance(trainer_args, dict)
cur_dir = _get_trainer_dir(dirname, trainer_id)
for name, value in trainer_args.iteritems():
args_file = os.path.join(cur_dir, name)
with open(args_file, 'w') as f:
f.write(str(value))
_write_success(cur_dir)
def load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args):
assert isinstance(trainer_args, list)
cur_dir = _get_serial_dir(checkpoint_dir, serial)
cur_dir = _get_trainer_dir(cur_dir, trainer_id)
ret_values = []
for arg in trainer_args:
cur_file = os.path.join(cur_dir, arg)
with open(cur_file, 'r') as f:
contents = f.read()
ret_values.append(contents.strip())
return ret_values
def _is_checkpoint_var(var): def _is_checkpoint_var(var):
...@@ -559,36 +626,74 @@ def _is_checkpoint_var(var): ...@@ -559,36 +626,74 @@ def _is_checkpoint_var(var):
var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \ var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
var.desc.type() == core.VarDesc.VarType.RAW: var.desc.type() == core.VarDesc.VarType.RAW:
return False return False
# @GRAD are named for gradient variables, checkpoint will not save it.
if "@GRAD" in var.name:
return False
# .trainer_ are named for distribute train variables, checkpoint will not save it.
if ".trainer_" in var.name:
return False
if var.name.endswith("@GRAD"): # .block is named for distribute train variables, checkpoint will not save it.
if ".block" in var.name:
return False return False
return var.persistable return var.persistable
def _interval_secs_exceed(dirname, save_interval_secs): def _get_dir_serial(dirname):
dir_time = os.path.getmtime(dirname) _, serial = dirname.split(CHECKPOINT_SEPARATOR)
if save_interval_secs > (time.time() - dir_time):
return False try:
return True serial_num = int(serial)
except ValueError:
serial_num = -1
return serial_num
def _get_serial_dir(dirname, serial):
serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial)
serial_dir = os.path.join(dirname, serial_folder)
if not os.path.isdir(serial_dir):
os.makedirs(serial_dir)
return serial_dir
def _get_model_dir(dirname):
model_dir = os.path.join(dirname, MODEL_DIR)
def _lru_delete(dirname, max_num_checkpoints=3): if not os.path.isdir(model_dir):
os.makedirs(model_dir)
return model_dir
def _get_trainer_dir(dirname, trainer_id):
trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id)
trainer_dir = os.path.join(dirname, trainer_folder)
if not os.path.isdir(trainer_dir):
os.makedirs(trainer_dir)
return trainer_dir
def _scroll_delete(dirname, max_num_checkpoints=3):
dirs = os.listdir(dirname) dirs = os.listdir(dirname)
serials = [] serial_map = {}
for serial in dirs: for serial in dirs:
try: serial_num = _get_dir_serial(serial)
serials.append(int(serial)) serial_map[serial_num] = serial
except ValueError:
continue
if len(serials) <= max_num_checkpoints: if len(serial_map.keys()) <= max_num_checkpoints:
return return
serials = serial_map.keys()
serials.sort(reverse=True) serials.sort(reverse=True)
serials = serials[max_num_checkpoints:] serials = serials[max_num_checkpoints:]
for serial in serials: for serial in serials:
cur_dir = os.path.join(dirname, str(serial)) cur_dir = _get_serial_dir(dirname, serial)
shutil.rmtree(cur_dir) shutil.rmtree(cur_dir)
...@@ -604,33 +709,30 @@ def _write_success(dirname): ...@@ -604,33 +709,30 @@ def _write_success(dirname):
f.write(now) f.write(now)
def _get_lastest_checkpoint_dir(checkpoint_dir): def get_latest_checkpoint_serial(checkpoint_dir):
""" """
get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory
:param checkpoint_dir :param checkpoint_dir
""" """
if not checkpoint_dir.strip(): if not checkpoint_dir:
return -1 return -1
def has_success(checkpoint_dir, cur_dir): def has_success(checkpoint_dir, cur_dir):
""" """
is _SUCCESS in this dir is _SUCCESS in this dir
""" """
_, serial = cur_dir.split(CHECKPOINT_SEPARATOR)
try:
int(serial)
except ValueError:
return -1
if not os.path.isdir(os.path.join(checkpoint_dir, cur_dir)): serial = _get_dir_serial(cur_dir)
if serial == -1 or not os.path.isdir(
os.path.join(checkpoint_dir, cur_dir)):
return -1 return -1
success_path = os.path.join( success_path = os.path.join(
_get_serial_dir(serial, checkpoint_dir), SUCCESS_MARK_FILENAME) _get_serial_dir(checkpoint_dir, serial), MODEL_DIR,
SUCCESS_MARK_FILENAME)
if os.path.isfile(success_path): if os.path.isfile(success_path):
return int(serial) return serial
if not os.path.isdir(checkpoint_dir): if not os.path.isdir(checkpoint_dir):
return -1 return -1
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import contextlib import contextlib
from layer_function_generator import autodoc from layer_function_generator import autodoc, templatedoc
from tensor import assign, fill_constant from tensor import assign, fill_constant
from .. import core from .. import core
from ..framework import Program, Variable, Operator from ..framework import Program, Variable, Operator
...@@ -721,26 +721,22 @@ def lod_rank_table(x, level=0): ...@@ -721,26 +721,22 @@ def lod_rank_table(x, level=0):
return table return table
@templatedoc()
def max_sequence_len(rank_table): def max_sequence_len(rank_table):
"""Max Sequence Len Operator. Given a LoDRankTable object, this layer """
returns the max length of a batch of sequences. In fact, a LoDRankTable ${comment}
object contains a list of tuples(<sequence index, sequence length>) and
the list is already sorted by sequence length in descending order, so the >>> import paddle.fluid as fluid
operator just returns the sequence length of the first tuple element. >>> x = fluid.layers.data(name='x', shape=[10], dtype='float32',
>>> lod_level=1)
>>> rank_table = layers.lod_rank_table(x=x, level=0)
>>> max_seq_len = layers.max_sequence_len(rank_table)
Args: Args:
rank_table (Variable): Input variable which is a LoDRankTable object. rank_table(${rank_table_type}): ${rank_table_comment}.
Returns: Returns:
Variable: The max length of sequence. ${out_comment}.
Examples:
.. code-block:: python
x = fluid.layers.data(name='x', shape=[10],
dtype='float32', lod_level=1)
rank_table = layers.lod_rank_table(x=x, level=0)
max_seq_len = layers.max_sequence_len(rank_table)
""" """
helper = LayerHelper("max_seqence_len", **locals()) helper = LayerHelper("max_seqence_len", **locals())
res = helper.create_tmp_variable(dtype="int64") res = helper.create_tmp_variable(dtype="int64")
......
...@@ -19,11 +19,12 @@ from ..unique_name import generate as unique_name ...@@ -19,11 +19,12 @@ from ..unique_name import generate as unique_name
from control_flow import BlockGuard from control_flow import BlockGuard
from ..layer_helper import LayerHelper from ..layer_helper import LayerHelper
from ..executor import global_scope from ..executor import global_scope
from layer_function_generator import generate_layer_fn, templatedoc
__all__ = [ __all__ = [
'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file', 'data', 'BlockGuardServ', 'ListenAndServ', 'Send', 'open_recordio_file',
'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer', 'open_files', 'read_file', 'shuffle', 'batch', 'double_buffer',
'random_data_generator', 'Preprocessor' 'random_data_generator', 'Preprocessor', 'load'
] ]
...@@ -662,3 +663,29 @@ class Preprocessor(object): ...@@ -662,3 +663,29 @@ class Preprocessor(object):
"sink_var_names": self.sink_var_names "sink_var_names": self.sink_var_names
}) })
return monkey_patch_reader_methods(self.reader) return monkey_patch_reader_methods(self.reader)
@templatedoc()
def load(out, file_path, load_as_fp16=None):
"""
${comment}
>>> import paddle.fluid as fluid
>>> tmp_tensor = fluid.layers.create_tensor(dtype='float32')
>>> fluid.layers.load(tmp_tensor, "./tmp_tensor.bin")
Args:
out(${out_type}): ${out_comment}.
file_path(${file_path_type}): ${file_path_comment}.
load_as_fp16(${load_as_fp16_type}): ${load_as_fp16_comment}.
Returns:
None
"""
helper = LayerHelper("load", **locals())
attrs = {"file_path": file_path}
if load_as_fp16 is not None:
attrs['load_as_fp16'] = load_as_fp16
helper.append_op(type="load", inputs={}, output={"Out": out}, args=attrs)
...@@ -224,7 +224,10 @@ def autodoc(comment=""): ...@@ -224,7 +224,10 @@ def autodoc(comment=""):
return __impl__ return __impl__
def templatedoc(): _inline_math_single_dollar = re.compile(r"\$([^\$]+)\$")
def templatedoc(op_type=None):
""" """
Decorator of layer function. It will use the docstring from the layer Decorator of layer function. It will use the docstring from the layer
function as the template. The template arguments are: function as the template. The template arguments are:
...@@ -238,32 +241,47 @@ def templatedoc(): ...@@ -238,32 +241,47 @@ def templatedoc():
Decorated function. Decorated function.
""" """
def trim_ending_dot(msg):
return msg.rstrip('.')
def escape_inline_math(msg):
return _inline_math_single_dollar.sub(repl=r':math:`\1`', string=msg)
def __impl__(func): def __impl__(func):
op_proto = OpProtoHolder.instance().get_op_proto(func.__name__) if op_type is None:
op_type_name = func.__name__
else:
op_type_name = op_type
op_proto = OpProtoHolder.instance().get_op_proto(op_type_name)
tmpl = string.Template(func.__doc__) tmpl = string.Template(func.__doc__)
comment_lines = op_proto.comment.split("\n") comment_lines = op_proto.comment.split("\n")
comment = "" comment = ""
for line in comment_lines: for line in comment_lines:
line = line.lstrip() line = line.strip()
comment += line if len(line) != 0:
comment += "\n" comment += escape_inline_math(line)
comment += " "
args = {"comment": comment} elif len(comment) != 0:
comment += "\n \n "
args = {"comment": trim_ending_dot(comment)}
for each_input in op_proto.inputs: for each_input in op_proto.inputs:
input_name = _convert_(each_input.name) input_name = _convert_(each_input.name)
args["{0}_comment".format(input_name)] = each_input.comment args["{0}_comment".format(input_name)] = trim_ending_dot(
each_input.comment)
args["{0}_type".format(input_name)] = "Variable" args["{0}_type".format(input_name)] = "Variable"
for each_attr in op_proto.attrs: for each_attr in op_proto.attrs:
input_name = _convert_(each_attr.name) input_name = _convert_(each_attr.name)
args["{0}_comment".format(input_name)] = each_attr.comment args["{0}_comment".format(input_name)] = trim_ending_dot(
each_attr.comment)
args["{0}_type".format(input_name)] = _type_to_str_(each_attr.type) args["{0}_type".format(input_name)] = _type_to_str_(each_attr.type)
for each_opt in op_proto.outputs: for each_opt in op_proto.outputs:
output_name = _convert_(each_opt.name) output_name = _convert_(each_opt.name)
args["{0}_comment".format(output_name)] = each_opt.comment args["{0}_comment".format(output_name)] = trim_ending_dot(
each_opt.comment)
args["{0}_type".format(output_name)] = "Variable" args["{0}_type".format(output_name)] = "Variable"
func.__doc__ = tmpl.substitute(args) func.__doc__ = tmpl.substitute(args)
return func return func
......
...@@ -11,6 +11,14 @@ ...@@ -11,6 +11,14 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
"""
When training a model, it's often useful to decay the
learning rate during training process, this is called
learning_rate_decay. There are many strategies to do
this, this module will provide some classical method.
User can also implement their own learning_rate_decay
strategy according to this module.
"""
import control_flow import control_flow
import nn import nn
...@@ -22,14 +30,6 @@ __all__ = [ ...@@ -22,14 +30,6 @@ __all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay' 'polynomial_decay', 'piecewise_decay', 'noam_decay'
] ]
"""
When training a model, it's often useful to decay the
learning rate during training process, this is called
learning_rate_decay. There are many strategies to do
this, this module will provide some classical method.
User can also implement their own learning_rate_decay
strategy according to this module.
"""
def _decay_step_counter(begin=0): def _decay_step_counter(begin=0):
...@@ -41,18 +41,20 @@ def _decay_step_counter(begin=0): ...@@ -41,18 +41,20 @@ def _decay_step_counter(begin=0):
def noam_decay(d_model, warmup_steps): def noam_decay(d_model, warmup_steps):
"""Apply decay to learning rate. """
```python Noam decay method. The numpy implementation of noam decay as follows.
lr_value = np.power(d_model, -0.5) * np.min([
np.power(current_steps, -0.5), >>> import numpy as np
np.power(warmup_steps, -1.5) * current_steps >>> lr_value = np.power(d_model, -0.5) * np.min([
]) >>> np.power(current_steps, -0.5),
``` >>> np.power(warmup_steps, -1.5) * current_steps])
Please reference `attention is all you need
<https://arxiv.org/pdf/1706.03762.pdf>`_.
Args: Args:
d_model(Variable): The dimensionality of input and output of model. d_model(Variable): The dimensionality of input and output of model.
Reference: attention is all you need
https://arxiv.org/pdf/1706.03762.pdf
warmup_steps(Variable): A super parameter. warmup_steps(Variable): A super parameter.
Returns: Returns:
......
...@@ -64,10 +64,6 @@ def auc(input, label, curve='ROC', num_thresholds=200): ...@@ -64,10 +64,6 @@ def auc(input, label, curve='ROC', num_thresholds=200):
topk_indices = helper.create_tmp_variable(dtype="int64") topk_indices = helper.create_tmp_variable(dtype="int64")
topk_out, topk_indices = nn.topk(input, k=k) topk_out, topk_indices = nn.topk(input, k=k)
auc_out = helper.create_tmp_variable(dtype="float32") auc_out = helper.create_tmp_variable(dtype="float32")
if correct is None:
correct = helper.create_tmp_variable(dtype="int64")
if total is None:
total = helper.create_tmp_variable(dtype="int64")
helper.append_op( helper.append_op(
type="accuracy", type="accuracy",
inputs={ inputs={
......
...@@ -4037,18 +4037,25 @@ def image_resize(input, ...@@ -4037,18 +4037,25 @@ def image_resize(input,
return out return out
@templatedoc(op_type="bilinear_interp")
def resize_bilinear(input, out_shape=None, scale=None, name=None): def resize_bilinear(input, out_shape=None, scale=None, name=None):
""" """
This is an alias of layer 'image_resize' with bilinear interpolation. ${comment}
Args:
input(${x_type}): ${x_comment}.
out_shape(${out_size_type}): ${out_size_comment}.
The mathematical meaning of resize bilinear layer is scale(float|None): The multiplier for the input height or width. At
Bilinear interpolation. least one of out_shape or scale must be set. And out_shape has
Bilinear interpolation is an extension of linear interpolation for a higher priority than scale. Default: None.
interpolating functions of two variables (e.g. H-direction and
W-direction in this layer) on a rectilinear 2D grid. name(str|None): The output variable name.
Returns:
For details, please refer to Wikipedia: ${out_comment}.
https://en.wikipedia.org/wiki/Bilinear_interpolation
""" """
return image_resize(input, out_shape, scale, name, 'BILINEAR') return image_resize(input, out_shape, scale, name, 'BILINEAR')
......
...@@ -73,6 +73,7 @@ __all__ = [ ...@@ -73,6 +73,7 @@ __all__ = [
'sum', 'sum',
'polygon_box_transform', 'polygon_box_transform',
'shape', 'shape',
'maxout',
] + __activations__ ] + __activations__
for _OP in set(__all__): for _OP in set(__all__):
......
...@@ -18,6 +18,7 @@ from ..framework import convert_np_dtype_to_dtype_ ...@@ -18,6 +18,7 @@ from ..framework import convert_np_dtype_to_dtype_
from ..framework import Variable from ..framework import Variable
from ..initializer import Constant, force_init_on_cpu from ..initializer import Constant, force_init_on_cpu
from ..core import VarDesc from ..core import VarDesc
from layer_function_generator import templatedoc
import numpy import numpy
__all__ = [ __all__ = [
...@@ -268,6 +269,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None): ...@@ -268,6 +269,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
return out return out
@templatedoc()
def fill_constant_batch_size_like(input, def fill_constant_batch_size_like(input,
shape, shape,
dtype, dtype,
...@@ -275,30 +277,28 @@ def fill_constant_batch_size_like(input, ...@@ -275,30 +277,28 @@ def fill_constant_batch_size_like(input,
input_dim_idx=0, input_dim_idx=0,
output_dim_idx=0): output_dim_idx=0):
""" """
**fill_constant_batch_size_like** ${comment}
This function creates a tensor of specified *shape*, *dtype* and batch size,
and initializes this with a constant supplied in *value*. The batch size is
obtained from the `input` tensor.
It also sets *stop_gradient* to True. It also sets *stop_gradient* to True.
>>> data = fluid.layers.fill_constant_batch_size_like(
>>> input=like, shape=[1], value=0, dtype='int64')
Args: Args:
input(Variable): Tensor whose dimensions will be used to get batch size input(${input_type}): ${input_comment}.
shape(tuple|list|None): Shape of output tensor
dtype(np.dtype|core.VarDesc.VarType|str): Data type of output tensor
value(float): Constant value to initialize the output tensor
input_dim_idx(int): Index of input's batch size dimension
output_dim_idx(int): Index of output's batch size dimension
Returns: shape(${shape_type}): ${shape_comment}.
Variable: The tensor variable storing the output
Examples: dtype(${dtype_type}): ${dtype_comment}.
.. code-block:: python
value(${value_type}): ${value_comment}.
data = fluid.layers.fill_constant_batch_size_like( input_dim_idx(${input_dim_idx_type}): ${input_dim_idx_comment}.
input=like, shape=[1], value=0, dtype='int64')
output_dim_idx(${output_dim_idx_type}): ${output_dim_idx_comment}.
Returns:
${out_comment}.
""" """
helper = LayerHelper("fill_constant_batch_size_like", **locals()) helper = LayerHelper("fill_constant_batch_size_like", **locals())
out = helper.create_tmp_variable(dtype=dtype) out = helper.create_tmp_variable(dtype=dtype)
...@@ -501,22 +501,6 @@ def save_combine(x, file_path, overwrite=True): ...@@ -501,22 +501,6 @@ def save_combine(x, file_path, overwrite=True):
"overwrite": overwrite}) "overwrite": overwrite})
def load(out, file_path):
"""
Loads a variable from a given file.
Args:
out(variable): The variable to be read from the disk file.
file_path(str): The path of the disk file.
"""
helper = LayerHelper("load", **locals())
helper.append_op(
type="load",
inputs={},
output={"Out": out},
args={"file_path": file_path})
def load_combine(out, file_path): def load_combine(out, file_path):
""" """
Loads a list of vairables from a single file. Loads a list of vairables from a single file.
......
...@@ -38,7 +38,7 @@ def inference_program(): ...@@ -38,7 +38,7 @@ def inference_program():
return y_predict return y_predict
def linear(): def train_program():
y = fluid.layers.data(name='y', shape=[1], dtype='float32') y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = inference_program() y_predict = inference_program()
...@@ -104,7 +104,7 @@ def main(use_cuda): ...@@ -104,7 +104,7 @@ def main(use_cuda):
# Directory for saving the trained model # Directory for saving the trained model
params_dirname = "fit_a_line.inference.model" params_dirname = "fit_a_line.inference.model"
train(use_cuda, linear, params_dirname) train(use_cuda, train_program, params_dirname)
infer(use_cuda, inference_program, params_dirname) infer(use_cuda, inference_program, params_dirname)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import unittest
import os
import tempfile
class TestCheckpoint(unittest.TestCase):
def setUp(self):
self.dirname = tempfile.mktemp()
self.max_num_checkpoints = 3
self.epoch_interval = 1
self.step_interval = 1
self.trainer_id = 0
self.chief = self.trainer_id == 0
self.place = fluid.CPUPlace()
self.epoch_id = 100
self.step_id = 20
def test_checkpoint(self):
self.save_checkpoint()
serial = fluid.io.get_latest_checkpoint_serial(self.dirname)
self.assertTrue(serial >= 0)
trainer_args = ["epoch_id", "step_id"]
epoch_id, step_id = fluid.io.load_trainer_args(
self.dirname, serial, self.trainer_id, trainer_args)
self.assertEqual(self.step_id, int(step_id))
self.assertEqual(self.epoch_id, int(epoch_id))
program = fluid.Program()
with fluid.program_guard(program):
exe = fluid.Executor(self.place)
fluid.io.load_checkpoint(exe, self.dirname, serial, program)
fluid.io.clean_checkpoint(self.dirname, delete_dir=True)
self.assertFalse(os.path.isdir(self.dirname))
def save_checkpoint(self):
config = fluid.CheckpointConfig(self.dirname, self.max_num_checkpoints,
self.epoch_interval, self.step_interval)
trainer_args = {}
trainer_args["epoch_id"] = self.epoch_id
trainer_args["step_id"] = self.step_id
program = fluid.Program()
with fluid.program_guard(program):
program.global_block().create_var(
name="scale_0",
psersistable=True,
dtype="float32",
shape=[32, 32])
exe = fluid.Executor(self.place)
for i in xrange(10):
fluid.io.save_checkpoint(exe, config.checkpoint_dir,
self.trainer_id, trainer_args, program,
config.max_num_checkpoints)
if __name__ == '__main__':
unittest.main()
...@@ -30,9 +30,6 @@ class Memory(object): ...@@ -30,9 +30,6 @@ class Memory(object):
assert val.dtype == self.ex.dtype assert val.dtype == self.ex.dtype
self.cur = val self.cur = val
def ex(self):
return self.ex
def next(self): def next(self):
self.ex = self.cur self.ex = self.cur
self.cur = None self.cur = None
......
...@@ -387,6 +387,14 @@ class TestBook(unittest.TestCase): ...@@ -387,6 +387,14 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(output) self.assertIsNotNone(output)
print(str(program)) print(str(program))
def test_maxout(self):
program = Program()
with program_guard(program):
data = layers.data(name='x', shape=[8, 6, 6], dtype="float32")
output = layers.maxout(x=data, groups=2)
self.assertIsNotNone(output)
print(str(program))
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -27,11 +27,8 @@ import parallel_executor ...@@ -27,11 +27,8 @@ import parallel_executor
from transpiler import distribute_transpiler from transpiler import distribute_transpiler
__all__ = [ __all__ = [
'Trainer', 'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent',
'BeginEpochEvent', 'EndStepEvent', 'CheckpointConfig'
'EndEpochEvent',
'BeginStepEvent',
'EndStepEvent',
] ]
...@@ -59,6 +56,35 @@ class EndStepEvent(object): ...@@ -59,6 +56,35 @@ class EndStepEvent(object):
self.metrics = metrics self.metrics = metrics
class CheckpointConfig(object):
def __init__(self,
checkpoint_dir=None,
max_num_checkpoints=3,
epoch_interval=1,
step_interval=10):
if checkpoint_dir is None:
self.checkpoint_dir = os.getcwd()
else:
self.checkpoint_dir = checkpoint_dir
self.max_num_checkpoints = max_num_checkpoints
if epoch_interval < 1:
self.epoch_interval = 1
else:
self.epoch_interval = epoch_interval
if step_interval < 1:
self.step_interval = 10
else:
self.step_interval = step_interval
self.epoch_id = 0
self.step_id = 0
self.load_serial = None
self.is_pserver = False
def check_and_get_place(place): def check_and_get_place(place):
""" """
Check the type of place or get the default place Check the type of place or get the default place
...@@ -99,13 +125,24 @@ class Trainer(object): ...@@ -99,13 +125,24 @@ class Trainer(object):
optimizer_func, optimizer_func,
param_path=None, param_path=None,
place=None, place=None,
parallel=False): parallel=False,
checkpoint_config=None):
self.__stop = False self.__stop = False
self.parallel = parallel self.parallel = parallel
# 1. we need to generate a framework.Program by calling # 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in # program_func. Reference: fluid.program_guard in
# test_word2vec.py # test_word2vec.py
# config for checkpoint
# only chief worker will save variables
self.trainer_id = 0
self.checkpoint_cfg = checkpoint_config
if self.checkpoint_cfg:
assert isinstance(self.checkpoint_cfg, CheckpointConfig)
serial = io.get_latest_checkpoint_serial(
self.checkpoint_cfg.checkpoint_dir)
self.checkpoint_cfg.load_serial = serial if serial >= 0 else None
self.scope = core.Scope() self.scope = core.Scope()
self.startup_program = framework.Program() self.startup_program = framework.Program()
...@@ -115,9 +152,9 @@ class Trainer(object): ...@@ -115,9 +152,9 @@ class Trainer(object):
program_func_outs = train_func() program_func_outs = train_func()
self.train_func_outputs = program_func_outs if isinstance( self.train_func_outputs = program_func_outs if isinstance(
program_func_outs, list) else [program_func_outs] program_func_outs, list) else [program_func_outs]
self.test_program = self.train_program.clone() self.test_program = self.train_program.clone(for_test=True)
# The fisrt element of program_func_outs is loss. # The first element of program_func_outs is loss.
loss = self.train_func_outputs[0] loss = self.train_func_outputs[0]
optimizer = optimizer_func() optimizer = optimizer_func()
...@@ -137,9 +174,25 @@ class Trainer(object): ...@@ -137,9 +174,25 @@ class Trainer(object):
exe = executor.Executor(place) exe = executor.Executor(place)
exe.run(self.startup_program) exe.run(self.startup_program)
if param_path: if self.checkpoint_cfg and self.checkpoint_cfg.load_serial:
with self._prog_and_scope_guard():
exe = executor.Executor(place)
io.load_checkpoint(exe, self.checkpoint_cfg.checkpoint_dir,
self.checkpoint_cfg.load_serial,
self.startup_program)
if not self.checkpoint_cfg.is_pserver:
epoch_id, step_id = io.load_trainer_args(
self.checkpoint_cfg.checkpoint_dir,
self.checkpoint_cfg.load_serial, self.trainer_id,
self._get_checkpoint_load_args())
self.checkpoint_cfg.epoch_id = int(epoch_id)
self.checkpoint_cfg.step_id = int(step_id)
if param_path and os.path.isdir(param_path):
# load params from param_path into scope # load params from param_path into scope
io.load_persistables(exe, dirname=param_path) io.load_persist_vars_without_grad(
exe, dirname=param_path, program=self.startup_program)
def _transpile_nccl2_dist(self): def _transpile_nccl2_dist(self):
# PADDLE_TRAINER_IPS # PADDLE_TRAINER_IPS
...@@ -194,14 +247,18 @@ class Trainer(object): ...@@ -194,14 +247,18 @@ class Trainer(object):
current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
# the unique trainer id, starting from 0, needed by trainer # the unique trainer id, starting from 0, needed by trainer
# only # only
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
# the role, should be either PSERVER or TRAINER # the role, should be either PSERVER or TRAINER
training_role = os.getenv("PADDLE_TRAINING_ROLE") training_role = os.getenv("PADDLE_TRAINING_ROLE")
with self._prog_and_scope_guard(): with self._prog_and_scope_guard():
t = distribute_transpiler.DistributeTranspiler() t = distribute_transpiler.DistributeTranspiler()
t.transpile( t.transpile(
trainer_id, pservers=pserver_endpoints, trainers=trainers) self.trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER": if training_role == "PSERVER":
if self.checkpoint_cfg:
self.is_pserver = True
self.train_program = t.get_pserver_program(current_endpoint) self.train_program = t.get_pserver_program(current_endpoint)
self.startup_program = t.get_startup_program(current_endpoint, self.startup_program = t.get_startup_program(current_endpoint,
self.train_program) self.train_program)
...@@ -294,11 +351,26 @@ class Trainer(object): ...@@ -294,11 +351,26 @@ class Trainer(object):
self._train_by_any_executor(event_handler, exe, num_epochs, reader) self._train_by_any_executor(event_handler, exe, num_epochs, reader)
def _train_by_any_executor(self, event_handler, exe, num_epochs, reader): def _train_by_any_executor(self, event_handler, exe, num_epochs, reader):
for epoch_id in range(num_epochs): if self.checkpoint_cfg:
epochs = [
epoch_id for epoch_id in range(num_epochs)
if epoch_id >= self.checkpoint_cfg.epoch_id
]
else:
epochs = [epoch_id for epoch_id in range(num_epochs)]
for epoch_id in epochs:
event_handler(BeginEpochEvent(epoch_id)) event_handler(BeginEpochEvent(epoch_id))
for step_id, data in enumerate(reader()): for step_id, data in enumerate(reader()):
if self.__stop: if self.__stop:
if self.checkpoint_cfg:
self._clean_checkpoint()
return return
if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \
and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id:
continue
begin_event = BeginStepEvent(epoch_id, step_id) begin_event = BeginStepEvent(epoch_id, step_id)
event_handler(begin_event) event_handler(begin_event)
if begin_event.fetch_metrics: if begin_event.fetch_metrics:
...@@ -309,8 +381,13 @@ class Trainer(object): ...@@ -309,8 +381,13 @@ class Trainer(object):
]) ])
else: else:
metrics = exe.run(feed=data, fetch_list=[]) metrics = exe.run(feed=data, fetch_list=[])
if self.checkpoint_cfg:
self._save_checkpoint(epoch_id, step_id)
event_handler(EndStepEvent(epoch_id, step_id, metrics)) event_handler(EndStepEvent(epoch_id, step_id, metrics))
event_handler(EndEpochEvent(epoch_id)) event_handler(EndEpochEvent(epoch_id))
if self.checkpoint_cfg:
self._clean_checkpoint()
def _test_by_executor(self, reader, feed_order, fetch_list): def _test_by_executor(self, reader, feed_order, fetch_list):
with executor.scope_guard(self.scope): with executor.scope_guard(self.scope):
...@@ -349,6 +426,38 @@ class Trainer(object): ...@@ -349,6 +426,38 @@ class Trainer(object):
loss_name=self.train_func_outputs[0].name) loss_name=self.train_func_outputs[0].name)
return self._get_parallel_executor() return self._get_parallel_executor()
def _clean_checkpoint(self):
assert self.checkpoint_cfg
io.clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir)
def _get_checkpoint_load_args(self):
"""
epoch_id and step_id are runtime arguments, they are not variables, will load them independently.
"""
return ["epoch_id", "step_id"]
def _get_checkpoint_save_args(self, epoch_id, step_id):
"""
epoch_id and step_id are runtime arguments, they are not variables, will save them independently.
"""
trainer_args = {}
trainer_args["epoch_id"] = epoch_id
trainer_args["step_id"] = step_id
return trainer_args
def _save_checkpoint(self, epoch_id, step_id):
assert self.checkpoint_cfg
if epoch_id % self.checkpoint_cfg.epoch_interval == 0 and step_id % self.checkpoint_cfg.step_interval == 0:
exe = executor.Executor(self.place)
io.save_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
trainer_id=self.trainer_id,
trainer_args=self._get_checkpoint_save_args(epoch_id, step_id),
main_program=self.train_program,
max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints)
def build_feed_var_list(program, feed_order): def build_feed_var_list(program, feed_order):
if not isinstance(program, framework.Program): if not isinstance(program, framework.Program):
......
...@@ -177,6 +177,7 @@ class DistributeTranspiler: ...@@ -177,6 +177,7 @@ class DistributeTranspiler:
dtype=table_grad_var.dtype) dtype=table_grad_var.dtype)
for index in range(len(self.pserver_endpoints)) for index in range(len(self.pserver_endpoints))
] ]
return param_list, grad_list
def _init_splited_vars(self, slice_var_up): def _init_splited_vars(self, slice_var_up):
# update these mappings for further transpile: # update these mappings for further transpile:
...@@ -199,8 +200,8 @@ class DistributeTranspiler: ...@@ -199,8 +200,8 @@ class DistributeTranspiler:
grad_list.append(g) grad_list.append(g)
param_grad_set.add(g.name) param_grad_set.add(g.name)
self._update_dist_lookup_table_vars(param_list, grad_list, param_list, grad_list = self._update_dist_lookup_table_vars(
self.params_grads) param_list, grad_list, self.params_grads)
if slice_var_up: if slice_var_up:
# when we slice var up into blocks, we will slice the var according to # when we slice var up into blocks, we will slice the var according to
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册