提交 b7146d60 编写于 作者: D Dang Qingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into quantize_transpiler_update

......@@ -110,6 +110,20 @@ function(find_fluid_modules TARGET_NAME)
endif()
endfunction(find_fluid_modules)
# find all third_party modules is used for paddle static library
# for reduce the dependency when building the inference libs.
set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY)
function(find_fluid_thirdparties TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "third_party" pos)
if(pos GREATER 1)
get_property(fluid_ GLOBAL PROPERTY FLUID_THIRD_PARTY)
set(fluid_third_partys ${fluid_third_partys} ${TARGET_NAME})
set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY "${fluid_third_partys}")
endif()
endfunction(find_fluid_thirdparties)
function(merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
......@@ -204,18 +218,13 @@ function(merge_static_libs TARGET_NAME)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
#if(NOT $<TARGET_FILE:${lib}> MATCHES "lib.*\\.lib")
# message("library" ${lib})
# set(libfiles ${libfiles} lib$<TARGET_FILE:${lib}>)
#else()
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
#endif()
endforeach()
# windows cmd return error in clean env.
# COMMAND del "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
# msvc will put libarary in directory of "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.lib ${libfiles}
COMMAND cmake -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}"
COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/lib${TARGET_NAME}.lib ${libfiles}
)
endif(WIN32)
endfunction(merge_static_libs)
......
......@@ -2,6 +2,6 @@
Thanks for reading PaddlePaddle documentation.
Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [Fluiddoc Repo](https://github.com/PaddlePaddle/Paddle) and updated in Fluiddoc Repo.
Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [FluidDoc Repo](https://github.com/PaddlePaddle/FluidDoc) and updated there.
Please turn to Fluiddoc Repo for the latest documentation.
Please turn to FluidDoc Repo for the latest documentation.
......@@ -22,9 +22,6 @@ paddle.fluid.Operator.rename_input ArgSpec(args=['self', 'old_name', 'new_name']
paddle.fluid.Operator.rename_output ArgSpec(args=['self', 'old_name', 'new_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.set_attr ArgSpec(args=['self', 'name', 'val'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.to_string ArgSpec(args=['self', 'throw_on_error'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.__init__ ArgSpec(args=['self', 'block', 'shape', 'dtype'], varargs=None, keywords='kwargs', defaults=None)
paddle.fluid.Parameter.astype ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.program_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
......@@ -35,25 +32,12 @@ paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None,
paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False))
paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.Trainer.__init__ ArgSpec(args=['self', 'train_func', 'optimizer_func', 'param_path', 'place', 'parallel', 'checkpoint_config'], varargs=None, keywords=None, defaults=(None, None, False, None))
paddle.fluid.Trainer.save_inference_model ArgSpec(args=['self', 'param_path', 'feeded_var_names', 'target_var_indexes'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Trainer.save_params ArgSpec(args=['self', 'param_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Trainer.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Trainer.test ArgSpec(args=['self', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Trainer.train ArgSpec(args=['self', 'num_epochs', 'event_handler', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.BeginEpochEvent.__init__ ArgSpec(args=['self', 'epoch_id'], varargs=None, keywords=None, defaults=None)
paddle.fluid.EndEpochEvent.__init__ ArgSpec(args=['self', 'epoch_id'], varargs=None, keywords=None, defaults=None)
paddle.fluid.BeginStepEvent.__init__ ArgSpec(args=['self', 'epoch_id', 'step_id'], varargs=None, keywords=None, defaults=None)
paddle.fluid.EndStepEvent.__init__ ArgSpec(args=['self', 'epoch_id', 'step_id', 'metrics'], varargs=None, keywords=None, defaults=None)
paddle.fluid.CheckpointConfig.__init__ ArgSpec(args=['self', 'checkpoint_dir', 'max_num_checkpoints', 'epoch_interval', 'step_interval'], varargs=None, keywords=None, defaults=(None, 3, 1, 10))
paddle.fluid.Inferencer.__init__ ArgSpec(args=['self', 'infer_func', 'param_path', 'place', 'parallel'], varargs=None, keywords=None, defaults=(None, False))
paddle.fluid.Inferencer.infer ArgSpec(args=['self', 'inputs', 'return_numpy'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__
......@@ -160,7 +144,16 @@ paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None
paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None))
paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
paddle.fluid.layers.stanh ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None))
paddle.fluid.layers.hard_sigmoid ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None))
paddle.fluid.layers.swish ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None))
paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.layers.brelu ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None))
paddle.fluid.layers.leaky_relu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None))
paddle.fluid.layers.soft_relu ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None))
paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None))
paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
......@@ -169,6 +162,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
......@@ -233,15 +234,7 @@ paddle.fluid.layers.Print ArgSpec(args=['input', 'first_n', 'message', 'summariz
paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.mean ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.scale ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.elementwise_add ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.elementwise_div ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.elementwise_sub ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.elementwise_mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.elementwise_max ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.elementwise_min ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.elementwise_pow ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.clip ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.clip_by_norm ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
......@@ -256,32 +249,23 @@ paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defa
paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.logsigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.exp ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.tanh ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.tanh_shrink ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.softshrink ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sqrt ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.abs ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.ceil ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.floor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.cos ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sin ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.round ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.reciprocal ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.square ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.softplus ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.softsign ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.brelu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.leaky_relu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.soft_relu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.elu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.relu6 ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.pow ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.stanh ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.hard_sigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.swish ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.tanh ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.tanh_shrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softshrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sqrt ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.abs ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.ceil ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.floor ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.cos ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sin ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.round ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.reciprocal ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.square ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softplus ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softsign ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.uniform_random ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None))
paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None))
......@@ -340,7 +324,7 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
......@@ -354,6 +338,7 @@ paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'fi
paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max'))
paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn', 'use_mkldnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True, False))
paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
......
......@@ -22,6 +22,7 @@
#include "paddle/fluid/framework/details/op_handle_base.h"
#include "paddle/fluid/framework/garbage_collector.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor.h"
namespace paddle {
......@@ -46,17 +47,15 @@ class ReferenceCountOpHandle : public OpHandleBase {
const std::vector<std::string> &var_names,
GarbageCollector<Tensor> *gc,
AtomicReferenceCountMap *ref_cnts)
: OpHandleBase(node),
scope_(scope),
var_names_(var_names),
gc_(gc),
ref_cnts_(ref_cnts) {
: OpHandleBase(node), scope_(scope), gc_(gc), ref_cnts_(ref_cnts) {
dev_ctx_ = static_cast<platform::CUDADeviceContext *>(
platform::DeviceContextPool::Instance().Get(place));
if (IsStreamGarabageCollector()) {
PADDLE_ENFORCE(cudaSetDevice(place.device));
PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
}
for (auto &name : var_names) AddVar(name);
}
~ReferenceCountOpHandle() {
......@@ -69,19 +68,35 @@ class ReferenceCountOpHandle : public OpHandleBase {
std::string Name() const override { return "reference_count"; }
void AddVar(const std::string &name) {
auto it = var_names_.find(name);
if (it != var_names_.end())
++(it->second);
else
var_names_[name] = 1;
}
protected:
void RunImpl() override {
auto *exec_scope = scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
std::vector<LoDTensor *> tensors;
for (auto &name : var_names_) {
std::vector<Tensor *> tensors;
for (auto &pair : var_names_) {
auto &name = pair.first;
auto it = ref_cnts_->find(name);
if (it == ref_cnts_->end()) continue;
auto *var = exec_scope->FindVar(name);
if (var == nullptr || !var->IsType<LoDTensor>()) continue;
if (it->second.fetch_sub(1) <= 1) {
tensors.emplace_back(var->GetMutable<LoDTensor>());
if (var == nullptr) continue;
if (var->IsType<LoDTensor>()) {
if (it->second.fetch_sub(pair.second) <= pair.second) {
tensors.emplace_back(var->GetMutable<LoDTensor>());
}
} else if (var->IsType<SelectedRows>()) {
if (it->second.fetch_sub(pair.second) <= pair.second) {
tensors.emplace_back(
var->GetMutable<SelectedRows>()->mutable_value());
}
}
}
......@@ -91,7 +106,7 @@ class ReferenceCountOpHandle : public OpHandleBase {
}
private:
void ClearTensors(const std::vector<LoDTensor *> &tensors) {
void ClearTensors(const std::vector<Tensor *> &tensors) {
auto *gc = dynamic_cast<StreamGarbageCollector<Tensor> *>(gc_);
if (gc != nullptr) {
auto compute_stream = dev_ctx_->stream();
......@@ -112,7 +127,7 @@ class ReferenceCountOpHandle : public OpHandleBase {
const Scope *scope_;
platform::CUDADeviceContext *dev_ctx_;
std::vector<std::string> var_names_;
std::unordered_map<std::string, int> var_names_;
GarbageCollector<Tensor> *gc_; // not own
AtomicReferenceCountMap *ref_cnts_; // not own
cudaEvent_t event_;
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <queue>
#include <string>
#include <vector>
......@@ -23,6 +24,25 @@ namespace paddle {
namespace framework {
namespace details {
static ComputationOpHandle *FindNextComputationOpHandle(VarHandle *var_in) {
std::queue<VarHandleBase *> queue;
queue.push(var_in);
do {
auto *var = queue.front();
queue.pop();
for (auto *op : var->PendingOps()) {
auto *compute_op = dynamic_cast<ComputationOpHandle *>(op);
if (compute_op != nullptr && compute_op->GetPlace() == var_in->place_) {
return compute_op;
}
for (auto *out_var : op->Outputs()) {
queue.push(out_var);
}
}
} while (!queue.empty());
return nullptr;
}
std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
auto &ref_cnts = Get<DeviceReferenceCountMap>(kGlobalReferenceCount);
......@@ -34,6 +54,9 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
// Step 2: Find all variables in non-computation ops which refers to variables
// in computation ops
std::unordered_set<std::string> names;
std::unordered_map<OpHandleBase *, std::unique_ptr<ReferenceCountOpHandle>>
compute_ref_cnt_map;
auto get_ref_cnts_from_compute_op = [&](
const std::unique_ptr<OpHandleBase> &op,
const std::vector<VarHandleBase *> &vars) {
......@@ -54,15 +77,18 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
VarDesc *var_desc = var_handle->Node()->Var();
auto var_name = var_handle->Node()->Name();
// This is wierd but there is really some variables without var_desc
// This is weird but there is really some variables without var_desc
// in computation_op
if (var_desc == nullptr) {
if (compute_op->Node()->Op()->Block()->FindVar(var_name) == nullptr)
continue;
} else {
if (var_desc->Persistable() ||
var_desc->Proto()->type().type() != proto::VarType::LOD_TENSOR)
if (var_desc->Persistable()) continue;
auto var_type = var_desc->Proto()->type().type();
if (var_type != proto::VarType::LOD_TENSOR &&
var_type != proto::VarType::SELECTED_ROWS) {
continue;
}
}
// compute op only runs in one device
......@@ -93,12 +119,33 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
if (ref_cnts.count(place.device) &&
ref_cnts[place.device]->count(var_name)) {
++(*ref_cnts[place.device])[var_name];
auto *next_compute_op = FindNextComputationOpHandle(var_handle);
if (next_compute_op != nullptr) {
if (compute_ref_cnt_map.count(next_compute_op)) {
compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
VLOG(5) << "Add reference count of " << var_name << " to Operator "
<< next_compute_op->Name();
} else {
// Create new reference_count_op_handle
ir::Node *ref_cnt_node = graph->CreateEmptyNode(
"reference_count", ir::Node::Type::kOperation);
auto *ref_cnt_handle = new ReferenceCountOpHandle(
ref_cnt_node, next_compute_op->GetScope(), place, {var_name},
gcs[place.device].get(), cur_ref_cnts[place.device].get());
if (next_compute_op->Outputs().empty()) {
auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar());
next_compute_op->AddOutput(dep_var);
graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
}
ref_cnt_handle->AddInput(next_compute_op->Outputs().front());
compute_ref_cnt_map[next_compute_op].reset(ref_cnt_handle);
}
}
}
}
};
std::unordered_map<OpHandleBase *, ReferenceCountOpHandle *>
compute_ref_cnt_map;
auto &all_ops = graph->Get<GraphOps>(kGraphOps);
for (auto &op : all_ops) {
auto in_var_names = get_ref_cnts_from_compute_op(op, op->Inputs());
......@@ -113,11 +160,13 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
auto *ref_cnt_handle = new ReferenceCountOpHandle(
ref_cnt_node, compute_op->GetScope(), place, in_var_names,
gcs[place.device].get(), cur_ref_cnts[place.device].get());
auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar());
compute_op->AddOutput(dep_var);
ref_cnt_handle->AddInput(dep_var);
graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
compute_ref_cnt_map[compute_op] = ref_cnt_handle;
if (compute_op->Outputs().empty()) {
auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar());
compute_op->AddOutput(dep_var);
graph->Get<GraphDepVars>(kGraphDepVars).emplace(dep_var);
}
ref_cnt_handle->AddInput(compute_op->Outputs().front());
compute_ref_cnt_map[compute_op].reset(ref_cnt_handle);
}
for (auto &op : all_ops) {
......@@ -131,7 +180,11 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
new_all_ops.emplace_back(std::move(op));
auto it = compute_ref_cnt_map.find(new_all_ops.back().get());
if (it != compute_ref_cnt_map.end()) {
new_all_ops.emplace_back(it->second);
// Add LeafNode to ReferenceCountOpHandle
auto *dummy_leaf = new DummyVarHandle(graph->CreateControlDepVar());
graph->Get<GraphDepVars>(kGraphDepVars).emplace(dummy_leaf);
it->second->AddOutput(dummy_leaf);
new_all_ops.emplace_back(std::move(it->second));
}
}
......
......@@ -13,6 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// logging.h and windows.h conflict
#define GLOG_NO_ABBREVIATED_SEVERITIES
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/framework/tensor.h"
#include "unsupported/Eigen/CXX11/Tensor"
......@@ -46,11 +51,13 @@ struct EigenTensor {
using ConstType =
Eigen::TensorMap<Eigen::Tensor<const T, D, MajorType, IndexType>>;
static Type From(Tensor& tensor, DDim dims) {
static Type From(Tensor& tensor, DDim dims) { // NOLINT
return Type(tensor.data<T>(), EigenDim<D>::From(dims));
}
static Type From(Tensor& tensor) { return From(tensor, tensor.dims_); }
static Type From(Tensor& tensor) { // NOLINT
return From(tensor, tensor.dims_);
} // NOLINT
static ConstType From(const Tensor& tensor, DDim dims) {
return ConstType(tensor.data<T>(), EigenDim<D>::From(dims));
......@@ -64,7 +71,8 @@ struct EigenTensor {
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
static typename EigenMatrix::Type Reshape(Tensor& tensor, int num_col_dims) {
static typename EigenMatrix::Type Reshape(Tensor& tensor, // NOLINT
int num_col_dims) {
int rank = tensor.dims_.size();
PADDLE_ENFORCE(num_col_dims > 0 && num_col_dims < rank,
"`num_col_dims` must be between (0, rank_of_tensor).");
......@@ -86,11 +94,12 @@ template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
// Flatten reshapes a Tensor into an EigenVector.
static typename EigenVector::Type Flatten(Tensor& tensor) {
static typename EigenVector::Type Flatten(Tensor& tensor) { // NOLINT
return EigenVector::From(tensor, {product(tensor.dims_)});
}
static typename EigenVector::ConstType Flatten(const Tensor& tensor) {
static typename EigenVector::ConstType Flatten(
const Tensor& tensor) { // NOLINT
return EigenVector::From(tensor, {product(tensor.dims_)});
}
};
......@@ -104,7 +113,7 @@ struct EigenScalar {
using ConstType = Eigen::TensorMap<
Eigen::TensorFixedSize<const T, Eigen::Sizes<>, MajorType, IndexType>>;
static Type From(Tensor& tensor) { return Type(tensor.data<T>()); }
static Type From(Tensor& tensor) { return Type(tensor.data<T>()); } // NOLINT
static ConstType From(const Tensor& tensor) {
return ConstType(tensor.data<T>());
......
......@@ -54,6 +54,10 @@ class CompileTimeInferShapeContext : public InferShapeContext {
size_t j = 0) const override {
PADDLE_ENFORCE_LT(i, Inputs(in).size());
PADDLE_ENFORCE_LT(j, Outputs(out).size());
PADDLE_ENFORCE(Inputs(in)[i] != framework::kEmptyVarName,
"The %s[%d] is @EMPTY@", in, i);
PADDLE_ENFORCE(Outputs(out)[j] != framework::kEmptyVarName,
"The %s[%d] is @EMPTY@", out, j);
auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
if (in_var->GetType() != proto::VarType::LOD_TENSOR) {
......@@ -63,6 +67,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
PADDLE_ENFORCE_EQ(in_var->GetType(), proto::VarType::LOD_TENSOR,
"The %d-th output of Output(%s) must be LoDTensor.", j,
out);
out_var->SetLoDLevel(in_var->GetLoDLevel());
}
......
......@@ -23,6 +23,11 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#if defined(_WIN32)
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
......@@ -241,22 +246,20 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
* we will use and tell the compiler to
* link them into target.
*/
#define USE_OP_ITSELF(op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_itself_##op_type, \
"USE_OP_ITSELF must be called in global namespace"); \
extern int TouchOpRegistrar_##op_type(); \
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type()
#define USE_OP_ITSELF(op_type) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_itself_##op_type, \
"USE_OP_ITSELF must be called in global namespace"); \
extern int TouchOpRegistrar_##op_type(); \
UNUSED static int use_op_itself_##op_type##_ = TouchOpRegistrar_##op_type()
#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_kernel_##op_type##_##LIBRARY_TYPE##__, \
"USE_OP_DEVICE_KERNEL must be in global namespace"); \
extern int TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE(); \
static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_ \
__attribute__((unused)) = \
TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE()
UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_ = \
TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE()
// TODO(fengjiayi): The following macros
// seems ugly, do we have better method?
......
......@@ -11,6 +11,9 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
......
......@@ -20,6 +20,8 @@ limitations under the License. */
#include <tuple>
#include <unordered_map>
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "glog/logging.h" // For VLOG
#include "paddle/fluid/framework/attribute.h"
......
......@@ -46,6 +46,16 @@ std::vector<DDim> InferShapeContext::GetReaderDims(
return this->GetRepeatedDims(arg_names[0]);
}
void InferShapeContext::ShareLoDs(const std::string &in,
const std::string &out) const {
PADDLE_ENFORCE_EQ(Inputs(in).size(), Outputs(out).size(),
"The number of arguments in %s and %s is not equal.", in,
out);
for (size_t i = 0; i < in.size(); ++i) {
ShareLoD(in, out, i, i);
}
}
DDim InferShapeContext::GetInputsElementDim(const std::string &name,
int idx) const {
const std::vector<std::string> &names = Inputs(name);
......
......@@ -56,6 +56,8 @@ class InferShapeContext {
virtual const std::vector<std::string> &Outputs(
const std::string &name) const = 0;
void ShareLoDs(const std::string &in, const std::string &out) const;
virtual void ShareLoD(const std::string &in, const std::string &out,
size_t i = 0, size_t j = 0) const = 0;
......
......@@ -71,7 +71,7 @@ bool AnalysisPredictor::Init(
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
} else {
LOG(ERROR) << "fail to load inference model.";
LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
return false;
}
......@@ -109,8 +109,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
argument_.origin_program_desc.reset(
new ProgramDesc(*inference_program_->Proto()));
PADDLE_ENFORCE(config_.ir_mode == AnalysisConfig::IrPassMode::kExclude,
"Only kExclude is supported yet.");
PADDLE_ENFORCE(
config_.ir_mode == contrib::AnalysisConfig::IrPassMode::kExclude,
"Only kExclude is supported yet.");
Analyzer().DisableIrPasses(config_.ir_passes).Run(&argument_);
CHECK(argument_.transformed_program_desc);
......@@ -126,8 +127,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config) {
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
const contrib::AnalysisConfig& config) {
VLOG(3) << "create AnalysisConfig";
if (config.use_gpu) {
// 1. GPU memeroy
......@@ -154,4 +156,11 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
return predictor;
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<contrib::AnalysisConfig>(
const contrib::AnalysisConfig& config) {
return CreatePaddlePredictor<contrib::AnalysisConfig,
PaddleEngineKind::kAnalysis>(config);
}
} // namespace paddle
......@@ -30,7 +30,7 @@ using framework::proto::ProgramDesc;
*/
class AnalysisPredictor : public NativePaddlePredictor {
public:
explicit AnalysisPredictor(const AnalysisConfig& config)
explicit AnalysisPredictor(const contrib::AnalysisConfig& config)
: NativePaddlePredictor(config), config_(config) {}
bool Init(const std::shared_ptr<framework::Scope>& parent_scope);
......@@ -46,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
Argument& analysis_argument() { return argument_; }
private:
AnalysisConfig config_;
contrib::AnalysisConfig config_;
Argument argument_;
};
......
......@@ -31,21 +31,24 @@
namespace paddle {
using paddle::contrib::AnakinConfig;
template <typename Target>
PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
const AnakinConfig &config) {
const contrib::AnakinConfig &config) {
CHECK(Init(config));
}
template <>
PaddleInferenceAnakinPredictor<anakin::X86>::PaddleInferenceAnakinPredictor(
const AnakinConfig &config) {
const contrib::AnakinConfig &config) {
omp_set_dynamic(0);
omp_set_num_threads(1);
mkl_set_num_threads(1);
CHECK(Init(config));
}
template <typename Target>
bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
bool PaddleInferenceAnakinPredictor<Target>::Init(
const contrib::AnakinConfig &config) {
if (!(graph_.load(config.model_file))) {
VLOG(3) << "fail to load graph from " << config.model_file;
return false;
......@@ -200,10 +203,11 @@ template class PaddleInferenceAnakinPredictor<anakin::X86>;
// A factory to help create difference predictor.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
const contrib::AnakinConfig &config) {
VLOG(3) << "Anakin Predictor create.";
if (config.target_type == AnakinConfig::NVGPU) {
if (config.target_type == contrib::AnakinConfig::NVGPU) {
#ifdef PADDLE_WITH_CUDA
VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
std::unique_ptr<PaddlePredictor> x(
......@@ -213,7 +217,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
LOG(ERROR) << "AnakinConfig::NVGPU could not used in ONLY-CPU environment";
return nullptr;
#endif
} else if (config.target_type == AnakinConfig::X86) {
} else if (config.target_type == contrib::AnakinConfig::X86) {
VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor<anakin::X86>(config));
......
......@@ -29,6 +29,8 @@ limitations under the License. */
namespace paddle {
using contrib::AnakinConfig;
template <typename Target>
class PaddleInferenceAnakinPredictor : public PaddlePredictor {
public:
......
......@@ -101,14 +101,11 @@ bool NativePaddlePredictor::Init(
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
} else {
LOG(ERROR) << "fail to load inference model.";
LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
return false;
}
ctx_ = executor_->Prepare(*inference_program_, 0);
if (config_._use_mkldnn) {
executor_->EnableMKLDNN(*inference_program_);
}
executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0);
......@@ -330,4 +327,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
#endif
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
const NativeConfig &config) {
return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}
} // namespace paddle
......@@ -14,6 +14,12 @@
#pragma once
// logging.h and windows.h conflict
#define GLOG_NO_ABBREVIATED_SEVERITIES
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include <map>
#include <memory>
......
......@@ -25,10 +25,11 @@ using inference::analysis::Argument;
using inference::Singleton;
using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;
using paddle::contrib::MixedRTConfig;
class TensorRTSubgraphPredictor : public NativePaddlePredictor {
public:
explicit TensorRTSubgraphPredictor(const TensorRTConfig& config)
explicit TensorRTSubgraphPredictor(const MixedRTConfig& config)
: NativePaddlePredictor(config), config_(config) {}
bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
......@@ -115,13 +116,13 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
}
private:
TensorRTConfig config_;
MixedRTConfig config_;
};
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
const TensorRTConfig& config) {
CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
const MixedRTConfig& config) {
VLOG(3) << "create TensorRTSubgraphPredictor";
if (config.use_gpu) {
// 1. GPU memeroy
......@@ -150,6 +151,13 @@ CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
return std::move(predictor);
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<MixedRTConfig>(
const MixedRTConfig& config) {
return CreatePaddlePredictor<MixedRTConfig,
PaddleEngineKind::kAutoMixedTensorRT>(config);
}
} // namespace paddle
USE_TRT_CONVERTER(elementwise_add_weight);
......
......@@ -20,6 +20,8 @@
namespace paddle {
using contrib::MixedRTConfig;
DEFINE_string(dirname, "", "Directory of the inference model.");
void CompareTensorRTWithFluid(bool enable_tensorrt) {
......@@ -32,7 +34,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
config0.fraction_of_gpu_memory = 0.3;
config0.device = 0;
TensorRTConfig config1;
MixedRTConfig config1;
config1.model_dir = FLAGS_dirname + "word2vec.inference.model";
config1.use_gpu = true;
config1.fraction_of_gpu_memory = 0.3;
......@@ -42,7 +44,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
auto predictor0 =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);
auto predictor1 =
CreatePaddlePredictor<TensorRTConfig,
CreatePaddlePredictor<MixedRTConfig,
PaddleEngineKind::kAutoMixedTensorRT>(config1);
for (int batch_id = 0; batch_id < 1; batch_id++) {
......
cmake_minimum_required(VERSION 3.0)
project(cpp_inference_demo CXX C)
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
macro(safe_set_static_flag)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endmacro()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if (WIN32)
set(CMAKE_STATIC_LIBRARY_PREFIX "lib")
if (WITH_STATIC_LIB)
safe_set_static_flag()
add_definitions(-DSTATIC_LIB)
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "/w")
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} "/w")
endif()
set(CMAKE_STATIC_LIBRARY_PREFIX "lib")
else()
set(CMAKE_STATIC_LIBRARY_PREFIX "")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
message("flags" ${CMAKE_CXX_FLAGS})
if(NOT DEFINED PADDLE_LIB)
message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
......@@ -16,14 +35,18 @@ if(NOT DEFINED DEMO_NAME)
message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
endif()
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
if(WITH_GPU)
set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
if(NOT WIN32)
set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
else()
if(CUDA_LIB STREQUAL "")
set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64")
endif()
endif(NOT WIN32)
endif()
include_directories("D:/Paddle/")
include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
......@@ -83,10 +106,18 @@ set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
${CMAKE_STATIC_LIBRARY_PREFIX}glog ${CMAKE_STATIC_LIBRARY_PREFIX}gflags ${CMAKE_STATIC_LIBRARY_PREFIX}protobuf
${EXTERNAL_LIB})
# NOTE(dzhwinter) shlwapi is deprecated.
set(DEPS ${DEPS} libcmt shlwapi)
endif(NOT WIN32)
if(WITH_GPU)
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
if(NOT WIN32)
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} )
endif()
endif()
target_link_libraries(${DEMO_NAME} ${DEPS})
......@@ -18,6 +18,8 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <algorithm>
#include <memory>
#include <thread> //NOLINT
#include "paddle/fluid/inference/paddle_inference_api.h"
......@@ -67,7 +69,8 @@ void Main(bool use_gpu) {
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
i++) {
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
}
......@@ -113,7 +116,8 @@ void MainThreads(int num_threads, bool use_gpu) {
const size_t num_elements =
outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
i++) {
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
}
......
# windows inference
本文介绍windows inference,目前只提供了静态编译,编译出paddle_fluid.lib,包含了除openblas.dll之外的所有第三方依赖库。
1. 下载最新的paddle_fluid.lib和openblas.dll,并把它们放在同一个目录下。
2. 准备预训练好的模型文件,例如models中的模型,可以将模型用safe_inference_model接口保存下来。将模型文件放到该目录下
3. 进入Paddle/paddle/fluid/inference/api/demo_ci目录,新建build目录,然后使用cmake生成vs2015的solution文件。
其中PADDLE_LIB是前面的paddle_fluid.lib对应文件夹, CUDA_LIB指定为x64格式下的cuda系统库目录文件夹。
```shell
cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DWITH_MKL=OFF -DWITH_STATIC_LIB=ON -DCMAKE_BUILD_TYPE=Release -DDEMO_NAME=inference_icnet -DPADDLE_LIB=D:\to_the_paddle_fluid.lib -DCUDA_LIB=D:\tools\v8.0\lib\x64
```
然后用vs2015打开对应的项目文件,注意使用静态链接 "/MT",生成对应的exe。将openblas.dll放到exe所在目录。
4. 该exe即为项目生成文件,可绑定运行。
## FAQ
1. cmake需要您手动下载,并添加到系统路径里
2. 路径中的不要包含空格,例如发现CUDA_LIB路径是Program Files(x86)可能会出错。可以将CUDA拷贝到一个新位置。
......@@ -28,34 +28,61 @@ limitations under the License. */
namespace paddle {
// Data type.
enum PaddleDType {
FLOAT32,
INT64,
// TODO(Superjomn) support more data types if needed.
};
/*
* Memory menage for PaddleTensor.
* The PaddleBuf holds a buffer for data input or output. The memory can be
* allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
* should be reused for better performance.
*
* For user allocated memory, the following API can be used:
* - PaddleBuf(void* data, size_t length) to set an external memory by
* specifying
* the memory address and length.
* - Reset(void* data, size_t length) to reset the PaddleBuf with an external
* memory.
* ATTENTION, for user allocated memory, deallocation should be done by users
* externally after the program finished. The PaddleBuf won't do any allocation
* or deallocation.
*
* To have the PaddleBuf allocate and manage the memory:
* - PaddleBuf(size_t length) will allocate a memory of size `length`.
* - Resize(size_t length) resize the memory to no less than `length`, ATTENTION
* if the allocated memory is larger than `length`, nothing will done.
*/
class PaddleBuf {
public:
PaddleBuf() = default;
PaddleBuf(PaddleBuf&& other);
// Copy only available when memory is managed externally.
explicit PaddleBuf(const PaddleBuf&);
PaddleBuf& operator=(const PaddleBuf&);
PaddleBuf& operator=(PaddleBuf&&);
// Do not own the memory.
PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {}
// Own memory.
// PaddleBuf allocate memory internally, and manage it.
explicit PaddleBuf(size_t length)
: data_(new char[length]), length_(length), memory_owned_(true) {}
// Resize to `length` bytes.
// Set external memory, the PaddleBuf won't manage it.
PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {}
// Copy only available when memory is managed externally.
explicit PaddleBuf(const PaddleBuf&);
// Resize the memory.
void Resize(size_t length);
// Reset to external memory.
// Reset to external memory, with address and length set.
void Reset(void* data, size_t length);
// Tell whether the buffer is empty.
bool empty() const { return length_ == 0; }
// Get the memory address.
void* data() const { return data_; }
// Get the memory length.
size_t length() const { return length_; }
~PaddleBuf() { Free(); }
PaddleBuf& operator=(const PaddleBuf&);
PaddleBuf& operator=(PaddleBuf&&);
PaddleBuf() = default;
PaddleBuf(PaddleBuf&& other);
private:
void Free();
......@@ -64,6 +91,7 @@ class PaddleBuf {
bool memory_owned_{true};
};
// Basic input and output data structure for PaddlePredictor.
struct PaddleTensor {
PaddleTensor() = default;
std::string name; // variable name.
......@@ -73,19 +101,8 @@ struct PaddleTensor {
std::vector<std::vector<size_t>> lod; // Tensor+LoD equals LoDTensor
};
enum class PaddleEngineKind {
kNative = 0, // Use the native Fluid facility.
kAnakin, // Use Anakin for inference.
kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
kAnalysis
// TODO(Superjomn) support following engines latter.
// kTensorRT, // Use TensorRT for inference.
// kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
};
/*
* A simple Inference API for Paddle. Currently this API can be used by
* non-sequence scenerios.
* A simple Inference API for Paddle.
*/
class PaddlePredictor {
public:
......@@ -120,26 +137,53 @@ struct NativeConfig : public PaddlePredictor::Config {
// GPU related fields.
bool use_gpu{false};
int device{0};
float fraction_of_gpu_memory{-1.f}; // Negative to notify initialization.
// NOTE: NOT use it, just for the internal test, will discard later
bool _use_mkldnn{false};
// Specify the variable's name of each input.
bool specify_input_name{false};
float fraction_of_gpu_memory{-1.f}; // Change to a float in (0,1] if needed.
// Specify the exact path of program and parameter files.
std::string prog_file;
std::string param_file;
// Specify the variable's name of each input if input tensors don't follow the
// `feeds` and `fetches` of the phase `save_inference_model`.
bool specify_input_name{false};
};
// Configurations for Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
enum TargetType { NVGPU = 0, X86 };
int device;
std::string model_file;
int max_batch_size{-1};
TargetType target_type;
// A factory to help create different predictors.
//
// Usage:
//
// NativeConfig config;
// ... // change the configs.
// auto native_predictor = CreatePaddlePredictor(config);
//
// FOR EXTENSION DEVELOPER:
// Different predictors are designated by config type. Similar configs can be
// merged, but there shouldn't be a huge config containing different fields for
// more than one kind of predictors.
template <typename ConfigT>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
// NOTE The following APIs are too trivial, we will discard it in the following
// versions.
enum class PaddleEngineKind {
kNative = 0, // Use the native Fluid facility.
kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
kAnalysis, // More optimization.
kAnakin // Use Anakin for inference, not mature yet.
};
struct TensorRTConfig : public NativeConfig {
template <typename ConfigT, PaddleEngineKind engine>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
// ==
//
// -----------------------------------------------------------------------------------
// NOTE: The following APIs are not mature yet, we are still working on them.
namespace contrib {
// Accelerate GPU computation with TensorRT engine.
struct MixedRTConfig : public NativeConfig {
// Determine whether a subgraph will be executed by TRT.
int min_subgraph_size{1};
// While TensorRT allows an engine optimized for a given max batch size
......@@ -154,7 +198,6 @@ struct TensorRTConfig : public NativeConfig {
// NOTE WIP, not stable yet.
struct AnalysisConfig : public NativeConfig {
//
enum class IrPassMode {
kSystem, // Use system default passes, not customize.
kInclude, // Specify the passes in `ir_passes`.
......@@ -165,18 +208,21 @@ struct AnalysisConfig : public NativeConfig {
IrPassMode ir_mode{IrPassMode::kExclude};
// attention lstm fuse works only on some specific models, disable as default.
std::vector<std::string> ir_passes{"attention_lstm_fuse_pass"};
// NOTE this is just for internal development, please not use it.
bool _use_mkldnn{false};
};
// A factory to help create different predictors.
//
// FOR EXTENSION DEVELOPER:
// Different predictors are designated by config type and engine kind. Similar
// configs can be merged, but there shouldn't be a huge config containing
// different fields for more than one kind of predictors.
//
// Similarly, each engine kind should map to a unique predictor implementation.
template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
// Configurations for Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
enum TargetType { NVGPU = 0, X86 };
int device;
std::string model_file;
int max_batch_size{-1};
TargetType target_type;
};
} // namespace contrib
int PaddleDtypeSize(PaddleDType dtype);
......
......@@ -22,10 +22,10 @@ DEFINE_string(model, "", "Directory of the inference model(mobile_v2).");
namespace paddle {
AnakinConfig GetConfig() {
AnakinConfig config;
contrib::AnakinConfig GetConfig() {
contrib::AnakinConfig config;
// using AnakinConfig::X86 if you need to use cpu to do inference
config.target_type = AnakinConfig::NVGPU;
config.target_type = contrib::AnakinConfig::NVGPU;
config.model_file = FLAGS_model;
config.device = 0;
config.max_batch_size = 1;
......@@ -33,9 +33,10 @@ AnakinConfig GetConfig() {
}
TEST(inference, anakin) {
AnakinConfig config = GetConfig();
auto config = GetConfig();
auto predictor =
CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
config);
float data[1 * 3 * 224 * 224] = {1.0f};
PaddleTensor tensor;
......
......@@ -97,10 +97,10 @@ void Data::get_batch_data(
namespace paddle {
AnakinConfig GetConfig() {
AnakinConfig config;
contrib::AnakinConfig GetConfig() {
contrib::AnakinConfig config;
// using AnakinConfig::X86 if you need to use cpu to do inference
config.target_type = AnakinConfig::X86;
config.target_type = contrib::AnakinConfig::X86;
config.model_file = FLAGS_model;
config.device = 0;
config.max_batch_size = 1000; // the max number of token
......@@ -121,9 +121,10 @@ void set_tensor(std::string name, std::vector<int> shape,
}
void single_test() {
AnakinConfig config = GetConfig();
auto config = GetConfig();
auto predictor =
CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
config);
int max_batch_size = 1000;
std::string feature_file = FLAGS_datapath;
......
......@@ -95,7 +95,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
void SetConfig(AnalysisConfig *cfg) {
void SetConfig(contrib::AnalysisConfig *cfg) {
cfg->prog_file = FLAGS_infer_model + "/__model__";
cfg->param_file = FLAGS_infer_model + "/param";
cfg->use_gpu = false;
......@@ -117,7 +117,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
// Easy for profiling independently.
TEST(Analyzer_Chinese_ner, profile) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
......@@ -141,7 +141,7 @@ TEST(Analyzer_Chinese_ner, profile) {
// Check the fuse status
TEST(Analyzer_Chinese_ner, fuse_statis) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
......@@ -155,7 +155,7 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_Chinese_ner, compare) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
......
......@@ -149,7 +149,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
void SetConfig(AnalysisConfig *cfg) {
void SetConfig(contrib::AnalysisConfig *cfg) {
cfg->prog_file = FLAGS_infer_model + "/__model__";
cfg->param_file = FLAGS_infer_model + "/param";
cfg->use_gpu = false;
......@@ -172,7 +172,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
// Easy for profiling independently.
TEST(Analyzer_rnn1, profile) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
......@@ -183,7 +183,7 @@ TEST(Analyzer_rnn1, profile) {
// Check the fuse status
TEST(Analyzer_rnn1, fuse_statis) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
......@@ -198,7 +198,7 @@ TEST(Analyzer_rnn1, fuse_statis) {
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_rnn1, compare) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
......@@ -208,7 +208,7 @@ TEST(Analyzer_rnn1, compare) {
// Test Multi-Thread.
TEST(Analyzer_rnn1, multi_thread) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
......
......@@ -38,6 +38,8 @@ DEFINE_bool(use_analysis, true,
namespace paddle {
namespace inference {
using contrib::AnalysisConfig;
void CompareResult(const std::vector<PaddleTensor> &outputs,
const std::vector<PaddleTensor> &ref_outputs) {
EXPECT_GT(outputs.size(), 0UL);
......@@ -74,11 +76,11 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
}
}
std::unique_ptr<PaddlePredictor> GetPrediction(AnalysisConfig config,
bool use_analysis = true) {
std::unique_ptr<PaddlePredictor> CreateTestPredictor(
const AnalysisConfig &config, bool use_analysis = true) {
if (use_analysis) {
return CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
return CreatePaddlePredictor<contrib::AnalysisConfig,
PaddleEngineKind::kAnalysis>(config);
} else {
return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
config);
......@@ -92,7 +94,7 @@ size_t GetSize(const PaddleTensor &out) {
std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
int *num_ops) {
auto predictor = GetPrediction(config);
auto predictor = CreateTestPredictor(config);
AnalysisPredictor *analysis_predictor =
dynamic_cast<AnalysisPredictor *>(predictor.get());
auto &fuse_statis = analysis_predictor->analysis_argument()
......@@ -113,11 +115,12 @@ std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
}
void TestOneThreadPrediction(
AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
const AnalysisConfig &config,
const std::vector<std::vector<PaddleTensor>> &inputs,
std::vector<PaddleTensor> *outputs, bool use_analysis = true) {
int batch_size = FLAGS_batch_size;
int num_times = FLAGS_repeat;
auto predictor = GetPrediction(config, use_analysis);
auto predictor = CreateTestPredictor(config, use_analysis);
Timer timer;
timer.tic();
for (int i = 0; i < num_times; i++) {
......@@ -130,7 +133,8 @@ void TestOneThreadPrediction(
}
void TestMultiThreadPrediction(
AnalysisConfig config, const std::vector<std::vector<PaddleTensor>> inputs,
const AnalysisConfig &config,
const std::vector<std::vector<PaddleTensor>> &inputs,
std::vector<PaddleTensor> *outputs, int num_threads,
bool use_analysis = true) {
int batch_size = FLAGS_batch_size;
......@@ -140,7 +144,7 @@ void TestMultiThreadPrediction(
// TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
// because AttentionLSTM's hard code nodeid will be damanged.
for (int tid = 0; tid < num_threads; ++tid) {
predictors.emplace_back(GetPrediction(config, use_analysis));
predictors.emplace_back(CreateTestPredictor(config, use_analysis));
}
for (int tid = 0; tid < num_threads; ++tid) {
threads.emplace_back([&, tid]() {
......@@ -164,8 +168,8 @@ void TestMultiThreadPrediction(
}
}
void TestPrediction(AnalysisConfig config,
const std::vector<std::vector<PaddleTensor>> inputs,
void TestPrediction(const AnalysisConfig &config,
const std::vector<std::vector<PaddleTensor>> &inputs,
std::vector<PaddleTensor> *outputs, int num_threads,
bool use_analysis = FLAGS_use_analysis) {
LOG(INFO) << "use_analysis: " << use_analysis;
......@@ -178,8 +182,8 @@ void TestPrediction(AnalysisConfig config,
}
void CompareNativeAndAnalysis(
AnalysisConfig config,
const std::vector<std::vector<PaddleTensor>> inputs) {
const AnalysisConfig &config,
const std::vector<std::vector<PaddleTensor>> &inputs) {
std::vector<PaddleTensor> native_outputs, analysis_outputs;
TestOneThreadPrediction(config, inputs, &native_outputs, false);
TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <math.h> // for sqrt in CPU and CUDA
#include <Eigen/Dense>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
......@@ -306,26 +307,43 @@ class AdamOpKernel : public framework::OpKernel<T> {
VLOG(3) << "grad row size is 0!!";
return;
}
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
scatter::MergeAdd<DeviceContext, T> merge_func;
auto& grad_merge = *(ctx.scope()
.NewScope()
.Var("sparse_adam_grad_merge")
->GetMutable<framework::SelectedRows>());
merge_func(ctx.template device_context<DeviceContext>(), grad,
&grad_merge);
std::vector<int64_t> cpu_rows(grad.rows().begin(), grad.rows().end());
bool is_strict_sorted = true;
for (size_t i = 1; i < cpu_rows.size(); ++i) {
if (cpu_rows[i - 1] >= cpu_rows[i]) {
is_strict_sorted = false;
break;
}
}
const framework::SelectedRows* grad_merge_ptr;
if (is_strict_sorted) {
grad_merge_ptr = &grad;
} else {
// merge duplicated rows if any.
// The rows of grad_merge have been sorted inside MergeAdd functor
scatter::MergeAdd<DeviceContext, T> merge_func;
auto* grad_merge_var = const_cast<framework::Scope&>(ctx.scope())
.Var()
->GetMutable<framework::SelectedRows>();
merge_func(ctx.template device_context<DeviceContext>(), grad,
grad_merge_var);
grad_merge_ptr = grad_merge_var;
}
auto& grad_merge = *grad_merge_ptr;
auto& grad_tensor = grad_merge.value();
const T* grad_data = grad_tensor.template data<T>();
int64_t* rows = nullptr;
// When compiled without CUDA, the CUDAMutableData() interface should not be
const int64_t* rows = nullptr;
// When compiled without CUDA, the CUDAData() interface should not be
// provided.
#if defined(PADDLE_WITH_CUDA)
if (platform::is_gpu_place(ctx.GetPlace())) {
rows = grad_merge.mutable_rows()->CUDAMutableData(ctx.GetPlace());
rows = grad_merge.rows().CUDAData(ctx.GetPlace());
} else {
#endif
rows = grad_merge.mutable_rows()->data();
rows = grad_merge.rows().data();
#if defined(PADDLE_WITH_CUDA)
}
......
......@@ -94,8 +94,20 @@ class ConcatOpGrad : public framework::OperatorWithKernel {
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X"));
ctx->ShareLoD("X", framework::GradVarName("X"));
auto in_x = "X";
auto out_x_g_n = framework::GradVarName(in_x);
ctx->SetOutputsDim(out_x_g_n, ctx->GetInputsDim(in_x));
auto &in_names = ctx->Inputs(in_x);
auto &out_names = ctx->Outputs(out_x_g_n);
PADDLE_ENFORCE_EQ(
in_names.size(), out_names.size(),
"The number of arguments in %s[%d] and %s[%d] is not equal.", in_x,
in_names.size(), out_x_g_n, out_names.size());
for (size_t i = 0; i < in_names.size(); ++i) {
if (out_names[i] != framework::kEmptyVarName) {
ctx->ShareLoD(in_x, out_x_g_n, i, i);
}
}
}
};
......
......@@ -23,8 +23,6 @@ namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
static constexpr int kROISize = 4;
template <typename T>
bool GT_E(T a, T b) {
return (a > b) || fabs(a - b) < 1e-4;
......
......@@ -46,9 +46,15 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
**Scale operator**
Multiply the input tensor with a float scalar to scale the input tensor.
Apply scaling and bias addition to the input tensor.
$$Out = scale*X$$
if bias_after_scale=True:
$$Out = scale*X + bias$$
else:
$$Out = scale*(X + bias)$$
)DOC");
AddAttr<float>("scale", "The scaling factor of the scale operator.")
.SetDefault(1.0);
......
......@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/platform/cudnn_helper.h"
#include <gtest/gtest.h>
......
......@@ -16,6 +16,9 @@ limitations under the License. */
#include <string>
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "gflags/gflags.h"
#include "glog/logging.h"
......
......@@ -19,17 +19,8 @@ from .framework import *
# import all class inside executor into fluid module
from . import executor
from .executor import *
from . import trainer
from .trainer import Trainer
from .trainer import BeginEpochEvent
from .trainer import EndEpochEvent
from .trainer import BeginStepEvent
from .trainer import EndStepEvent
from .trainer import CheckpointConfig
from . import inferencer
from .inferencer import Inferencer
from . import io
from . import evaluator
......
......@@ -280,7 +280,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
group_scale_name = self.group_name + "_scale"
if group_scale_name not in self.context:
group_norm_var = layers.sums(input=self.context[self.group_name])
layers.sqrt(x=group_norm_var, out=group_norm_var)
group_norm_var = layers.sqrt(x=group_norm_var)
clip_var = self.context[self.group_name + "_clip"]
group_scale_var = layers.elementwise_div(
x=clip_var,
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
from .. import core
from .. import executor
from .. import framework
from .. import io
from .. import parallel_executor
from .. import unique_name
from .trainer import check_and_get_place
__all__ = ['Inferencer', ]
class Inferencer(object):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that will return predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def __init__(self, infer_func, param_path, place=None, parallel=False):
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
self.place = check_and_get_place(place)
self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_params(executor.Executor(self.place), param_path)
if parallel:
with self._prog_and_scope_guard():
self.exe = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.predict_var.name)
else:
self.exe = executor.Executor(self.place)
self.inference_program = self.inference_program.clone(for_test=True)
def infer(self, inputs, return_numpy=True):
"""
Do Inference for Inputs
Args:
inputs (map): a map of {"input_name": input_var} that will be feed into the inference program
return_numpy (bool): transform return value into numpy or not
Returns:
Tensor or Numpy: the predict value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
if not isinstance(inputs, dict):
raise ValueError(
"inputs should be a map of {'input_name': input_var}")
with self._prog_and_scope_guard():
results = self.exe.run(feed=inputs,
fetch_list=[self.predict_var.name],
return_numpy=return_numpy)
return results
@contextlib.contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(main_program=self.inference_program):
with executor.scope_guard(self.scope):
yield
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import os
import errno
import shutil
import six
import time
from .. import core
from .. import data_feeder
from .. import executor
from .. import framework
from .. import io
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
from .. import optimizer as opt_module
from .. import parallel_executor
from ..transpiler import distribute_transpiler
__all__ = [
'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent',
'EndStepEvent', 'CheckpointConfig'
]
class BeginEpochEvent(object):
"""
The begin of a training epoch.
Args:
epoch_id(int): The current epoch ID.
"""
def __init__(self, epoch_id):
self.epoch = epoch_id
class EndEpochEvent(object):
"""
The end of a training epoch.
Args:
epoch_id(int): The current epoch ID.
"""
def __init__(self, epoch_id):
self.epoch = epoch_id
class BeginStepEvent(object):
"""
The begin of a training epoch.
Args:
epoch_id(int): The current epoch ID.
step_id(int): The current step ID.
"""
def __init__(self, epoch_id, step_id):
self.epoch = epoch_id
self.step = step_id
self.fetch_metrics = True
"""
If fetch_metrics is true, the metrics will be fetched at the
EndStepEvent. Default is True.
"""
class EndStepEvent(object):
"""
The end of a training step.
Args:
epoch_id(int): The current epoch ID.
step_id(int): The current step ID.
metrics(list): A list of fetched tensor. The order of this list is same
as the :code:`train_func` returns.
"""
def __init__(self, epoch_id, step_id, metrics):
self.epoch = epoch_id
self.step = step_id
self.metrics = metrics
class CheckpointConfig(object):
"""
Parameter object for :code:`save_checkpoint` and
:code:`fluid.Trainer`. Used to configuration how to save checkpoint.
Args:
checkpoint_dir(str): Directory path to save check point. Default is the
current directory.
max_num_checkpoints(int): The max number of local check points.
epoch_interval(int): Every number of epoch to save check point.
step_interval(int): Every number of step to save check point.
Examples:
>>> config = fluid.CheckpointConfig("./checkpoints")
>>> trainer = fluid.Trainer(train_func=train_program,
>>> place=place,
>>> optimizer_func=optimizer_func,
>>> checkpoint_config=config)
>>> trainer.train(...)
"""
def __init__(self,
checkpoint_dir=None,
max_num_checkpoints=3,
epoch_interval=1,
step_interval=10):
assert epoch_interval >= 1
assert step_interval >= 1
self.checkpoint_dir = checkpoint_dir \
if checkpoint_dir is not None else os.getcwd()
self.max_num_checkpoints = max_num_checkpoints
self.epoch_interval = epoch_interval
self.step_interval = step_interval
self.epoch_id = 0
self.step_id = 0
self.load_serial = None
self.pserver_id = None
self.lookup_table_name = None
def check_and_get_place(place):
"""
Check the type of place or get the default place
Args:
place(None|core.CUDAPlace|core.CPUPlace): the place that trainer will be executed on.
Raises:
TypeError if the type mismatched.
Returns:
the original place if it is not None.
if fluid is compiled with CUDA, returns CUDAPlace(0) by default.
Otherwise returns CPUPlace by default.
"""
if place is None:
if core.is_compiled_with_cuda():
return core.CUDAPlace(0)
else:
return core.CPUPlace()
else:
if not isinstance(place, core.CUDAPlace) and not isinstance(
place, core.CPUPlace):
raise TypeError("Place should be either CUDAPlace or CPUPlace")
return place
class Trainer(object):
"""
A trainer wraps MultiGPU/MultiNode training loops and can be used to train a
simple neural network easily.
This API takes a :code:`train_func`. A :code:`train_func` is a function that
return loss as it first return value. The reset value can be fetched by
EndStepEvent.metrics
This API also takes a :code:`optimizer_func` that will return an optimizer
instance.
For example, to train a MLP for MNIST dataset, the sample program is
>>> import paddle.fluid as fluid
>>>
>>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10):
>>> hidden = image
>>> for layer_size in layer_sizes:
>>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation)
>>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax")
>>>
>>> def train_mnist_mlp():
>>> img = fluid.layers.data(name='image', shape=[784])
>>> label = fluid.layers.data(name='label', shape=[1], dtype='int64')
>>> prediction = mlp(img)
>>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label))
>>>
>>> def optimizer():
>>> return fluid.optimizer.Adam()
>>>
>>> trainer = Trainer(train_func=train_mnist_mlp,
>>> optimizer_func=optimizer,
>>> place=fluid.CUDAPlace(0),
>>> parallel=True)
>>>
>>> def train_callback(event):
>>> if isinstance(event, fluid.EndStepEvent):
>>> print "Epoch ID", event.epoch, "Step ID",\
>>> event.step, "AvgLoss", event.metrics[0]
>>> elif isinstance(event, fluid.EndEpochEvent):
>>> trainer.save_params("./model_{0}".format(event.epoch))
>>>
>>> trainer.train(num_epochs=100, event_handler=train_callback)
For more example, please see :ref:`api_guide_high_level_api`.
Args:
train_func(callable): A function which will return loss. The loss must be
a scalar tensor.
optimizer_func(callable): A function that returns an Optimizer object.
place(CUDAPlace|CPUPlace): The device place of this trainer. If
:code:`parallel=True,` all CUDA Places will be used if :code:`place`
is a :code:`CUDAPlace`.
parallel(bool): True if use multiple devices.
checkpoint_config(CheckpointConfig): Configuration about how to save
checkpoints.
"""
def __init__(self,
train_func,
optimizer_func,
param_path=None,
place=None,
parallel=False,
checkpoint_config=None):
self.__stop = False
self.parallel = parallel
# config for checkpoint
# only chief worker will save variables
self.trainer_id = 0
self.checkpoint_cfg = checkpoint_config
if self.checkpoint_cfg:
assert isinstance(self.checkpoint_cfg, CheckpointConfig)
serial = _get_latest_checkpoint_serial(
self.checkpoint_cfg.checkpoint_dir)
self.checkpoint_cfg.load_serial = serial if serial >= 0 else None
self.scope = core.Scope()
# 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in
# test_word2vec.py
self.startup_program = framework.Program()
self.train_program = framework.Program()
with framework.program_guard(self.train_program, self.startup_program):
program_func_outs = train_func()
self.train_func_outputs = program_func_outs if isinstance(
program_func_outs, list) else [program_func_outs]
self.test_program = self.train_program.clone(for_test=True)
# The first element of program_func_outs is loss.
loss = self.train_func_outputs[0]
optimizer = optimizer_func()
if not isinstance(optimizer, opt_module.Optimizer):
raise TypeError(
"The optimizer should be an instance of Optimizer")
optimize_ops, params_grads = optimizer.minimize(loss)
self.place = check_and_get_place(place)
self._dist_transpile_if_necessary(optimize_ops, params_grads)
# 2. move the default_main_program to self.program and run the
# default_startup program on an empty core.Scope()
# Run startup program
with self._prog_and_scope_guard():
exe = executor.Executor(place)
exe.run(self.startup_program)
if self.checkpoint_cfg and self.checkpoint_cfg.load_serial is not None:
self._load_checkpoint()
if param_path and os.path.isdir(param_path):
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_persistables(
executor=exe,
dirname=param_path,
main_program=self.startup_program)
def _transpile_nccl2_dist(self):
# PADDLE_TRAINER_IPS
if "PADDLE_TRAINER_IPS" not in os.environ:
self.nccl_id_var = None
else:
self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
port = os.getenv("PADDLE_PSERVER_PORT")
worker_ips = os.getenv("PADDLE_TRAINER_IPS")
worker_endpoints = []
for ip in worker_ips.split(","):
worker_endpoints.append(':'.join([ip, port]))
self.num_trainers = len(worker_endpoints)
current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
worker_endpoints.remove(current_endpoint)
# TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id
# in ParallelExecutor to start
# distributed training using NCCL2
self.nccl_id_var = self.startup_program.global_block().create_var(
name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
self.startup_program.global_block().append_op(
type="gen_nccl_id",
inputs={},
outputs={"NCCLID": self.nccl_id_var},
attrs={
"endpoint": current_endpoint,
"endpoint_list": worker_endpoints,
"trainer_id": self.trainer_id
})
def _dist_transpile_if_necessary(self, optimize_ops, params_grads):
self._transpile_nccl2_dist()
if self.nccl_id_var != None:
return
if "PADDLE_TRAINING_ROLE" not in os.environ:
return
# the port of all pservers, needed by both trainer and pserver
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
# comma separated ips of all pservers, needed by trainer and
# pserver
pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist)
# total number of workers/trainers in the job, needed by
# trainer and pserver
trainers = int(os.getenv("PADDLE_TRAINERS"))
# the IP of the local machine, needed by pserver only
current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
# the unique trainer id, starting from 0, needed by trainer
# only
self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
# the role, should be either PSERVER or TRAINER
training_role = os.getenv("PADDLE_TRAINING_ROLE")
with self._prog_and_scope_guard():
t = distribute_transpiler.DistributeTranspiler()
t.transpile(
self.trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
if self.checkpoint_cfg:
pserver_id = eplist.index(current_endpoint)
self.checkpoint_cfg.pserver_id = pserver_id
if t.has_distributed_lookup_table:
self.checkpoint_cfg.lookup_table_name = t.table_name
self.train_program = t.get_pserver_program(current_endpoint)
self.startup_program = t.get_startup_program(current_endpoint,
self.train_program)
elif training_role == "TRAINER":
self.train_program = t.get_trainer_program()
else:
raise ValueError(
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
def stop(self):
"""
stop training
"""
self.__stop = True
def train(self, num_epochs, event_handler, reader=None, feed_order=None):
"""
Start the train loop to train the model.
Args:
num_epochs(int): The number of epoch. An epoch will process all data in reader
event_handler(callable): The event handler. A function with type (ev:Event)->void
reader(callable): A reader creator object. See also
:ref:`api_guide_python_reader` .
feed_order(list): Feeding order of reader. None will following the defining
order in program
Returns:
None
"""
training_role = os.getenv("PADDLE_TRAINING_ROLE", "")
if training_role == "PSERVER":
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
exe.run()
return
if self.parallel:
self._train_by_parallel_executor(num_epochs, event_handler, reader,
feed_order)
else:
self._train_by_executor(num_epochs, event_handler, reader,
feed_order)
def test(self, reader, feed_order):
"""
Test the model on given test data
Args:
reader(callable): The reader that yields test data.
feed_order(list): Feeding order of reader. None will following the
defining order in program
"""
return self._test_by_executor(reader, feed_order,
self.train_func_outputs)
def save_params(self, param_path):
"""
Save all parameters into :code:`param_path`.
Args:
param_path(str): The path to save parameters.
Returns:
None
"""
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
io.save_persistables(exe, dirname=param_path)
def save_inference_model(self, param_path, feeded_var_names,
target_var_indexes):
"""
Save model for cpp inference into :code:`param_path`.
Args:
param_path(str): The path to save parameters.
feeded_var_names(list(str)): The name of the vars that you
need to feed in before run program.
target_var_indexes(list(int)): the index of target var that
you need to return in trainer.train_func.
Returns:
None
"""
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
target_vars = [
self.train_func_outputs[index] for index in target_var_indexes
]
io.save_inference_model(param_path, feeded_var_names, target_vars,
exe)
@contextlib.contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(
main_program=self.train_program,
startup_program=self.startup_program):
with executor.scope_guard(self.scope):
yield
def _train_by_executor(self, num_epochs, event_handler, reader, feed_order):
"""
Train by Executor and single device.
Args:
num_epochs:
event_handler:
reader:
feed_order:
Returns:
"""
with self._prog_and_scope_guard():
feed_var_list = build_feed_var_list(self.train_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
exe = executor.Executor(self.place)
reader = feeder.decorate_reader(reader, multi_devices=False)
self._train_by_any_executor(event_handler, exe, num_epochs, reader)
def _train_by_any_executor(self, event_handler, exe, num_epochs, reader):
if self.checkpoint_cfg:
epochs = [
epoch_id for epoch_id in range(num_epochs)
if epoch_id >= self.checkpoint_cfg.epoch_id
]
else:
epochs = [epoch_id for epoch_id in range(num_epochs)]
for epoch_id in epochs:
event_handler(BeginEpochEvent(epoch_id))
for step_id, data in enumerate(reader()):
if self.__stop:
if self.checkpoint_cfg:
self._clean_checkpoint()
return
if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \
and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id:
continue
begin_event = BeginStepEvent(epoch_id, step_id)
event_handler(begin_event)
if begin_event.fetch_metrics:
metrics = exe.run(feed=data,
fetch_list=[
var.name
for var in self.train_func_outputs
])
else:
metrics = exe.run(feed=data, fetch_list=[])
if self.checkpoint_cfg:
self._save_checkpoint(epoch_id, step_id)
event_handler(EndStepEvent(epoch_id, step_id, metrics))
event_handler(EndEpochEvent(epoch_id))
if self.checkpoint_cfg:
self._clean_checkpoint()
def _test_by_executor(self, reader, feed_order, fetch_list):
with executor.scope_guard(self.scope):
feed_var_list = build_feed_var_list(self.test_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
exe = executor.Executor(self.place)
accumulated = len(fetch_list) * [0]
count = 0
for data in reader():
outs = exe.run(program=self.test_program,
feed=feeder.feed(data),
fetch_list=fetch_list)
accumulated = [x[0] + x[1][0] for x in zip(accumulated, outs)]
count += 1
return [x / count for x in accumulated]
def _train_by_parallel_executor(self, num_epochs, event_handler, reader,
feed_order):
with self._prog_and_scope_guard():
pe = self._get_or_create_parallel_executor()
feed_var_list = build_feed_var_list(self.train_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
reader = feeder.decorate_reader(reader, multi_devices=True)
self._train_by_any_executor(event_handler, pe, num_epochs, reader)
def _get_parallel_executor(self):
return getattr(self, 'parallel_executor', None)
def _get_or_create_parallel_executor(self):
if self._get_parallel_executor() is None:
self.parallel_executor = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.train_func_outputs[0].name)
return self._get_parallel_executor()
def _clean_checkpoint(self):
assert self.checkpoint_cfg
clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir)
def _get_checkpoint_load_args(self):
"""
epoch_id and step_id are runtime arguments, they are not variables, will load them independently.
"""
return ["epoch_id", "step_id"]
def _get_checkpoint_save_args(self, epoch_id, step_id):
"""
epoch_id and step_id are runtime arguments, they are not variables, will save them independently.
"""
trainer_args = {}
trainer_args["epoch_id"] = epoch_id
trainer_args["step_id"] = step_id
return trainer_args
def _save_checkpoint(self, epoch_id, step_id):
assert self.checkpoint_cfg
if epoch_id % self.checkpoint_cfg.epoch_interval == 0 \
and step_id % self.checkpoint_cfg.step_interval == 0:
exe = executor.Executor(self.place)
save_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
trainer_id=self.trainer_id,
trainer_args=self._get_checkpoint_save_args(epoch_id, step_id),
main_program=self.train_program,
max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints)
def _load_checkpoint(self):
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program)
if not self.checkpoint_cfg.pserver_id:
load_trainer_args = self._get_checkpoint_load_args()
trainer_args = load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program,
role_id=self.trainer_id,
is_trainer=True,
load_trainer_args=load_trainer_args)
if len(trainer_args) != 2:
raise ValueError(
"the return trainer_args length do not equal _get_checkpoint_load_args"
)
self.checkpoint_cfg.epoch_id = int(trainer_args[0])
self.checkpoint_cfg.step_id = int(trainer_args[1])
else:
if self.checkpoint_cfg.lookup_table_name:
load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program,
role_id=self.checkpoint_cfg.pserver_id,
is_trainer=False,
load_trainer_args=None,
load_lookup_table=self.checkpoint_cfg.lookup_table_name)
def build_feed_var_list(program, feed_order):
if not isinstance(program, framework.Program):
raise TypeError("The 'program' should be an object of Program")
if isinstance(feed_order, list):
feed_var_list = [
program.global_block().var(var_name) for var_name in feed_order
]
else:
if not isinstance(feed_order, dict):
raise TypeError(
"The 'feed_order' should be either None, list or dict.")
if not sorted(feed_order.values()) == list(range(len(feed_order))):
raise ValueError(
"The values of 'feed_order' should be a permutation of [0, len(feed_order))"
)
sorted_pair_list = sorted(
six.iteritems(feed_order), key=lambda item: item[1])
feed_var_list = [
program.global_block().var(pair[0]) for pair in sorted_pair_list
]
return feed_var_list
# move Checkpoint APIs from io.py to trainer.py, make all of them are private.
SUCCESS_MARK_FILENAME = "_SUCCESS"
CHECKPOINT_PREFIX = "checkpoint"
MODEL_DIR = "__model__"
LOOKUP_TABLE_DIR = "__lookup_table__"
TRAINER_PREFIX = "trainer"
CHECKPOINT_SEPARATOR = "_"
def save_checkpoint(executor,
checkpoint_dir,
trainer_id,
main_program,
trainer_args=None,
max_num_checkpoints=3,
lookup_table=None,
pserver_endpoints=None):
"""
This function filters out all checkpoint variables from the give
main_program and then saves these variables to the `checkpoint_dir`
directory.
In the training precess, we generally save a checkpoint in each
iteration. So there might be a lot of checkpoints in the
`checkpoint_dir`. To avoid them taking too much disk space, the
`max_num_checkpoints` are introduced to limit the total number of
checkpoints. If the number of existing checkpints is greater than
the `max_num_checkpoints`, oldest ones will be scroll deleted.
A variable is a checkpoint variable and will be saved if it meets
all following conditions:
1. It's persistable.
2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
Args:
executor(Executor): The executor to run for save checkpoint.
checkpoint_dir(str): The folder where to save checkpoints.
trainer_id(int): currect trainer id, if id is equal to 0, the trainer
is chief.
trainer_args(dict|None): Current training arguments. Such as 'epoch_id'
and 'step_id'.
Defaut: None
main_program(Program): The program whose checkpoint variables will
be saved.
max_num_checkpoints(int): The max number of total number of existing
checkpoints.
Default: 3
lookup_table(string|None): the lookup table name, when use distribute
lookup table, we can get lookup table name by DistributeTranspiler.
table_name
pserver_endpoints(list|None): the parameter server ip:port list.
when use distribute lookup table, we can get pserver_endpoints by
distribute arguments.
Returns:
None
Raises:
ValueError: If `checkpoint_dir` is None.
AssertionError: If `trainer_args` is not a dict.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./checkpoints"
prog = fluid.default_main_program()
trainer_args = {"epoch_id": 200,
"step_id": 20} # just an example
table_name = "share_w"
ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
save_checkpoint(executor=exe,
checkpoint_dir=path,
trainer_id=0,
trainer_args=trainer_args,
main_program=prog,
max_num_checkpoints=3,
lookup_table=table_name,
pserver_endpoints = ps_endpoints)
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
if main_program is None:
raise ValueError('main_program should not be None.')
if trainer_args:
assert isinstance(trainer_args, dict)
is_chief = trainer_id == 0
_make_chekcpoint_dirs(checkpoint_dir)
serial = _get_latest_checkpoint_serial(checkpoint_dir) + 1
cur_dir = _get_serial_dir(checkpoint_dir, serial)
_save_trainer_args(cur_dir, trainer_id, trainer_args)
if is_chief:
_save_persist_vars_without_grad(executor, cur_dir, main_program)
if is_chief and lookup_table and pserver_endpoints:
_save_pserver_vars_by_notify(executor, cur_dir, lookup_table,
pserver_endpoints)
_scroll_delete(checkpoint_dir, max_num_checkpoints)
def load_checkpoint(executor,
checkpoint_dir,
main_program,
role_id=0,
is_trainer=True,
load_trainer_args=None,
load_lookup_table=None):
"""
This function filters out all checkpoint variables from the give
main_program and then try to load these variables from the
`checkpoint_dir` directory.
In the training precess, we generally save a checkpoint in each
iteration. So there are more than one checkpoint in the
`checkpoint_dir` (each checkpoint has its own sub folder), use
`serial` to specify which serial of checkpoint you would like to
load.
A variable is a checkpoint variable and will be loaded if it meets
all following conditions:
1. It's persistable.
2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
Args:
executor(Executor): The executor to run for loading checkpoint.
checkpoint_dir(str): The folder where all checkpoints are.
serial(int): The serial of checkpoint you would like to load.
main_program(Program): The program whose checkpoint variables will
be loaded.
role_id(int): the trainer id or the parameter server id.
is_trainer(bool): trainer is True and parameter server is False.
load_trainer_args(list|None): list about load trainer args.
load_lookup_table(str|None): the lookup table name
Returns:
None
Raises:
ValueError: If `checkpoint_dir` is None.
ValueError: If `main_program` is None.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./checkpoints"
prog = fluid.default_main_program()
load_checkpoint(executor=exe, checkpoint_dir=path,
serial=9, main_program=prog)
# In this example, `load_checkpoint` function
# will first filters out all checkpoint variables in the default
# main program, and then try to load these variables form the
# folder "./checkpoints/checkpoint_9/__model__".
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
serial = _get_latest_checkpoint_serial(checkpoint_dir)
# there are nothing need to be loaded
if serial is None or serial < 0:
return
if main_program is None:
raise ValueError('main_program should not be None.')
if is_trainer and load_trainer_args is None:
cur_dir = _get_serial_dir(checkpoint_dir, serial)
_load_persist_vars_without_grad(executor, cur_dir, main_program, True)
return
if is_trainer and load_trainer_args:
return _load_trainer_args(checkpoint_dir, serial, role_id,
load_trainer_args)
if not is_trainer and load_lookup_table:
_load_lookup_table_vars(executor, checkpoint_dir, main_program, role_id,
load_lookup_table)
def clean_checkpoint(checkpoint_dir, delete_dir=False):
"""
clean the checkpoint dir, when the train exits normally,
the trainer will call clean_checkpoint to delete checkpoint directory saved before.
delete_dir only works when the directory is empty, otherwise, OSError is raised.
: param checkpoint_dir
: param delete_dir
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
_scroll_delete(checkpoint_dir, max_num_checkpoints=0)
if delete_dir and not os.listdir(checkpoint_dir):
os.rmdir(checkpoint_dir)
def _load_persist_vars_without_grad(executor,
dirname,
program,
has_model_dir=False):
"""
This function filters out all checkpoint variables from the give
program and then trys to load these variables from the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
1. It's persistable.
2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
Args:
executor(Executor): The executor to run for loading variables.
dirname(str): The directory path.
program(Program): The program whose checkpoint variables will
be loaded.
has_model_dir(bool): if True, the function loads variables
from a sub directory named '__model__'.
Default: False
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
_load_persist_vars_without_grad(executor=exe,
dirname=param_path, program=prog, has_model_dir=True)
# In this example, `_load_persist_vars_without_grad` function
# will first filters out all checkpoint variables in the default
# main program, and then trys to load these variables form the
# folder "./my_paddle_model/__model__".
"""
if has_model_dir:
dirname = _get_model_dir(dirname)
io.load_vars(
executor,
dirname=dirname,
main_program=program,
predicate=_is_checkpoint_var,
filename=None)
def _load_lookup_table_vars(executor, dirname, program, pserver_id, table_name):
"""
The parameter server will load lookup table's local file in
selectedrows variable.
Args:
executor(Executor): The executor to run for loading persistable variables
dirname(str): The directory path
main_program(Program): Find the variable named table_name in main_program
pserver_id(int): the serial number in pserver_endpoints list
table_name(str): lookup table name
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
dirname = "./checkpoints/checkpoint_9/"
prog = fluid.default_main_program()
pserver_id = 1
table_name = "share_w"
_load_lookup_table_vars(executor=exe,
dirname=dirname, program=prog, pserver_id=pserver_id,
table_name=table_name)
"""
for var in program.list_vars():
if var.name == table_name:
lookup_table_var = var
break
assert lookup_table_var is not None
lookup_table_dir = os.path.join(dirname, LOOKUP_TABLE_DIR)
table_file = table_name + CHECKPOINT_SEPARATOR + str(pserver_id)
load_prog = framework.Program()
load_block = load_prog.global_block()
load_block.append_op(
type='load',
inputs={},
outputs={'Out': [lookup_table_var]},
attrs={'file_path': os.path.join(lookup_table_dir, table_file)})
executor.run(load_prog)
def _save_persist_vars_without_grad(executor, dirname, program):
"""
This function filters out all checkpoint variables from the give
program and then save these variables to a sub-folder '__model__' of
the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
1. It's persistable.
2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
Args:
executor(Executor): The executor to run for saving variables.
dirname(str): The directory path.
program(Program): The program whose checkpoint variables will
be saved.
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
_save_persist_vars_without_grad(executor=exe,
dirname=param_path, program=prog)
# In this example, `_save_persist_vars_without_grad` function
# will first filters out all checkpoint variables in the default
# main program, and then saves these variables to the folder
# "./my_paddle_model/__model__".
"""
cur_dir = _get_model_dir(dirname)
io.save_vars(
executor,
dirname=cur_dir,
main_program=program,
vars=None,
predicate=_is_checkpoint_var,
filename=None)
_write_success(cur_dir)
def _save_pserver_vars_by_notify(executor, dirname, lookup_table,
ps_endpoint_list):
"""
This function will send checkpoint notify message from Trainer 0
to all the pservers.
The checkpoint notify message contains lookup table name,
the absolute path on pserver to save lookup_table.
Args:
executor(Executor): The executor to run for send checkpoint notify.
dirname(str): The folder where to save checkpoints.
lookup_table(string): the lookup table name, when use distribute
lookup table, we can get lookup table name by DistributeTranspiler.
table_name
ps_endpoint_list(list): the parameter server ip:port list.
when use distribute lookup table, we can get ps_endpoint_list by
distribute arguments.
Return:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
table_name = "share_w"
ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
_save_pserver_vars_by_notify(executor=exe,
dirname=param_path, lookup_table=table_name,
ps_endpoint_list=ps_endpoints)
"""
cur_dir = _get_lookuptable_dir(dirname)
checkpoint_notify_program = framework.Program()
checkpoint_notify_block = checkpoint_notify_program.global_block()
attrs = {}
attrs['epmap'] = ps_endpoint_list
attrs['dir'] = cur_dir
attrs['lookup_table'] = lookup_table
checkpoint_notify_block.append_op(
type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs)
executor.run(checkpoint_notify_program)
def _save_trainer_args(dirname, trainer_id, trainer_args):
assert isinstance(trainer_args, dict)
cur_dir = _get_trainer_dir(dirname, trainer_id)
for name, value in six.iteritems(trainer_args):
args_file = os.path.join(cur_dir, name)
with open(args_file, 'w') as f:
f.write(str(value))
_write_success(cur_dir)
def _load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args):
"""
trainer will load some args from it's independent directory,
such as epoch_id and step_id.
Args:
checkpoint_dir(str): The folder where all checkpoints are.
serial(int): The serial of checkpoint you would like to load.
trainer_id(int): current trainer id.
trainer_args(list): list about load trainer args
Return:
None
Examples:
.. code-block:: python
param_path = "./checkpoint/"
serial = 7
trainer_id = 2
trainer_args = ["epoch_id", "step_id"]
_load_trainer_args(checkpoint_dir=param_path, serial=serial,
trainer_id=trainer_id, trainer_args=trainer_args)
"""
assert isinstance(trainer_args, list)
cur_dir = _get_serial_dir(checkpoint_dir, serial)
cur_dir = _get_trainer_dir(cur_dir, trainer_id)
ret_values = []
for arg in trainer_args:
cur_file = os.path.join(cur_dir, arg)
with open(cur_file, 'r') as f:
contents = f.read()
ret_values.append(contents.strip())
return ret_values
def _is_checkpoint_var(var):
"""
the checkpoint will not save or load all the variables.
var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded.
: param var(Variable)
"""
if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
var.desc.type() == core.VarDesc.VarType.RAW:
return False
# @GRAD are named for gradient variables, checkpoint will not save it.
if "@GRAD" in var.name:
return False
# .trainer_ are named for distribute train variables, checkpoint will not save it.
if ".trainer_" in var.name:
return False
# .block is named for distribute train variables, checkpoint will not save it.
if ".block" in var.name:
return False
return var.persistable
def _make_chekcpoint_dirs(dirs):
"""
_make_chekcpoint_dirs will makdir local directory directly, when the directory is exist, it will igore it.
"""
assert dirs is not None
if os.path.isfile(dirs):
raise OSError(errno.ENOTDIR, "dirs path shoule be a Directory.", dirs)
if not os.path.isdir(dirs):
try:
os.makedirs(dirs)
except OSError as err:
if err.errno != errno.EEXIST:
raise err
def _get_dir_serial(dirname):
_, serial = dirname.split(CHECKPOINT_SEPARATOR)
try:
serial_num = int(serial)
except ValueError:
serial_num = -1
return serial_num
def _get_serial_dir(dirname, serial):
serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial)
serial_dir = os.path.join(dirname, serial_folder)
_make_chekcpoint_dirs(serial_dir)
return serial_dir
def _get_model_dir(dirname):
model_dir = os.path.join(dirname, MODEL_DIR)
_make_chekcpoint_dirs(model_dir)
return model_dir
def _get_lookuptable_dir(dirname):
lookuptable_dir = os.path.join(dirname, LOOKUP_TABLE_DIR)
_make_chekcpoint_dirs(lookuptable_dir)
return lookuptable_dir
def _get_trainer_dir(dirname, trainer_id):
trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id)
trainer_dir = os.path.join(dirname, trainer_folder)
_make_chekcpoint_dirs(trainer_dir)
return trainer_dir
def _scroll_delete(dirname, max_num_checkpoints=3):
dirs = os.listdir(dirname)
serial_map = {}
for serial in dirs:
serial_num = _get_dir_serial(serial)
serial_map[serial_num] = serial
if len(list(serial_map.keys())) <= max_num_checkpoints:
return
serials = list(serial_map.keys())
serials.sort(reverse=True)
serials = serials[max_num_checkpoints:]
for serial in serials:
cur_dir = _get_serial_dir(dirname, serial)
try:
shutil.rmtree(cur_dir)
except OSError as err:
if err.errno != errno.ENOENT:
raise err
def _write_success(dirname):
"""
write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct.
: param dirname
"""
success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME)
with open(success_file, 'a') as f:
now = time.ctime()
f.write(now)
def _get_latest_checkpoint_serial(checkpoint_dir):
"""
get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory
: param checkpoint_dir
"""
if not checkpoint_dir:
return -1
def has_success(checkpoint_dir, cur_dir):
"""
is _SUCCESS in this dir
"""
serial = _get_dir_serial(cur_dir)
if serial == -1 or not os.path.isdir(
os.path.join(checkpoint_dir, cur_dir)):
return -1
success_path = os.path.join(
_get_serial_dir(checkpoint_dir, serial), MODEL_DIR,
SUCCESS_MARK_FILENAME)
if os.path.isfile(success_path):
return serial
if not os.path.isdir(checkpoint_dir):
return -1
current_dir = -1
dirs = os.listdir(checkpoint_dir)
for cur_dir in dirs:
success_num = has_success(checkpoint_dir, cur_dir)
if success_num > current_dir:
current_dir = success_num
return current_dir
......@@ -38,7 +38,6 @@ from . import unique_name
__all__ = [
'Program',
'Operator',
'Parameter',
'default_startup_program',
'default_main_program',
'program_guard',
......@@ -489,7 +488,8 @@ class OpProtoHolder(object):
def generated_op_attr_names():
return {
core.op_proto_and_checker_maker.kOpRoleAttrName(),
core.op_proto_and_checker_maker.kOpRoleVarAttrName()
core.op_proto_and_checker_maker.kOpRoleVarAttrName(),
core.op_proto_and_checker_maker.kOpNameScopeAttrName()
}
......
......@@ -12,101 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
from . import core
from . import executor
from . import framework
from . import io
from . import parallel_executor
from . import unique_name
from .trainer import check_and_get_place
__all__ = ['Inferencer', ]
class Inferencer(object):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that will return predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def __init__(self, infer_func, param_path, place=None, parallel=False):
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
self.place = check_and_get_place(place)
self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_params(executor.Executor(self.place), param_path)
if parallel:
with self._prog_and_scope_guard():
self.exe = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.predict_var.name)
else:
self.exe = executor.Executor(self.place)
self.inference_program = self.inference_program.clone(for_test=True)
def infer(self, inputs, return_numpy=True):
"""
Do Inference for Inputs
Args:
inputs (map): a map of {"input_name": input_var} that will be feed into the inference program
return_numpy (bool): transform return value into numpy or not
Returns:
Tensor or Numpy: the predict value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
if not isinstance(inputs, dict):
raise ValueError(
"inputs should be a map of {'input_name': input_var}")
with self._prog_and_scope_guard():
results = self.exe.run(feed=inputs,
fetch_list=[self.predict_var.name],
return_numpy=return_numpy)
return results
@contextlib.contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(main_program=self.inference_program):
with executor.scope_guard(self.scope):
yield
# NOTE: inferencer is moved into fluid.contrib.inferencer.
__all__ = []
......@@ -23,7 +23,10 @@ from ..proto import framework_pb2
from ..framework import OpProtoHolder, Variable
from ..layer_helper import LayerHelper
__all__ = ['deprecated', 'generate_layer_fn', 'autodoc', 'templatedoc']
__all__ = [
'deprecated', 'generate_layer_fn', 'generate_layer_fn_noattr', 'autodoc',
'templatedoc'
]
def _convert_(name):
......@@ -58,7 +61,7 @@ def escape_math(text):
_two_dollar_pattern_.sub(r"!!\1!!", text)))
def _generate_doc_string_(op_proto):
def _generate_doc_string_(op_proto, additional_args_lines=None):
"""
Generate docstring by OpProto
......@@ -98,6 +101,13 @@ def _generate_doc_string_(op_proto):
buf.write(escape_math(each_attr.comment))
buf.write('\n')
if additional_args_lines is not None:
for line in additional_args_lines:
line = line.strip()
buf.write(' ')
buf.write(line)
buf.write('\n')
if len(op_proto.outputs) != 0:
buf.write('\nReturns:\n')
buf.write(' ')
......@@ -205,6 +215,29 @@ def generate_layer_fn(op_type):
return func
def generate_layer_fn_noattr(op_type):
"""Register the Python layer for an Operator without Attribute.
Args:
op_type: The name of the operator to be created.
This function takes in the operator type (sigmoid, exp , tanh etc) and
creates the operator functionality.
"""
op_proto = OpProtoHolder.instance().get_op_proto(op_type)
def func(x, name=None):
helper = LayerHelper(op_type, **locals())
output = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(type=op_type, inputs={"X": x}, outputs={"Out": output})
return output
func.__name__ = op_type
func.__doc__ = _generate_doc_string_(op_proto)
return func
def deprecated(func_or_class):
"""
Deprecated warning decorator. It will result a warning message.
......
......@@ -68,7 +68,7 @@ def noam_decay(d_model, warmup_steps):
a = global_step**-0.5
b = (warmup_steps**-1.5) * global_step
lr_value = (d_model**-0.5) * ops.elementwise_min(a, b)
lr_value = (d_model**-0.5) * nn.elementwise_min(a, b)
return lr_value
......@@ -241,7 +241,7 @@ def polynomial_decay(learning_rate,
else:
decay_steps_var = tensor.fill_constant(
shape=[1], dtype='float32', value=float(decay_steps))
global_step = ops.elementwise_min(x=global_step, y=decay_steps_var)
global_step = nn.elementwise_min(x=global_step, y=decay_steps_var)
decayed_lr = (learning_rate - end_learning_rate) * \
((1 - global_step / decay_steps) ** power) + end_learning_rate
......
......@@ -20,9 +20,9 @@ from __future__ import print_function
import numpy as np
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant
from ..framework import Variable
from ..framework import Variable, OpProtoHolder
from ..param_attr import ParamAttr
from .layer_function_generator import autodoc, templatedoc
from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
from .tensor import concat
from . import utils
from .. import unique_name
......@@ -107,7 +107,16 @@ __all__ = [
'log',
'crop',
'rank_loss',
'elu',
'relu6',
'pow',
'stanh',
'hard_sigmoid',
'swish',
'prelu',
'brelu',
'leaky_relu',
'soft_relu',
'flatten',
'sequence_mask',
'stack',
......@@ -116,6 +125,14 @@ __all__ = [
'sequence_enumerate',
'expand',
'sequence_concat',
'scale',
'elementwise_add',
'elementwise_div',
'elementwise_sub',
'elementwise_mul',
'elementwise_max',
'elementwise_min',
'elementwise_pow',
]
......@@ -3605,7 +3622,7 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None):
attrs={
'transpose_X': transpose_x,
'transpose_Y': transpose_y,
'alpha': alpha,
'alpha': float(alpha),
})
return out
......@@ -5895,6 +5912,148 @@ def pad2d(input,
return out
@templatedoc()
def elu(x, alpha=1.0, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
alpha(${alpha_type}|1.0): ${alpha_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('elu', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='elu',
inputs={'X': x},
outputs={'Out': out},
attrs={'alpha': alpha})
return out
@templatedoc()
def relu6(x, threshold=6.0, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
threshold(${threshold_type}|6.0): ${threshold_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('relu6', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='relu6',
inputs={'X': x},
outputs={'Out': out},
attrs={'threshold': threshold})
return out
@templatedoc()
def pow(x, factor=1.0, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
factor(${factor_type}|1.0): ${factor_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('pow', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='pow',
inputs={'X': x},
outputs={'Out': out},
attrs={'factor': factor})
return out
@templatedoc()
def stanh(x, scale_a=2.0 / 3.0, scale_b=1.7159, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
scale_a(${scale_a_type}|2.0 / 3.0): ${scale_a_comment}
scale_b(${scale_b_type}|1.7159): ${scale_b_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('stanh', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='stanh',
inputs={'X': x},
outputs={'Out': out},
attrs={'scale_a': scale_a,
'scale_b': scale_b})
return out
@templatedoc()
def hard_sigmoid(x, slope=0.2, offset=0.5, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
slope(${slope_type}|0.2): ${slope_comment}
offset(${offset_type}|0.5): ${offset_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('hard_sigmoid', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='hard_sigmoid',
inputs={'X': x},
outputs={'Out': out},
attrs={'slope': slope,
'offset': offset})
return out
@templatedoc()
def swish(x, beta=1.0, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
beta(${beta_type}|1.0): ${beta_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('swish', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='swish',
inputs={'X': x},
outputs={'Out': out},
attrs={'slope': beta})
return out
def prelu(x, mode, param_attr=None, name=None):
"""
Equation:
......@@ -5948,6 +6107,74 @@ def prelu(x, mode, param_attr=None, name=None):
return out
@templatedoc()
def brelu(x, t_min=0.0, t_max=24.0, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
t_min(${t_min_type}|0.0): ${t_min_comment}
t_max(${t_max_type}|24.0): ${t_max_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('brelu', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='brelu',
inputs={'X': x},
outputs={'Out': out},
attrs={'t_min': t_min,
't_max': t_max})
return out
@templatedoc()
def leaky_relu(x, alpha=0.02, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
alpha(${alpha_type}|0.02): ${alpha_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('leaky_relu', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='leaky_relu',
inputs={'X': x},
outputs={'Out': out},
attrs={'alpha': alpha})
return out
@templatedoc()
def soft_relu(x, threshold=40.0, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
threshold(${threshold_type}|40.0): ${threshold_comment}
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
output(${out_type}): ${out_comment}
"""
helper = LayerHelper('soft_relu', **locals())
out = helper.create_tmp_variable(dtype=x.dtype)
helper.append_op(
type='soft_relu',
inputs={'X': x},
outputs={'Out': out},
attrs={'threshold': threshold})
return out
def flatten(x, axis=1, name=None):
"""
**Flatten layer**
......@@ -6234,3 +6461,105 @@ def expand(x, expand_times, name=None):
outputs={'Out': out},
attrs={'expand_times': expand_times})
return out
def _elementwise_op(helper):
op_type = helper.layer_type
x = helper.kwargs.get('x', None)
y = helper.kwargs.get('y', None)
assert x is not None, 'x cannot be None in {}'.format(op_type)
assert y is not None, 'y cannot be None in {}'.format(op_type)
axis = helper.kwargs.get('axis', -1)
use_mkldnn = helper.kwargs.get('use_mkldnn', False)
name = helper.kwargs.get('name', None)
if name is None:
out = helper.create_tmp_variable(dtype=x.dtype)
else:
out = helper.create_variable(
name=name, dtype=x.dtype, persistable=False)
helper.append_op(
type=op_type,
inputs={'X': x,
'Y': y},
outputs={'Out': out},
attrs={'axis': axis,
'use_mkldnn': use_mkldnn})
return helper.append_activation(out)
@templatedoc()
def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
scale(${scale_type}): ${scale_comment}
bias(${bias_type}): ${bias_comment}
bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
act(basestring|None): Activation applied to the output.
name(basestring|None): Name of the output.
Returns:
out(${out_type}): ${out_comment}
"""
helper = LayerHelper('scale', **locals())
if name is None:
out = helper.create_tmp_variable(dtype=x.dtype)
else:
out = helper.create_variable(
name=name, dtype=x.dtype, persistable=False)
helper.append_op(
type='scale',
inputs={'X': x},
outputs={'Out': out},
attrs={
'scale': float(scale),
'bias': float(bias),
'bias_after_scale': bias_after_scale
})
return helper.append_activation(out)
def elementwise_add(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
return _elementwise_op(LayerHelper('elementwise_add', **locals()))
def elementwise_div(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
return _elementwise_op(LayerHelper('elementwise_div', **locals()))
def elementwise_sub(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
return _elementwise_op(LayerHelper('elementwise_sub', **locals()))
def elementwise_mul(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
return _elementwise_op(LayerHelper('elementwise_mul', **locals()))
def elementwise_max(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
return _elementwise_op(LayerHelper('elementwise_max', **locals()))
def elementwise_min(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
return _elementwise_op(LayerHelper('elementwise_min', **locals()))
def elementwise_pow(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
return _elementwise_op(LayerHelper('elementwise_pow', **locals()))
for func in [
elementwise_add, elementwise_div, elementwise_sub, elementwise_mul,
elementwise_max, elementwise_min, elementwise_pow
]:
op_proto = OpProtoHolder.instance().get_op_proto(func.__name__)
func.__doc__ = _generate_doc_string_(
op_proto,
additional_args_lines=[
"act (basestring|None): Activation applied to the output.",
"name (basestring|None): Name of the output."
])
......@@ -13,9 +13,9 @@
# limitations under the License.
from __future__ import print_function
from .layer_function_generator import generate_layer_fn
from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr
__activations__ = [
__activations_noattr__ = [
'sigmoid',
'logsigmoid',
'exp',
......@@ -33,29 +33,12 @@ __activations__ = [
'square',
'softplus',
'softsign',
'brelu',
'leaky_relu',
'soft_relu',
'elu',
'relu6',
'pow',
'stanh',
'hard_sigmoid',
'swish',
]
__all__ = [
'mean',
'mul',
'scale',
'sigmoid_cross_entropy_with_logits',
'elementwise_add',
'elementwise_div',
'elementwise_sub',
'elementwise_mul',
'elementwise_max',
'elementwise_min',
'elementwise_pow',
'clip',
'clip_by_norm',
'logical_and',
......@@ -70,11 +53,21 @@ __all__ = [
'slice',
'shape',
'maxout',
] + __activations__
]
for _OP in set(__all__):
globals()[_OP] = generate_layer_fn(_OP)
# It is a hot fix in some unittest using:
# fluid.layers.scale(x=x, scale=10.0, out=out_var)
# e.g.: test_program_code.py, test_dist_train.py
globals()['_scale'] = generate_layer_fn('scale')
__all__ += __activations_noattr__
for _OP in set(__activations_noattr__):
globals()[_OP] = generate_layer_fn_noattr(_OP)
__all__ += ["uniform_random"]
_uniform_random_ = generate_layer_fn('uniform_random')
......
......@@ -21,6 +21,7 @@ __all__ = [
"sequence_conv_pool",
"glu",
"scaled_dot_product_attention",
"img_conv_group",
]
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import contextlib
import numpy
import unittest
......@@ -57,11 +67,11 @@ def optimizer_func():
def train(use_cuda, train_program, params_dirname, inference_model_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
if event.step == 10:
test_metrics = trainer.test(
reader=test_reader, feed_order=['x', 'y'])
......@@ -91,7 +101,7 @@ def infer(use_cuda, inference_program, params_dirname=None):
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 10
......
......@@ -14,11 +14,22 @@
from __future__ import print_function
import sys
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import numpy
import six
import os
import cifar10_small_test_set
......@@ -106,7 +117,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False)
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label'])
......@@ -118,7 +129,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program,
optimizer_func=optimizer_func,
place=place,
......@@ -133,7 +144,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
......
......@@ -14,11 +14,22 @@
from __future__ import print_function
import sys
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import numpy
import six
import os
import cifar10_small_test_set
......@@ -83,7 +94,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False)
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label'])
......@@ -95,7 +106,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
......@@ -110,7 +121,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import numpy as np
WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
......@@ -149,7 +159,7 @@ def optimize_func():
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimize_func)
feed_order = [
......@@ -164,7 +174,7 @@ def train(use_cuda, train_program, params_dirname):
# place)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
avg_cost_set = trainer.test(
......@@ -184,7 +194,7 @@ def train(use_cuda, train_program, params_dirname):
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
......@@ -204,7 +214,7 @@ def train(use_cuda, train_program, params_dirname):
def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
inference_program, param_path=params_dirname, place=place)
# Setup input by creating LoDTensor to represent sequence of words.
......
......@@ -13,17 +13,28 @@
# limitations under the License.
from __future__ import print_function
import contextlib
import sys
import numpy as np
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.framework as framework
import paddle.fluid.layers as pd
from paddle.fluid.executor import Executor
from functools import partial
import unittest
import os
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
......@@ -198,12 +209,12 @@ def train(use_cuda, is_sparse, is_local=True):
]
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
print('pass_id=' + str(event.epoch) + ' batch=' + str(event.step))
if event.step == 10:
trainer.stop()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=partial(train_program, is_sparse),
place=place,
optimizer_func=optimizer_func)
......
......@@ -14,14 +14,22 @@
from __future__ import print_function
import argparse
import sys
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import paddle
import six
import sys
import numpy
import unittest
import math
import sys
import os
......@@ -68,14 +76,14 @@ def optimizer_func():
def train(use_cuda, train_program, parallel, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
parallel=parallel)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
......@@ -91,7 +99,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print(
("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch,
......@@ -112,7 +120,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
......
......@@ -14,14 +14,22 @@
from __future__ import print_function
import argparse
import sys
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import paddle
import six
import sys
import numpy
import unittest
import math
import sys
import os
......@@ -55,14 +63,14 @@ def optimizer_func():
def train(use_cuda, train_program, params_dirname, parallel):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
parallel=parallel)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
......@@ -94,7 +102,7 @@ def train(use_cuda, train_program, params_dirname, parallel):
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
......
......@@ -19,6 +19,16 @@ import sys
import numpy as np
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.layers as layers
import paddle.fluid.nets as nets
......@@ -164,7 +174,7 @@ def optimizer_func():
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimizer_func)
feed_order = [
......@@ -173,7 +183,7 @@ def train(use_cuda, train_program, params_dirname):
]
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
test_reader = paddle.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
avg_cost_set = trainer.test(
......@@ -208,7 +218,7 @@ def train(use_cuda, train_program, params_dirname):
def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
inference_program, param_path=params_dirname, place=place)
# Use the first data from paddle.dataset.movielens.test() as input.
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
......@@ -72,13 +82,13 @@ def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
......@@ -96,7 +106,7 @@ def train(use_cuda, train_program, params_dirname):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
......@@ -119,7 +129,7 @@ def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
......@@ -87,13 +97,13 @@ def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
......@@ -111,7 +121,7 @@ def train(use_cuda, train_program, params_dirname):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
......@@ -134,7 +144,7 @@ def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
......@@ -79,13 +89,13 @@ def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict),
batch_size=BATCH_SIZE,
......@@ -105,7 +115,7 @@ def train(use_cuda, train_program, params_dirname):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
......@@ -129,7 +139,7 @@ def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import numpy as np
import math
import sys
......@@ -95,7 +105,7 @@ def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
outs = trainer.test(
reader=test_reader,
feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw'])
......@@ -109,7 +119,7 @@ def train(use_cuda, train_program, params_dirname):
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program, optimizer_func=optimizer_func, place=place)
trainer.train(
......@@ -121,7 +131,7 @@ def train(use_cuda, train_program, params_dirname):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program, param_path=params_dirname, place=place)
# Setup inputs by creating 4 LoDTensors representing 4 words. Here each word
......
......@@ -25,13 +25,14 @@ class TestDistSeResneXt2x2(TestDistBase):
self.check_with_place("dist_se_resnext.py", delta=1e-7)
class TestDistseResnXt2x2WithMemopt(TestDistBase):
def _setup_config(self):
self._sync_mode = True
self._mem_opt = True
def test_dist_train(self):
self.check_with_place("dist_se_resnext.py", delta=1e-7)
# TODO(typhoonzero): fix this test
# class TestDistseResnXt2x2WithMemopt(TestDistBase):
# def _setup_config(self):
# self._sync_mode = True
# self._mem_opt = True
# def test_dist_train(self):
# self.check_with_place("dist_se_resnext.py", delta=1e-7)
class TestDistSeResneXt2x2Async(TestDistBase):
......
......@@ -27,6 +27,7 @@ import paddle.fluid.layers as layers
from paddle.fluid.layers.io import ListenAndServ
from paddle.fluid.layers.io import Recv
from paddle.fluid.layers.io import Send
import paddle.fluid.layers.ops as ops
from paddle.fluid import core
......@@ -89,7 +90,7 @@ class TestSendOp(unittest.TestCase):
name="X",
append_batch_size=False)
fluid.initializer.Constant(value=1.0)(x, main.global_block())
layers.scale(x=x, scale=10.0, out=out_var)
ops._scale(x=x, scale=10.0, out=out_var)
self.server_exe = fluid.Executor(place)
self.server_exe.run(main)
......
......@@ -659,5 +659,25 @@ class TestLoadSliceVar(TranspilerTest):
pserver2._slice_vars_and_attrs[idx][2].shape))
class TestNCCL2Transpile(TranspilerTest):
def test_nccl2_transpile(self):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
self.net_conf()
config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
t.transpile(
0,
trainers="127.0.0.1:6174,127.0.0.1:6175",
current_endpoint="127.0.0.1:6174",
startup_program=startup)
print([op.type for op in startup.global_block().ops])
self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id")
self.assertIsNotNone(startup.global_block().vars.get("NCCLID"))
if __name__ == "__main__":
unittest.main()
......@@ -573,6 +573,158 @@ class TestBook(unittest.TestCase):
self.assertIsNotNone(out)
print(str(program))
def test_brelu(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.brelu(input, t_min=1.0, t_max=20.0, name='brelu')
self.assertIsNotNone(out)
print(str(program))
def test_leaky_relu(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.leaky_relu(input, alpha=0.1, name='leaky_relu')
self.assertIsNotNone(out)
print(str(program))
def test_soft_relu(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.soft_relu(input, threshold=30.0, name='soft_relu')
self.assertIsNotNone(out)
print(str(program))
def test_sigmoid(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.sigmoid(input, name='sigmoid')
self.assertIsNotNone(out)
print(str(program))
def test_logsigmoid(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.logsigmoid(input, name='logsigmoid')
self.assertIsNotNone(out)
print(str(program))
def test_exp(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.exp(input, name='exp')
self.assertIsNotNone(out)
print(str(program))
def test_tanh(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.tanh(input, name='tanh')
self.assertIsNotNone(out)
print(str(program))
def test_tanh_shrink(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.tanh_shrink(input, name='tanh_shrink')
self.assertIsNotNone(out)
print(str(program))
def test_sqrt(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.sqrt(input, name='sqrt')
self.assertIsNotNone(out)
print(str(program))
def test_abs(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.abs(input, name='abs')
self.assertIsNotNone(out)
print(str(program))
def test_ceil(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.ceil(input, name='ceil')
self.assertIsNotNone(out)
print(str(program))
def test_floor(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.floor(input, name='floor')
self.assertIsNotNone(out)
print(str(program))
def test_cos(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.cos(input, name='cos')
self.assertIsNotNone(out)
print(str(program))
def test_sin(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.sin(input, name='sin')
self.assertIsNotNone(out)
print(str(program))
def test_round(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.round(input, name='round')
self.assertIsNotNone(out)
print(str(program))
def test_reciprocal(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.reciprocal(input, name='reciprocal')
self.assertIsNotNone(out)
print(str(program))
def test_square(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.square(input, name='square')
self.assertIsNotNone(out)
print(str(program))
def test_softplus(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.softplus(input, name='softplus')
self.assertIsNotNone(out)
print(str(program))
def test_softsign(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.softsign(input, name='softsign')
self.assertIsNotNone(out)
print(str(program))
def test_roi_perspective_transform(self):
program = Program()
with program_guard(program):
......@@ -606,6 +758,14 @@ class TestBook(unittest.TestCase):
out = layers.expand(x, [1, 2])
print(str(program))
def test_softshrink(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.softshrink(input, name='softshrink')
self.assertIsNotNone(out)
print(str(program))
if __name__ == '__main__':
unittest.main()
......@@ -25,6 +25,7 @@ import paddle.fluid.layers as layers
from paddle.fluid.layers.io import ListenAndServ
from paddle.fluid.layers.io import Recv
from paddle.fluid.layers.io import Send
import paddle.fluid.layers.ops as ops
from paddle.fluid.transpiler.details import program_to_code
......@@ -52,7 +53,7 @@ class TestProgram2Code(unittest.TestCase):
name="X",
append_batch_size=False)
fluid.initializer.Constant(value=1.0)(x, main.global_block())
layers.scale(x=x, scale=10.0, out=out_var)
ops._scale(x=x, scale=10.0, out=out_var)
program_to_code(main)
......
......@@ -12,1247 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import os
import errno
import shutil
import six
import time
from . import core
from . import data_feeder
from . import executor
from . import framework
from . import io
# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module
from . import optimizer as opt_module
from . import parallel_executor
from .transpiler import distribute_transpiler
__all__ = [
'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent',
'EndStepEvent', 'CheckpointConfig'
]
class BeginEpochEvent(object):
"""
The begin of a training epoch.
Args:
epoch_id(int): The current epoch ID.
"""
def __init__(self, epoch_id):
self.epoch = epoch_id
class EndEpochEvent(object):
"""
The end of a training epoch.
Args:
epoch_id(int): The current epoch ID.
"""
def __init__(self, epoch_id):
self.epoch = epoch_id
class BeginStepEvent(object):
"""
The begin of a training epoch.
Args:
epoch_id(int): The current epoch ID.
step_id(int): The current step ID.
"""
def __init__(self, epoch_id, step_id):
self.epoch = epoch_id
self.step = step_id
self.fetch_metrics = True
"""
If fetch_metrics is true, the metrics will be fetched at the
EndStepEvent. Default is True.
"""
class EndStepEvent(object):
"""
The end of a training step.
Args:
epoch_id(int): The current epoch ID.
step_id(int): The current step ID.
metrics(list): A list of fetched tensor. The order of this list is same
as the :code:`train_func` returns.
"""
def __init__(self, epoch_id, step_id, metrics):
self.epoch = epoch_id
self.step = step_id
self.metrics = metrics
class CheckpointConfig(object):
"""
Parameter object for :code:`save_checkpoint` and
:code:`fluid.Trainer`. Used to configuration how to save checkpoint.
Args:
checkpoint_dir(str): Directory path to save check point. Default is the
current directory.
max_num_checkpoints(int): The max number of local check points.
epoch_interval(int): Every number of epoch to save check point.
step_interval(int): Every number of step to save check point.
Examples:
>>> config = fluid.CheckpointConfig("./checkpoints")
>>> trainer = fluid.Trainer(train_func=train_program,
>>> place=place,
>>> optimizer_func=optimizer_func,
>>> checkpoint_config=config)
>>> trainer.train(...)
"""
def __init__(self,
checkpoint_dir=None,
max_num_checkpoints=3,
epoch_interval=1,
step_interval=10):
assert epoch_interval >= 1
assert step_interval >= 1
self.checkpoint_dir = checkpoint_dir \
if checkpoint_dir is not None else os.getcwd()
self.max_num_checkpoints = max_num_checkpoints
self.epoch_interval = epoch_interval
self.step_interval = step_interval
self.epoch_id = 0
self.step_id = 0
self.load_serial = None
self.pserver_id = None
self.lookup_table_name = None
def check_and_get_place(place):
"""
Check the type of place or get the default place
Args:
place(None|core.CUDAPlace|core.CPUPlace): the place that trainer will be executed on.
Raises:
TypeError if the type mismatched.
Returns:
the original place if it is not None.
if fluid is compiled with CUDA, returns CUDAPlace(0) by default.
Otherwise returns CPUPlace by default.
"""
if place is None:
if core.is_compiled_with_cuda():
return core.CUDAPlace(0)
else:
return core.CPUPlace()
else:
if not isinstance(place, core.CUDAPlace) and not isinstance(
place, core.CPUPlace):
raise TypeError("Place should be either CUDAPlace or CPUPlace")
return place
class Trainer(object):
"""
A trainer wraps MultiGPU/MultiNode training loops and can be used to train a
simple neural network easily.
This API takes a :code:`train_func`. A :code:`train_func` is a function that
return loss as it first return value. The reset value can be fetched by
EndStepEvent.metrics
This API also takes a :code:`optimizer_func` that will return an optimizer
instance.
For example, to train a MLP for MNIST dataset, the sample program is
>>> import paddle.fluid as fluid
>>>
>>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10):
>>> hidden = image
>>> for layer_size in layer_sizes:
>>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation)
>>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax")
>>>
>>> def train_mnist_mlp():
>>> img = fluid.layers.data(name='image', shape=[784])
>>> label = fluid.layers.data(name='label', shape=[1], dtype='int64')
>>> prediction = mlp(img)
>>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label))
>>>
>>> def optimizer():
>>> return fluid.optimizer.Adam()
>>>
>>> trainer = Trainer(train_func=train_mnist_mlp,
>>> optimizer_func=optimizer,
>>> place=fluid.CUDAPlace(0),
>>> parallel=True)
>>>
>>> def train_callback(event):
>>> if isinstance(event, fluid.EndStepEvent):
>>> print "Epoch ID", event.epoch, "Step ID",\
>>> event.step, "AvgLoss", event.metrics[0]
>>> elif isinstance(event, fluid.EndEpochEvent):
>>> trainer.save_params("./model_{0}".format(event.epoch))
>>>
>>> trainer.train(num_epochs=100, event_handler=train_callback)
For more example, please see :ref:`api_guide_high_level_api`.
Args:
train_func(callable): A function which will return loss. The loss must be
a scalar tensor.
optimizer_func(callable): A function that returns an Optimizer object.
place(CUDAPlace|CPUPlace): The device place of this trainer. If
:code:`parallel=True,` all CUDA Places will be used if :code:`place`
is a :code:`CUDAPlace`.
parallel(bool): True if use multiple devices.
checkpoint_config(CheckpointConfig): Configuration about how to save
checkpoints.
"""
def __init__(self,
train_func,
optimizer_func,
param_path=None,
place=None,
parallel=False,
checkpoint_config=None):
self.__stop = False
self.parallel = parallel
# config for checkpoint
# only chief worker will save variables
self.trainer_id = 0
self.checkpoint_cfg = checkpoint_config
if self.checkpoint_cfg:
assert isinstance(self.checkpoint_cfg, CheckpointConfig)
serial = _get_latest_checkpoint_serial(
self.checkpoint_cfg.checkpoint_dir)
self.checkpoint_cfg.load_serial = serial if serial >= 0 else None
self.scope = core.Scope()
# 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in
# test_word2vec.py
self.startup_program = framework.Program()
self.train_program = framework.Program()
with framework.program_guard(self.train_program, self.startup_program):
program_func_outs = train_func()
self.train_func_outputs = program_func_outs if isinstance(
program_func_outs, list) else [program_func_outs]
self.test_program = self.train_program.clone(for_test=True)
# The first element of program_func_outs is loss.
loss = self.train_func_outputs[0]
optimizer = optimizer_func()
if not isinstance(optimizer, opt_module.Optimizer):
raise TypeError(
"The optimizer should be an instance of Optimizer")
optimize_ops, params_grads = optimizer.minimize(loss)
self.place = check_and_get_place(place)
self._dist_transpile_if_necessary(optimize_ops, params_grads)
# 2. move the default_main_program to self.program and run the
# default_startup program on an empty core.Scope()
# Run startup program
with self._prog_and_scope_guard():
exe = executor.Executor(place)
exe.run(self.startup_program)
if self.checkpoint_cfg and self.checkpoint_cfg.load_serial is not None:
self._load_checkpoint()
if param_path and os.path.isdir(param_path):
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_persistables(
executor=exe,
dirname=param_path,
main_program=self.startup_program)
def _transpile_nccl2_dist(self):
# PADDLE_TRAINER_IPS
if "PADDLE_TRAINER_IPS" not in os.environ:
self.nccl_id_var = None
else:
self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID"))
port = os.getenv("PADDLE_PSERVER_PORT")
worker_ips = os.getenv("PADDLE_TRAINER_IPS")
worker_endpoints = []
for ip in worker_ips.split(","):
worker_endpoints.append(':'.join([ip, port]))
self.num_trainers = len(worker_endpoints)
current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port
worker_endpoints.remove(current_endpoint)
# TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id
# in ParallelExecutor to start
# distributed training using NCCL2
self.nccl_id_var = self.startup_program.global_block().create_var(
name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
self.startup_program.global_block().append_op(
type="gen_nccl_id",
inputs={},
outputs={"NCCLID": self.nccl_id_var},
attrs={
"endpoint": current_endpoint,
"endpoint_list": worker_endpoints,
"trainer_id": self.trainer_id
})
def _dist_transpile_if_necessary(self, optimize_ops, params_grads):
self._transpile_nccl2_dist()
if self.nccl_id_var != None:
return
if "PADDLE_TRAINING_ROLE" not in os.environ:
return
# the port of all pservers, needed by both trainer and pserver
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
# comma separated ips of all pservers, needed by trainer and
# pserver
pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist)
# total number of workers/trainers in the job, needed by
# trainer and pserver
trainers = int(os.getenv("PADDLE_TRAINERS"))
# the IP of the local machine, needed by pserver only
current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
# the unique trainer id, starting from 0, needed by trainer
# only
self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
# the role, should be either PSERVER or TRAINER
training_role = os.getenv("PADDLE_TRAINING_ROLE")
with self._prog_and_scope_guard():
t = distribute_transpiler.DistributeTranspiler()
t.transpile(
self.trainer_id, pservers=pserver_endpoints, trainers=trainers)
if training_role == "PSERVER":
if self.checkpoint_cfg:
pserver_id = eplist.index(current_endpoint)
self.checkpoint_cfg.pserver_id = pserver_id
if t.has_distributed_lookup_table:
self.checkpoint_cfg.lookup_table_name = t.table_name
self.train_program = t.get_pserver_program(current_endpoint)
self.startup_program = t.get_startup_program(current_endpoint,
self.train_program)
elif training_role == "TRAINER":
self.train_program = t.get_trainer_program()
else:
raise ValueError(
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
def stop(self):
"""
stop training
"""
self.__stop = True
def train(self, num_epochs, event_handler, reader=None, feed_order=None):
"""
Start the train loop to train the model.
Args:
num_epochs(int): The number of epoch. An epoch will process all data in reader
event_handler(callable): The event handler. A function with type (ev:Event)->void
reader(callable): A reader creator object. See also
:ref:`api_guide_python_reader` .
feed_order(list): Feeding order of reader. None will following the defining
order in program
Returns:
None
"""
training_role = os.getenv("PADDLE_TRAINING_ROLE", "")
if training_role == "PSERVER":
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
exe.run()
return
if self.parallel:
self._train_by_parallel_executor(num_epochs, event_handler, reader,
feed_order)
else:
self._train_by_executor(num_epochs, event_handler, reader,
feed_order)
def test(self, reader, feed_order):
"""
Test the model on given test data
Args:
reader(callable): The reader that yields test data.
feed_order(list): Feeding order of reader. None will following the
defining order in program
"""
return self._test_by_executor(reader, feed_order,
self.train_func_outputs)
def save_params(self, param_path):
"""
Save all parameters into :code:`param_path`.
Args:
param_path(str): The path to save parameters.
Returns:
None
"""
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
io.save_persistables(exe, dirname=param_path)
def save_inference_model(self, param_path, feeded_var_names,
target_var_indexes):
"""
Save model for cpp inference into :code:`param_path`.
Args:
param_path(str): The path to save parameters.
feeded_var_names(list(str)): The name of the vars that you
need to feed in before run program.
target_var_indexes(list(int)): the index of target var that
you need to return in trainer.train_func.
Returns:
None
"""
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
target_vars = [
self.train_func_outputs[index] for index in target_var_indexes
]
io.save_inference_model(param_path, feeded_var_names, target_vars,
exe)
@contextlib.contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(
main_program=self.train_program,
startup_program=self.startup_program):
with executor.scope_guard(self.scope):
yield
def _train_by_executor(self, num_epochs, event_handler, reader, feed_order):
"""
Train by Executor and single device.
Args:
num_epochs:
event_handler:
reader:
feed_order:
Returns:
"""
with self._prog_and_scope_guard():
feed_var_list = build_feed_var_list(self.train_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
exe = executor.Executor(self.place)
reader = feeder.decorate_reader(reader, multi_devices=False)
self._train_by_any_executor(event_handler, exe, num_epochs, reader)
def _train_by_any_executor(self, event_handler, exe, num_epochs, reader):
if self.checkpoint_cfg:
epochs = [
epoch_id for epoch_id in range(num_epochs)
if epoch_id >= self.checkpoint_cfg.epoch_id
]
else:
epochs = [epoch_id for epoch_id in range(num_epochs)]
for epoch_id in epochs:
event_handler(BeginEpochEvent(epoch_id))
for step_id, data in enumerate(reader()):
if self.__stop:
if self.checkpoint_cfg:
self._clean_checkpoint()
return
if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \
and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id:
continue
begin_event = BeginStepEvent(epoch_id, step_id)
event_handler(begin_event)
if begin_event.fetch_metrics:
metrics = exe.run(feed=data,
fetch_list=[
var.name
for var in self.train_func_outputs
])
else:
metrics = exe.run(feed=data, fetch_list=[])
if self.checkpoint_cfg:
self._save_checkpoint(epoch_id, step_id)
event_handler(EndStepEvent(epoch_id, step_id, metrics))
event_handler(EndEpochEvent(epoch_id))
if self.checkpoint_cfg:
self._clean_checkpoint()
def _test_by_executor(self, reader, feed_order, fetch_list):
with executor.scope_guard(self.scope):
feed_var_list = build_feed_var_list(self.test_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
exe = executor.Executor(self.place)
accumulated = len(fetch_list) * [0]
count = 0
for data in reader():
outs = exe.run(program=self.test_program,
feed=feeder.feed(data),
fetch_list=fetch_list)
accumulated = [x[0] + x[1][0] for x in zip(accumulated, outs)]
count += 1
return [x / count for x in accumulated]
def _train_by_parallel_executor(self, num_epochs, event_handler, reader,
feed_order):
with self._prog_and_scope_guard():
pe = self._get_or_create_parallel_executor()
feed_var_list = build_feed_var_list(self.train_program, feed_order)
feeder = data_feeder.DataFeeder(
feed_list=feed_var_list, place=self.place)
reader = feeder.decorate_reader(reader, multi_devices=True)
self._train_by_any_executor(event_handler, pe, num_epochs, reader)
def _get_parallel_executor(self):
return getattr(self, 'parallel_executor', None)
def _get_or_create_parallel_executor(self):
if self._get_parallel_executor() is None:
self.parallel_executor = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.train_func_outputs[0].name)
return self._get_parallel_executor()
def _clean_checkpoint(self):
assert self.checkpoint_cfg
clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir)
def _get_checkpoint_load_args(self):
"""
epoch_id and step_id are runtime arguments, they are not variables, will load them independently.
"""
return ["epoch_id", "step_id"]
def _get_checkpoint_save_args(self, epoch_id, step_id):
"""
epoch_id and step_id are runtime arguments, they are not variables, will save them independently.
"""
trainer_args = {}
trainer_args["epoch_id"] = epoch_id
trainer_args["step_id"] = step_id
return trainer_args
def _save_checkpoint(self, epoch_id, step_id):
assert self.checkpoint_cfg
if epoch_id % self.checkpoint_cfg.epoch_interval == 0 \
and step_id % self.checkpoint_cfg.step_interval == 0:
exe = executor.Executor(self.place)
save_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
trainer_id=self.trainer_id,
trainer_args=self._get_checkpoint_save_args(epoch_id, step_id),
main_program=self.train_program,
max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints)
def _load_checkpoint(self):
with self._prog_and_scope_guard():
exe = executor.Executor(self.place)
load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program)
if not self.checkpoint_cfg.pserver_id:
load_trainer_args = self._get_checkpoint_load_args()
trainer_args = load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program,
role_id=self.trainer_id,
is_trainer=True,
load_trainer_args=load_trainer_args)
if len(trainer_args) != 2:
raise ValueError(
"the return trainer_args length do not equal _get_checkpoint_load_args"
)
self.checkpoint_cfg.epoch_id = int(trainer_args[0])
self.checkpoint_cfg.step_id = int(trainer_args[1])
else:
if self.checkpoint_cfg.lookup_table_name:
load_checkpoint(
executor=exe,
checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
main_program=self.startup_program,
role_id=self.checkpoint_cfg.pserver_id,
is_trainer=False,
load_trainer_args=None,
load_lookup_table=self.checkpoint_cfg.lookup_table_name)
def build_feed_var_list(program, feed_order):
if not isinstance(program, framework.Program):
raise TypeError("The 'program' should be an object of Program")
if isinstance(feed_order, list):
feed_var_list = [
program.global_block().var(var_name) for var_name in feed_order
]
else:
if not isinstance(feed_order, dict):
raise TypeError(
"The 'feed_order' should be either None, list or dict.")
if not sorted(feed_order.values()) == list(range(len(feed_order))):
raise ValueError(
"The values of 'feed_order' should be a permutation of [0, len(feed_order))"
)
sorted_pair_list = sorted(
six.iteritems(feed_order), key=lambda item: item[1])
feed_var_list = [
program.global_block().var(pair[0]) for pair in sorted_pair_list
]
return feed_var_list
# move Checkpoint APIs from io.py to trainer.py, make all of them are private.
SUCCESS_MARK_FILENAME = "_SUCCESS"
CHECKPOINT_PREFIX = "checkpoint"
MODEL_DIR = "__model__"
LOOKUP_TABLE_DIR = "__lookup_table__"
TRAINER_PREFIX = "trainer"
CHECKPOINT_SEPARATOR = "_"
def save_checkpoint(executor,
checkpoint_dir,
trainer_id,
main_program,
trainer_args=None,
max_num_checkpoints=3,
lookup_table=None,
pserver_endpoints=None):
"""
This function filters out all checkpoint variables from the give
main_program and then saves these variables to the `checkpoint_dir`
directory.
In the training precess, we generally save a checkpoint in each
iteration. So there might be a lot of checkpoints in the
`checkpoint_dir`. To avoid them taking too much disk space, the
`max_num_checkpoints` are introduced to limit the total number of
checkpoints. If the number of existing checkpints is greater than
the `max_num_checkpoints`, oldest ones will be scroll deleted.
A variable is a checkpoint variable and will be saved if it meets
all following conditions:
1. It's persistable.
2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
Args:
executor(Executor): The executor to run for save checkpoint.
checkpoint_dir(str): The folder where to save checkpoints.
trainer_id(int): currect trainer id, if id is equal to 0, the trainer
is chief.
trainer_args(dict|None): Current training arguments. Such as 'epoch_id'
and 'step_id'.
Defaut: None
main_program(Program): The program whose checkpoint variables will
be saved.
max_num_checkpoints(int): The max number of total number of existing
checkpoints.
Default: 3
lookup_table(string|None): the lookup table name, when use distribute
lookup table, we can get lookup table name by DistributeTranspiler.
table_name
pserver_endpoints(list|None): the parameter server ip:port list.
when use distribute lookup table, we can get pserver_endpoints by
distribute arguments.
Returns:
None
Raises:
ValueError: If `checkpoint_dir` is None.
AssertionError: If `trainer_args` is not a dict.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./checkpoints"
prog = fluid.default_main_program()
trainer_args = {"epoch_id": 200,
"step_id": 20} # just an example
table_name = "share_w"
ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
save_checkpoint(executor=exe,
checkpoint_dir=path,
trainer_id=0,
trainer_args=trainer_args,
main_program=prog,
max_num_checkpoints=3,
lookup_table=table_name,
pserver_endpoints = ps_endpoints)
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
if main_program is None:
raise ValueError('main_program should not be None.')
if trainer_args:
assert isinstance(trainer_args, dict)
is_chief = trainer_id == 0
_make_chekcpoint_dirs(checkpoint_dir)
serial = _get_latest_checkpoint_serial(checkpoint_dir) + 1
cur_dir = _get_serial_dir(checkpoint_dir, serial)
_save_trainer_args(cur_dir, trainer_id, trainer_args)
if is_chief:
_save_persist_vars_without_grad(executor, cur_dir, main_program)
if is_chief and lookup_table and pserver_endpoints:
_save_pserver_vars_by_notify(executor, cur_dir, lookup_table,
pserver_endpoints)
_scroll_delete(checkpoint_dir, max_num_checkpoints)
def load_checkpoint(executor,
checkpoint_dir,
main_program,
role_id=0,
is_trainer=True,
load_trainer_args=None,
load_lookup_table=None):
"""
This function filters out all checkpoint variables from the give
main_program and then try to load these variables from the
`checkpoint_dir` directory.
In the training precess, we generally save a checkpoint in each
iteration. So there are more than one checkpoint in the
`checkpoint_dir` (each checkpoint has its own sub folder), use
`serial` to specify which serial of checkpoint you would like to
load.
A variable is a checkpoint variable and will be loaded if it meets
all following conditions:
1. It's persistable.
2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
Args:
executor(Executor): The executor to run for loading checkpoint.
checkpoint_dir(str): The folder where all checkpoints are.
serial(int): The serial of checkpoint you would like to load.
main_program(Program): The program whose checkpoint variables will
be loaded.
role_id(int): the trainer id or the parameter server id.
is_trainer(bool): trainer is True and parameter server is False.
load_trainer_args(list|None): list about load trainer args.
load_lookup_table(str|None): the lookup table name
Returns:
None
Raises:
ValueError: If `checkpoint_dir` is None.
ValueError: If `main_program` is None.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./checkpoints"
prog = fluid.default_main_program()
load_checkpoint(executor=exe, checkpoint_dir=path,
serial=9, main_program=prog)
# In this example, `load_checkpoint` function
# will first filters out all checkpoint variables in the default
# main program, and then try to load these variables form the
# folder "./checkpoints/checkpoint_9/__model__".
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
serial = _get_latest_checkpoint_serial(checkpoint_dir)
# there are nothing need to be loaded
if serial is None or serial < 0:
return
if main_program is None:
raise ValueError('main_program should not be None.')
if is_trainer and load_trainer_args is None:
cur_dir = _get_serial_dir(checkpoint_dir, serial)
_load_persist_vars_without_grad(executor, cur_dir, main_program, True)
return
if is_trainer and load_trainer_args:
return _load_trainer_args(checkpoint_dir, serial, role_id,
load_trainer_args)
if not is_trainer and load_lookup_table:
_load_lookup_table_vars(executor, checkpoint_dir, main_program, role_id,
load_lookup_table)
def clean_checkpoint(checkpoint_dir, delete_dir=False):
"""
clean the checkpoint dir, when the train exits normally,
the trainer will call clean_checkpoint to delete checkpoint directory saved before.
delete_dir only works when the directory is empty, otherwise, OSError is raised.
: param checkpoint_dir
: param delete_dir
"""
if checkpoint_dir is None:
raise ValueError("'checkpoint_dir' should not be None")
_scroll_delete(checkpoint_dir, max_num_checkpoints=0)
if delete_dir and not os.listdir(checkpoint_dir):
os.rmdir(checkpoint_dir)
def _load_persist_vars_without_grad(executor,
dirname,
program,
has_model_dir=False):
"""
This function filters out all checkpoint variables from the give
program and then trys to load these variables from the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
1. It's persistable.
2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
Args:
executor(Executor): The executor to run for loading variables.
dirname(str): The directory path.
program(Program): The program whose checkpoint variables will
be loaded.
has_model_dir(bool): if True, the function loads variables
from a sub directory named '__model__'.
Default: False
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
_load_persist_vars_without_grad(executor=exe,
dirname=param_path, program=prog, has_model_dir=True)
# In this example, `_load_persist_vars_without_grad` function
# will first filters out all checkpoint variables in the default
# main program, and then trys to load these variables form the
# folder "./my_paddle_model/__model__".
"""
if has_model_dir:
dirname = _get_model_dir(dirname)
io.load_vars(
executor,
dirname=dirname,
main_program=program,
predicate=_is_checkpoint_var,
filename=None)
def _load_lookup_table_vars(executor, dirname, program, pserver_id, table_name):
"""
The parameter server will load lookup table's local file in
selectedrows variable.
Args:
executor(Executor): The executor to run for loading persistable variables
dirname(str): The directory path
main_program(Program): Find the variable named table_name in main_program
pserver_id(int): the serial number in pserver_endpoints list
table_name(str): lookup table name
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
dirname = "./checkpoints/checkpoint_9/"
prog = fluid.default_main_program()
pserver_id = 1
table_name = "share_w"
_load_lookup_table_vars(executor=exe,
dirname=dirname, program=prog, pserver_id=pserver_id,
table_name=table_name)
"""
for var in program.list_vars():
if var.name == table_name:
lookup_table_var = var
break
assert lookup_table_var is not None
lookup_table_dir = os.path.join(dirname, LOOKUP_TABLE_DIR)
table_file = table_name + CHECKPOINT_SEPARATOR + str(pserver_id)
load_prog = framework.Program()
load_block = load_prog.global_block()
load_block.append_op(
type='load',
inputs={},
outputs={'Out': [lookup_table_var]},
attrs={'file_path': os.path.join(lookup_table_dir, table_file)})
executor.run(load_prog)
def _save_persist_vars_without_grad(executor, dirname, program):
"""
This function filters out all checkpoint variables from the give
program and then save these variables to a sub-folder '__model__' of
the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
1. It's persistable.
2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
3. It's name contains no "@GRAD" nor ".trainer_" nor ".block".
Args:
executor(Executor): The executor to run for saving variables.
dirname(str): The directory path.
program(Program): The program whose checkpoint variables will
be saved.
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
_save_persist_vars_without_grad(executor=exe,
dirname=param_path, program=prog)
# In this example, `_save_persist_vars_without_grad` function
# will first filters out all checkpoint variables in the default
# main program, and then saves these variables to the folder
# "./my_paddle_model/__model__".
"""
cur_dir = _get_model_dir(dirname)
io.save_vars(
executor,
dirname=cur_dir,
main_program=program,
vars=None,
predicate=_is_checkpoint_var,
filename=None)
_write_success(cur_dir)
def _save_pserver_vars_by_notify(executor, dirname, lookup_table,
ps_endpoint_list):
"""
This function will send checkpoint notify message from Trainer 0
to all the pservers.
The checkpoint notify message contains lookup table name,
the absolute path on pserver to save lookup_table.
Args:
executor(Executor): The executor to run for send checkpoint notify.
dirname(str): The folder where to save checkpoints.
lookup_table(string): the lookup table name, when use distribute
lookup table, we can get lookup table name by DistributeTranspiler.
table_name
ps_endpoint_list(list): the parameter server ip:port list.
when use distribute lookup table, we can get ps_endpoint_list by
distribute arguments.
Return:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
table_name = "share_w"
ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
_save_pserver_vars_by_notify(executor=exe,
dirname=param_path, lookup_table=table_name,
ps_endpoint_list=ps_endpoints)
"""
cur_dir = _get_lookuptable_dir(dirname)
checkpoint_notify_program = framework.Program()
checkpoint_notify_block = checkpoint_notify_program.global_block()
attrs = {}
attrs['epmap'] = ps_endpoint_list
attrs['dir'] = cur_dir
attrs['lookup_table'] = lookup_table
checkpoint_notify_block.append_op(
type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs)
executor.run(checkpoint_notify_program)
def _save_trainer_args(dirname, trainer_id, trainer_args):
assert isinstance(trainer_args, dict)
cur_dir = _get_trainer_dir(dirname, trainer_id)
for name, value in six.iteritems(trainer_args):
args_file = os.path.join(cur_dir, name)
with open(args_file, 'w') as f:
f.write(str(value))
_write_success(cur_dir)
def _load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args):
"""
trainer will load some args from it's independent directory,
such as epoch_id and step_id.
Args:
checkpoint_dir(str): The folder where all checkpoints are.
serial(int): The serial of checkpoint you would like to load.
trainer_id(int): current trainer id.
trainer_args(list): list about load trainer args
Return:
None
Examples:
.. code-block:: python
param_path = "./checkpoint/"
serial = 7
trainer_id = 2
trainer_args = ["epoch_id", "step_id"]
_load_trainer_args(checkpoint_dir=param_path, serial=serial,
trainer_id=trainer_id, trainer_args=trainer_args)
"""
assert isinstance(trainer_args, list)
cur_dir = _get_serial_dir(checkpoint_dir, serial)
cur_dir = _get_trainer_dir(cur_dir, trainer_id)
ret_values = []
for arg in trainer_args:
cur_file = os.path.join(cur_dir, arg)
with open(cur_file, 'r') as f:
contents = f.read()
ret_values.append(contents.strip())
return ret_values
def _is_checkpoint_var(var):
"""
the checkpoint will not save or load all the variables.
var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded.
: param var(Variable)
"""
if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
var.desc.type() == core.VarDesc.VarType.RAW:
return False
# @GRAD are named for gradient variables, checkpoint will not save it.
if "@GRAD" in var.name:
return False
# .trainer_ are named for distribute train variables, checkpoint will not save it.
if ".trainer_" in var.name:
return False
# .block is named for distribute train variables, checkpoint will not save it.
if ".block" in var.name:
return False
return var.persistable
def _make_chekcpoint_dirs(dirs):
"""
_make_chekcpoint_dirs will makdir local directory directly, when the directory is exist, it will igore it.
"""
assert dirs is not None
if os.path.isfile(dirs):
raise OSError(errno.ENOTDIR, "dirs path shoule be a Directory.", dirs)
if not os.path.isdir(dirs):
try:
os.makedirs(dirs)
except OSError as err:
if err.errno != errno.EEXIST:
raise err
def _get_dir_serial(dirname):
_, serial = dirname.split(CHECKPOINT_SEPARATOR)
try:
serial_num = int(serial)
except ValueError:
serial_num = -1
return serial_num
def _get_serial_dir(dirname, serial):
serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial)
serial_dir = os.path.join(dirname, serial_folder)
_make_chekcpoint_dirs(serial_dir)
return serial_dir
def _get_model_dir(dirname):
model_dir = os.path.join(dirname, MODEL_DIR)
_make_chekcpoint_dirs(model_dir)
return model_dir
def _get_lookuptable_dir(dirname):
lookuptable_dir = os.path.join(dirname, LOOKUP_TABLE_DIR)
_make_chekcpoint_dirs(lookuptable_dir)
return lookuptable_dir
def _get_trainer_dir(dirname, trainer_id):
trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id)
trainer_dir = os.path.join(dirname, trainer_folder)
_make_chekcpoint_dirs(trainer_dir)
return trainer_dir
def _scroll_delete(dirname, max_num_checkpoints=3):
dirs = os.listdir(dirname)
serial_map = {}
for serial in dirs:
serial_num = _get_dir_serial(serial)
serial_map[serial_num] = serial
if len(list(serial_map.keys())) <= max_num_checkpoints:
return
serials = list(serial_map.keys())
serials.sort(reverse=True)
serials = serials[max_num_checkpoints:]
for serial in serials:
cur_dir = _get_serial_dir(dirname, serial)
try:
shutil.rmtree(cur_dir)
except OSError as err:
if err.errno != errno.ENOENT:
raise err
def _write_success(dirname):
"""
write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct.
: param dirname
"""
success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME)
with open(success_file, 'a') as f:
now = time.ctime()
f.write(now)
def _get_latest_checkpoint_serial(checkpoint_dir):
"""
get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory
: param checkpoint_dir
"""
if not checkpoint_dir:
return -1
def has_success(checkpoint_dir, cur_dir):
"""
is _SUCCESS in this dir
"""
serial = _get_dir_serial(cur_dir)
if serial == -1 or not os.path.isdir(
os.path.join(checkpoint_dir, cur_dir)):
return -1
success_path = os.path.join(
_get_serial_dir(checkpoint_dir, serial), MODEL_DIR,
SUCCESS_MARK_FILENAME)
if os.path.isfile(success_path):
return serial
if not os.path.isdir(checkpoint_dir):
return -1
current_dir = -1
dirs = os.listdir(checkpoint_dir)
for cur_dir in dirs:
success_num = has_success(checkpoint_dir, cur_dir)
if success_num > current_dir:
current_dir = success_num
return current_dir
# NOTE: Trainer is moved into fluid.contrib.trainer.
__all__ = []
......@@ -136,6 +136,8 @@ class DistributeTranspilerConfig(object):
slice_var_up = True
split_method = None
min_block_size = 8192
# supported modes: pserver, nccl2
mode = "pserver"
print_log = False
......@@ -144,27 +146,30 @@ class DistributeTranspiler(object):
**DistributeTranspiler**
Convert the fluid program to distributed data-parallelism programs.
Supports two modes: pserver mode and nccl2 mode.
The main_program will be transformed to use a remote parameter server
to do parameter optimization. And the optimization graph will be put
into a parameter server program.
In pserver mode, the main_program will be transformed to use a remote
parameter server to do parameter optimization. And the optimization
graph will be put into a parameter server program.
In nccl2 mode, the transpiler will append a NCCL_ID broadcasting
op in startup_program to share the NCCL_ID across the job nodes.
After transpile_nccl2 called, you ***must*** pass trainer_id and
num_trainers argument to ParallelExecutor to enable NCCL2 distributed
mode.
Examples:
.. code-block:: python
# Define your model before these codes.
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist)
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
# for pserver mode
pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
current_endpoint = "192.168.0.1:6174"
trainer_id = 0
trainers = 4
role = os.getenv("PADDLE_TRAINING_ROLE")
t = distribute_transpiler.DistributeTranspiler()
t = fluid.DistributeTranspiler()
t.transpile(
trainer_id, pservers=pserver_endpoints, trainers=trainers)
if role == "PSERVER":
......@@ -173,6 +178,18 @@ class DistributeTranspiler(object):
pserver_program)
elif role == "TRAINER":
trainer_program = t.get_trainer_program()
# for nccl2 mode
config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
t.transpile(trainer_id, workers=workers, current_endpoint=curr_ep)
exe = fluid.ParallelExecutor(
use_cuda,
loss_name=loss_var.name,
num_trainers=len(trainers.split(",)),
trainer_id=trainer_id
)
"""
def __init__(self, config=None):
......@@ -190,13 +207,41 @@ class DistributeTranspiler(object):
assert (self.config.min_block_size >= 8192)
assert (self.config.split_method.__bases__[0] == PSDispatcher)
def _transpile_nccl2(self,
trainer_id,
trainers,
current_endpoint,
startup_program=None):
if not startup_program:
startup_program = default_startup_program()
if trainer_id >= 0:
worker_endpoints = trainers.split(",")
# send NCCL_ID to others or recv from trainer 0
worker_endpoints.remove(current_endpoint)
nccl_id_var = startup_program.global_block().create_var(
name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
startup_program.global_block().append_op(
type="gen_nccl_id",
inputs={},
outputs={"NCCLID": nccl_id_var},
attrs={
"endpoint": current_endpoint,
"endpoint_list": worker_endpoints,
"trainer_id": trainer_id
})
return nccl_id_var
else:
raise ValueError("must set trainer_id > 0")
def transpile(self,
trainer_id,
program=None,
pservers="127.0.0.1:6174",
trainers=1,
sync_mode=True,
startup_program=None):
startup_program=None,
current_endpoint="127.0.0.1:6174"):
"""
Run the transpiler.
......@@ -207,10 +252,15 @@ class DistributeTranspiler(object):
default is fluid.default_main_program().
pservers (str): comma separated ip:port string for the pserver
list.
trainers (int): number of trainers in the distributed job.
trainers (int|str): in pserver mode this is the number of
trainers, in nccl2 mode this is a string of trainer
endpoints.
sync_mode (bool): Do sync training or not, default is True.
startup_program (Program|None): startup_program to transpile,
default is fluid.default_main_program().
current_endpoint (str): need pass current endpoint when
transpile as nccl2 distributed mode. In pserver mode
this argument is not used.
"""
if program is None:
program = default_main_program()
......@@ -220,6 +270,15 @@ class DistributeTranspiler(object):
self.startup_program = startup_program
self.origin_startup_program = self.startup_program.clone()
if self.config.mode == "nccl2":
assert (isinstance(trainers, str))
self._transpile_nccl2(
trainer_id,
trainers,
current_endpoint,
startup_program=startup_program)
return
self.trainer_num = trainers
self.sync_mode = sync_mode
self.trainer_id = trainer_id
......@@ -1082,7 +1141,7 @@ to transpile() call.")
if self.sync_mode else []
},
attrs={
"sync_mode": False,
"sync_mode": self.sync_mode,
"epmap": pserver_endpoints,
RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE,
OP_ROLE_VAR_ATTR_NAME: [
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册