diff --git a/cmake/generic.cmake b/cmake/generic.cmake index a67512578147fc7223714dbc4cd124b831fb4775..5bf82b4ddf10a646ca540ac4ee2cfd3d3bc6cf58 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -110,6 +110,20 @@ function(find_fluid_modules TARGET_NAME) endif() endfunction(find_fluid_modules) +# Find all third_party modules used by the paddle static library, +# to reduce the dependencies when building the inference libs. +set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY) +function(find_fluid_thirdparties TARGET_NAME) + get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) + string(FIND "${__target_path}" "third_party" pos) + if(pos GREATER 1) + get_property(fluid_ GLOBAL PROPERTY FLUID_THIRD_PARTY) + set(fluid_third_partys ${fluid_third_partys} ${TARGET_NAME}) + set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY "${fluid_third_partys}") + endif() +endfunction(find_fluid_thirdparties) + function(merge_static_libs TARGET_NAME) set(libs ${ARGN}) list(REMOVE_DUPLICATES libs) @@ -204,18 +218,13 @@ function(merge_static_libs TARGET_NAME) foreach(lib ${libs}) # Get the file names of the libraries to be merged - #if(NOT $ MATCHES "lib.*\\.lib") - # message("library" ${lib}) - # set(libfiles ${libfiles} lib$) - #else() set(libfiles ${libfiles} $) - #endif() endforeach() - - # windows cmd return error in clean env. - # COMMAND del "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib" + # msvc will put the library into the "/Release/xxxlib" directory by default + # COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib" add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.lib ${libfiles} + COMMAND cmake -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}" + COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/lib${TARGET_NAME}.lib ${libfiles} ) endif(WIN32) endfunction(merge_static_libs) diff --git a/doc/README.md b/doc/README.md index 77aa2a5322057d582435e83f38476833d1f73c48..998a39f10699af6d1a391f177a5cf03c9ae170fd 100644 --- a/doc/README.md +++ b/doc/README.md @@ -2,6 +2,6 @@ Thanks for reading PaddlePaddle documentation. -Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [Fluiddoc Repo](https://github.com/PaddlePaddle/Paddle) and updated in Fluiddoc Repo. +Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [FluidDoc Repo](https://github.com/PaddlePaddle/FluidDoc) and updated there. -Please turn to Fluiddoc Repo for the latest documentation. +Please turn to FluidDoc Repo for the latest documentation.
diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 510db68950fc408633c8afbd029836731c93c5f4..48764fb968795cecae58dfca6d8e709eb8d7c4be 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -22,9 +22,6 @@ paddle.fluid.Operator.rename_input ArgSpec(args=['self', 'old_name', 'new_name'] paddle.fluid.Operator.rename_output ArgSpec(args=['self', 'old_name', 'new_name'], varargs=None, keywords=None, defaults=None) paddle.fluid.Operator.set_attr ArgSpec(args=['self', 'name', 'val'], varargs=None, keywords=None, defaults=None) paddle.fluid.Operator.to_string ArgSpec(args=['self', 'throw_on_error'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Parameter.__init__ ArgSpec(args=['self', 'block', 'shape', 'dtype'], varargs=None, keywords='kwargs', defaults=None) -paddle.fluid.Parameter.astype ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Parameter.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.program_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) @@ -35,25 +32,12 @@ paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)) paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) -paddle.fluid.Trainer.__init__ ArgSpec(args=['self', 'train_func', 'optimizer_func', 'param_path', 'place', 'parallel', 'checkpoint_config'], varargs=None, keywords=None, defaults=(None, None, False, None)) -paddle.fluid.Trainer.save_inference_model ArgSpec(args=['self', 'param_path', 'feeded_var_names', 'target_var_indexes'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Trainer.save_params ArgSpec(args=['self', 'param_path'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Trainer.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Trainer.test ArgSpec(args=['self', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Trainer.train ArgSpec(args=['self', 'num_epochs', 'event_handler', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.BeginEpochEvent.__init__ ArgSpec(args=['self', 'epoch_id'], varargs=None, keywords=None, defaults=None) -paddle.fluid.EndEpochEvent.__init__ ArgSpec(args=['self', 'epoch_id'], varargs=None, keywords=None, defaults=None) -paddle.fluid.BeginStepEvent.__init__ ArgSpec(args=['self', 'epoch_id', 'step_id'], varargs=None, keywords=None, defaults=None) -paddle.fluid.EndStepEvent.__init__ ArgSpec(args=['self', 'epoch_id', 'step_id', 'metrics'], varargs=None, keywords=None, defaults=None) -paddle.fluid.CheckpointConfig.__init__ ArgSpec(args=['self', 'checkpoint_dir', 'max_num_checkpoints', 'epoch_interval', 'step_interval'], varargs=None, keywords=None, defaults=(None, 3, 1, 10)) -paddle.fluid.Inferencer.__init__ ArgSpec(args=['self', 'infer_func', 'param_path', 'place', 'parallel'], varargs=None, keywords=None, 
defaults=(None, False)) -paddle.fluid.Inferencer.infer ArgSpec(args=['self', 'inputs', 'return_numpy'], varargs=None, keywords=None, defaults=(True,)) paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)) -paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None)) +paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')) paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0)) paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DistributeTranspilerConfig.__init__ @@ -160,7 +144,16 @@ paddle.fluid.layers.relu ArgSpec(args=['x', 'name'], varargs=None, keywords=None paddle.fluid.layers.log ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.crop ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.layers.rank_loss ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.elu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) +paddle.fluid.layers.relu6 ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)) +paddle.fluid.layers.pow ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) +paddle.fluid.layers.stanh ArgSpec(args=['x', 'scale_a', 'scale_b', 'name'], varargs=None, keywords=None, defaults=(0.6666666666666666, 1.7159, None)) +paddle.fluid.layers.hard_sigmoid ArgSpec(args=['x', 'slope', 'offset', 'name'], varargs=None, keywords=None, defaults=(0.2, 0.5, None)) +paddle.fluid.layers.swish ArgSpec(args=['x', 'beta', 'name'], varargs=None, keywords=None, defaults=(1.0, None)) paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], varargs=None, keywords=None, defaults=(None, None)) +paddle.fluid.layers.brelu ArgSpec(args=['x', 't_min', 't_max', 'name'], varargs=None, keywords=None, defaults=(0.0, 24.0, None)) +paddle.fluid.layers.leaky_relu ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(0.02, None)) +paddle.fluid.layers.soft_relu ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(40.0, None)) paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None)) paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 
'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None)) paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,)) @@ -169,6 +162,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)) +paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) @@ -233,15 +234,7 @@ paddle.fluid.layers.Print ArgSpec(args=['input', 'first_n', 'message', 'summariz paddle.fluid.layers.is_empty ArgSpec(args=['x', 'cond'], varargs=None, keywords='ignored', defaults=(None,)) paddle.fluid.layers.mean ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.scale ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.elementwise_add ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.elementwise_div ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.elementwise_sub ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.elementwise_mul ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.elementwise_max 
ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.elementwise_min ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.elementwise_pow ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.clip ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.clip_by_norm ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.logical_and ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) @@ -256,32 +249,23 @@ paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defa paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.sigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.logsigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.exp ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.tanh ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.tanh_shrink ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.softshrink ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.sqrt ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.abs ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.ceil ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.floor ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.cos ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.sin ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.round ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.reciprocal ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.square ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.softplus ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.softsign ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.brelu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.leaky_relu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.soft_relu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.elu ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.relu6 ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.pow ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.stanh ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.hard_sigmoid ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.swish ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) +paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) 
+paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.tanh ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.tanh_shrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.softshrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.sqrt ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.abs ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.ceil ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.floor ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.cos ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.sin ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.round ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.reciprocal ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.square ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.softplus ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.softsign ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.uniform_random ArgSpec(args=['shape', 'dtype', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=(None, None, None, None)) paddle.fluid.layers.hard_shrink ArgSpec(args=['x', 'threshold'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.cumsum ArgSpec(args=['x', 'axis', 'exclusive', 'reverse'], varargs=None, keywords=None, defaults=(None, None, None)) @@ -340,7 +324,7 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=[' paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)) -paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None)) +paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')) paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0)) paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, 
keywords=None, defaults=None) @@ -354,6 +338,7 @@ paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'fi paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max')) paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)) paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)) +paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn', 'use_mkldnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True, False)) paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)) diff --git a/paddle/fluid/framework/details/reference_count_op_handle.h b/paddle/fluid/framework/details/reference_count_op_handle.h index 71db8d952f4c205b875ad254dc19c0c1f74e61b3..fc479a4c4a1e7d5c824d3c202e0cccf743dd52c9 100644 --- a/paddle/fluid/framework/details/reference_count_op_handle.h +++ b/paddle/fluid/framework/details/reference_count_op_handle.h @@ -22,6 +22,7 @@ #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/garbage_collector.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/selected_rows.h" #include "paddle/fluid/framework/tensor.h" namespace paddle { @@ -46,17 +47,15 @@ class ReferenceCountOpHandle : public OpHandleBase { const std::vector &var_names, GarbageCollector *gc, AtomicReferenceCountMap *ref_cnts) - : OpHandleBase(node), - scope_(scope), - var_names_(var_names), - gc_(gc), - ref_cnts_(ref_cnts) { + : OpHandleBase(node), scope_(scope), gc_(gc), ref_cnts_(ref_cnts) { dev_ctx_ = static_cast( platform::DeviceContextPool::Instance().Get(place)); if (IsStreamGarabageCollector()) { PADDLE_ENFORCE(cudaSetDevice(place.device)); PADDLE_ENFORCE(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming)); } + + for (auto &name : var_names) AddVar(name); } ~ReferenceCountOpHandle() { @@ -69,19 +68,35 @@ class ReferenceCountOpHandle : public OpHandleBase { std::string Name() const override { return "reference_count"; } + void AddVar(const std::string &name) { + auto it = var_names_.find(name); + if (it != var_names_.end()) + ++(it->second); + else + var_names_[name] = 1; + } + protected: void RunImpl() override { auto *exec_scope = scope_->FindVar(kLocalExecScopeName)->Get(); - std::vector tensors; - for (auto &name : var_names_) { + std::vector tensors; + for (auto &pair : var_names_) { + auto &name = pair.first; auto it = ref_cnts_->find(name); if (it == ref_cnts_->end()) continue; auto *var = exec_scope->FindVar(name); - if (var == nullptr || !var->IsType()) continue; - - if (it->second.fetch_sub(1) <= 1) { - 
tensors.emplace_back(var->GetMutable()); + if (var == nullptr) continue; + + if (var->IsType()) { + if (it->second.fetch_sub(pair.second) <= pair.second) { + tensors.emplace_back(var->GetMutable()); + } + } else if (var->IsType()) { + if (it->second.fetch_sub(pair.second) <= pair.second) { + tensors.emplace_back( + var->GetMutable()->mutable_value()); + } } } @@ -91,7 +106,7 @@ class ReferenceCountOpHandle : public OpHandleBase { } private: - void ClearTensors(const std::vector &tensors) { + void ClearTensors(const std::vector &tensors) { auto *gc = dynamic_cast *>(gc_); if (gc != nullptr) { auto compute_stream = dev_ctx_->stream(); @@ -112,7 +127,7 @@ class ReferenceCountOpHandle : public OpHandleBase { const Scope *scope_; platform::CUDADeviceContext *dev_ctx_; - std::vector var_names_; + std::unordered_map var_names_; GarbageCollector *gc_; // not own AtomicReferenceCountMap *ref_cnts_; // not own cudaEvent_t event_; diff --git a/paddle/fluid/framework/details/reference_count_pass.cc b/paddle/fluid/framework/details/reference_count_pass.cc index 344754d5a1e119c04cae08ad50126924b5824315..b1ce551ce73de33bcede187c72feebad6e2fa1a5 100644 --- a/paddle/fluid/framework/details/reference_count_pass.cc +++ b/paddle/fluid/framework/details/reference_count_pass.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include @@ -23,6 +24,25 @@ namespace paddle { namespace framework { namespace details { +static ComputationOpHandle *FindNextComputationOpHandle(VarHandle *var_in) { + std::queue queue; + queue.push(var_in); + do { + auto *var = queue.front(); + queue.pop(); + for (auto *op : var->PendingOps()) { + auto *compute_op = dynamic_cast(op); + if (compute_op != nullptr && compute_op->GetPlace() == var_in->place_) { + return compute_op; + } + for (auto *out_var : op->Outputs()) { + queue.push(out_var); + } + } + } while (!queue.empty()); + return nullptr; +} + std::unique_ptr ReferenceCountPass::ApplyImpl( std::unique_ptr graph) const { auto &ref_cnts = Get(kGlobalReferenceCount); @@ -34,6 +54,9 @@ std::unique_ptr ReferenceCountPass::ApplyImpl( // Step 2: Find all variables in non-computation ops which refers to variables // in computation ops std::unordered_set names; + std::unordered_map> + compute_ref_cnt_map; + auto get_ref_cnts_from_compute_op = [&]( const std::unique_ptr &op, const std::vector &vars) { @@ -54,15 +77,18 @@ std::unique_ptr ReferenceCountPass::ApplyImpl( VarDesc *var_desc = var_handle->Node()->Var(); auto var_name = var_handle->Node()->Name(); - // This is wierd but there is really some variables without var_desc + // This is weird but there really are some variables without var_desc in computation_op if (var_desc == nullptr) { if (compute_op->Node()->Op()->Block()->FindVar(var_name) == nullptr) continue; } else { - if (var_desc->Persistable() || - var_desc->Proto()->type().type() != proto::VarType::LOD_TENSOR) + if (var_desc->Persistable()) continue; + auto var_type = var_desc->Proto()->type().type(); + if (var_type != proto::VarType::LOD_TENSOR && + var_type != proto::VarType::SELECTED_ROWS) { continue; + } } // compute op only runs in one device @@ -93,12 +119,33 @@ std::unique_ptr ReferenceCountPass::ApplyImpl( if (ref_cnts.count(place.device) && ref_cnts[place.device]->count(var_name)) { ++(*ref_cnts[place.device])[var_name]; + + auto *next_compute_op = FindNextComputationOpHandle(var_handle); + if (next_compute_op != nullptr) { + if (compute_ref_cnt_map.count(next_compute_op))
{ + compute_ref_cnt_map[next_compute_op]->AddVar(var_name); + VLOG(5) << "Add reference count of " << var_name << " to Operator " + << next_compute_op->Name(); + } else { + // Create new reference_count_op_handle + ir::Node *ref_cnt_node = graph->CreateEmptyNode( + "reference_count", ir::Node::Type::kOperation); + auto *ref_cnt_handle = new ReferenceCountOpHandle( + ref_cnt_node, next_compute_op->GetScope(), place, {var_name}, + gcs[place.device].get(), cur_ref_cnts[place.device].get()); + if (next_compute_op->Outputs().empty()) { + auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar()); + next_compute_op->AddOutput(dep_var); + graph->Get(kGraphDepVars).emplace(dep_var); + } + ref_cnt_handle->AddInput(next_compute_op->Outputs().front()); + compute_ref_cnt_map[next_compute_op].reset(ref_cnt_handle); + } + } } } }; - std::unordered_map - compute_ref_cnt_map; auto &all_ops = graph->Get(kGraphOps); for (auto &op : all_ops) { auto in_var_names = get_ref_cnts_from_compute_op(op, op->Inputs()); @@ -113,11 +160,13 @@ std::unique_ptr ReferenceCountPass::ApplyImpl( auto *ref_cnt_handle = new ReferenceCountOpHandle( ref_cnt_node, compute_op->GetScope(), place, in_var_names, gcs[place.device].get(), cur_ref_cnts[place.device].get()); - auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar()); - compute_op->AddOutput(dep_var); - ref_cnt_handle->AddInput(dep_var); - graph->Get(kGraphDepVars).emplace(dep_var); - compute_ref_cnt_map[compute_op] = ref_cnt_handle; + if (compute_op->Outputs().empty()) { + auto *dep_var = new DummyVarHandle(graph->CreateControlDepVar()); + compute_op->AddOutput(dep_var); + graph->Get(kGraphDepVars).emplace(dep_var); + } + ref_cnt_handle->AddInput(compute_op->Outputs().front()); + compute_ref_cnt_map[compute_op].reset(ref_cnt_handle); } for (auto &op : all_ops) { @@ -131,7 +180,11 @@ std::unique_ptr ReferenceCountPass::ApplyImpl( new_all_ops.emplace_back(std::move(op)); auto it = compute_ref_cnt_map.find(new_all_ops.back().get()); if (it != compute_ref_cnt_map.end()) { - new_all_ops.emplace_back(it->second); + // Add LeafNode to ReferenceCountOpHandle + auto *dummy_leaf = new DummyVarHandle(graph->CreateControlDepVar()); + graph->Get(kGraphDepVars).emplace(dummy_leaf); + it->second->AddOutput(dummy_leaf); + new_all_ops.emplace_back(std::move(it->second)); } } diff --git a/paddle/fluid/framework/eigen.h b/paddle/fluid/framework/eigen.h index 4ea1df655df005ba7585fb67fb0a3c3411a76418..2b265a773fe967f5b2ab38ce795b0f599d859c2a 100644 --- a/paddle/fluid/framework/eigen.h +++ b/paddle/fluid/framework/eigen.h @@ -13,6 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +// logging.h and windows.h conflict +#define GLOG_NO_ABBREVIATED_SEVERITIES +// solve static linking error in windows +// https://github.com/google/glog/issues/301 +#define GOOGLE_GLOG_DLL_DECL #include "paddle/fluid/framework/tensor.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -46,11 +51,13 @@ struct EigenTensor { using ConstType = Eigen::TensorMap>; - static Type From(Tensor& tensor, DDim dims) { + static Type From(Tensor& tensor, DDim dims) { // NOLINT return Type(tensor.data(), EigenDim::From(dims)); } - static Type From(Tensor& tensor) { return From(tensor, tensor.dims_); } + static Type From(Tensor& tensor) { // NOLINT + return From(tensor, tensor.dims_); + } // NOLINT static ConstType From(const Tensor& tensor, DDim dims) { return ConstType(tensor.data(), EigenDim::From(dims)); @@ -64,7 +71,8 @@ struct EigenTensor { template struct EigenMatrix : public EigenTensor { - static typename EigenMatrix::Type Reshape(Tensor& tensor, int num_col_dims) { + static typename EigenMatrix::Type Reshape(Tensor& tensor, // NOLINT + int num_col_dims) { int rank = tensor.dims_.size(); PADDLE_ENFORCE(num_col_dims > 0 && num_col_dims < rank, "`num_col_dims` must be between (0, rank_of_tensor)."); @@ -86,11 +94,12 @@ template struct EigenVector : public EigenTensor { // Flatten reshapes a Tensor into an EigenVector. - static typename EigenVector::Type Flatten(Tensor& tensor) { + static typename EigenVector::Type Flatten(Tensor& tensor) { // NOLINT return EigenVector::From(tensor, {product(tensor.dims_)}); } - static typename EigenVector::ConstType Flatten(const Tensor& tensor) { + static typename EigenVector::ConstType Flatten( + const Tensor& tensor) { // NOLINT return EigenVector::From(tensor, {product(tensor.dims_)}); } }; @@ -104,7 +113,7 @@ struct EigenScalar { using ConstType = Eigen::TensorMap< Eigen::TensorFixedSize, MajorType, IndexType>>; - static Type From(Tensor& tensor) { return Type(tensor.data()); } + static Type From(Tensor& tensor) { return Type(tensor.data()); } // NOLINT static ConstType From(const Tensor& tensor) { return ConstType(tensor.data()); diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 86f6147cf7ac1e82ac2904bbcdcf9697422560ce..17f942571d0141537e992be9ab73847d2a794698 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -54,6 +54,10 @@ class CompileTimeInferShapeContext : public InferShapeContext { size_t j = 0) const override { PADDLE_ENFORCE_LT(i, Inputs(in).size()); PADDLE_ENFORCE_LT(j, Outputs(out).size()); + PADDLE_ENFORCE(Inputs(in)[i] != framework::kEmptyVarName, + "The %s[%d] is @EMPTY@", in, i); + PADDLE_ENFORCE(Outputs(out)[j] != framework::kEmptyVarName, + "The %s[%d] is @EMPTY@", out, j); auto *in_var = block_.FindVarRecursive(Inputs(in)[i]); auto *out_var = block_.FindVarRecursive(Outputs(out)[j]); if (in_var->GetType() != proto::VarType::LOD_TENSOR) { @@ -63,6 +67,7 @@ class CompileTimeInferShapeContext : public InferShapeContext { PADDLE_ENFORCE_EQ(in_var->GetType(), proto::VarType::LOD_TENSOR, "The %d-th output of Output(%s) must be LoDTensor.", j, out); + out_var->SetLoDLevel(in_var->GetLoDLevel()); } diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index e7dfa608b48f89a2155e43c7e63e31154675cd38..ef2eb334a4e7f3f482ba6d62d3f325f109c69302 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -23,6 +23,11 @@ limitations under the License. 
*/ #include #include +#if defined(_WIN32) +#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc: logging.h conflicts with windows.h +#define GOOGLE_GLOG_DLL_DECL +#endif + #include "glog/logging.h" // For VLOG() #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/details/op_registry.h" @@ -241,22 +246,20 @@ struct OpKernelRegistrarFunctorEx #include diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 1040eb882baea624e972faf4af3094119df72308..626b50edfd39424473be33e9f8baec5970471477 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -20,6 +20,8 @@ limitations under the License. */ #include #include #include +#define GLOG_NO_ABBREVIATED_SEVERITIES +#define GOOGLE_GLOG_DLL_DECL #include "glog/logging.h" // For VLOG #include "paddle/fluid/framework/attribute.h" diff --git a/paddle/fluid/framework/shape_inference.cc b/paddle/fluid/framework/shape_inference.cc index ddff2c7c261746ac9986e79cff3da7e0a9654adc..89eb00ff65598eff5f4ba541df107e8da04e1a89 100644 --- a/paddle/fluid/framework/shape_inference.cc +++ b/paddle/fluid/framework/shape_inference.cc @@ -46,6 +46,16 @@ std::vector InferShapeContext::GetReaderDims( return this->GetRepeatedDims(arg_names[0]); } +void InferShapeContext::ShareLoDs(const std::string &in, + const std::string &out) const { + PADDLE_ENFORCE_EQ(Inputs(in).size(), Outputs(out).size(), + "The number of arguments in %s and %s is not equal.", in, + out); + for (size_t i = 0; i < Inputs(in).size(); ++i) { + ShareLoD(in, out, i, i); + } +} + DDim InferShapeContext::GetInputsElementDim(const std::string &name, int idx) const { const std::vector &names = Inputs(name); diff --git a/paddle/fluid/framework/shape_inference.h b/paddle/fluid/framework/shape_inference.h index 5f497cafa0f75f7c23d550ef767d55274de7c900..fd220d961af85dd55fe2031409180823d8f178fc 100644 --- a/paddle/fluid/framework/shape_inference.h +++ b/paddle/fluid/framework/shape_inference.h @@ -56,6 +56,8 @@ class InferShapeContext { virtual const std::vector &Outputs( const std::string &name) const = 0; + void ShareLoDs(const std::string &in, const std::string &out) const; + virtual void ShareLoD(const std::string &in, const std::string &out, size_t i = 0, size_t j = 0) const = 0; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 684e0ce0e292d852d4601ebd1ccd920382e42c8b..00cbe28d45dc4393ba1c141912aee7d1b7469a89 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -71,7 +71,7 @@ bool AnalysisPredictor::Init( inference_program_ = paddle::inference::Load( executor_.get(), scope_.get(), config_.prog_file, config_.param_file); } else { - LOG(ERROR) << "fail to load inference model."; + LOG(ERROR) << "fail to load inference model from " << config_.model_dir; return false; } @@ -109,8 +109,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() { } argument_.origin_program_desc.reset( new ProgramDesc(*inference_program_->Proto())); - PADDLE_ENFORCE(config_.ir_mode == AnalysisConfig::IrPassMode::kExclude, - "Only kExclude is supported yet."); + PADDLE_ENFORCE( + config_.ir_mode == contrib::AnalysisConfig::IrPassMode::kExclude, + "Only kExclude is supported yet."); Analyzer().DisableIrPasses(config_.ir_passes).Run(&argument_); CHECK(argument_.transformed_program_desc); @@ -126,8 +127,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() { } template <> -std::unique_ptr CreatePaddlePredictor< - AnalysisConfig,
PaddleEngineKind::kAnalysis>(const AnalysisConfig& config) { +std::unique_ptr +CreatePaddlePredictor( + const contrib::AnalysisConfig& config) { VLOG(3) << "create AnalysisConfig"; if (config.use_gpu) { // 1. GPU memory @@ -154,4 +156,11 @@ std::unique_ptr CreatePaddlePredictor< return predictor; } +template <> +std::unique_ptr CreatePaddlePredictor( + const contrib::AnalysisConfig& config) { + return CreatePaddlePredictor(config); +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index e53925366e9214cd60422efe56884751297c15e5..aa00e8be5c28c2e3bfe74fa0bff2c72210bd106e 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -30,7 +30,7 @@ using framework::proto::ProgramDesc; */ class AnalysisPredictor : public NativePaddlePredictor { public: - explicit AnalysisPredictor(const AnalysisConfig& config) + explicit AnalysisPredictor(const contrib::AnalysisConfig& config) : NativePaddlePredictor(config), config_(config) {} bool Init(const std::shared_ptr& parent_scope); @@ -46,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor { Argument& analysis_argument() { return argument_; } private: - AnalysisConfig config_; + contrib::AnalysisConfig config_; Argument argument_; }; diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 43b31269d2bd82c06e284e3599a3763da693a2af..2c4894fd887f2f509dc7ab88c367cea5c1aed99a 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -31,21 +31,24 @@ namespace paddle { +using paddle::contrib::AnakinConfig; + template PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( - const AnakinConfig &config) { + const contrib::AnakinConfig &config) { CHECK(Init(config)); } template <> PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( - const AnakinConfig &config) { + const contrib::AnakinConfig &config) { omp_set_dynamic(0); omp_set_num_threads(1); mkl_set_num_threads(1); CHECK(Init(config)); } template -bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) { +bool PaddleInferenceAnakinPredictor::Init( + const contrib::AnakinConfig &config) { if (!(graph_.load(config.model_file))) { VLOG(3) << "fail to load graph from " << config.model_file; return false; @@ -200,10 +203,11 @@ template class PaddleInferenceAnakinPredictor; // A factory to help create different predictors.
template <> -std::unique_ptr CreatePaddlePredictor< - AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) { +std::unique_ptr +CreatePaddlePredictor( + const contrib::AnakinConfig &config) { VLOG(3) << "Anakin Predictor create."; - if (config.target_type == AnakinConfig::NVGPU) { + if (config.target_type == contrib::AnakinConfig::NVGPU) { #ifdef PADDLE_WITH_CUDA VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ]."; std::unique_ptr x( @@ -213,7 +217,7 @@ std::unique_ptr CreatePaddlePredictor< LOG(ERROR) << "AnakinConfig::NVGPU could not used in ONLY-CPU environment"; return nullptr; #endif - } else if (config.target_type == AnakinConfig::X86) { + } else if (config.target_type == contrib::AnakinConfig::X86) { VLOG(3) << "Anakin Predictor create on [ Intel X86 ]."; std::unique_ptr x( new PaddleInferenceAnakinPredictor(config)); diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index dd08661880d8cc3a9f4401e9af91a3d10e6579b6..04536ea3a53bbbc9293d92e69a23567e4bfd84c0 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -29,6 +29,8 @@ limitations under the License. */ namespace paddle { +using contrib::AnakinConfig; + template class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 2e9e10139fa7008a46c3782960dfd44d3228cc26..c57fc64bb6bfeebc7935f19d0e977e8fccd4c9a0 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -101,14 +101,11 @@ bool NativePaddlePredictor::Init( inference_program_ = paddle::inference::Load( executor_.get(), scope_.get(), config_.prog_file, config_.param_file); } else { - LOG(ERROR) << "fail to load inference model."; + LOG(ERROR) << "fail to load inference model from " << config_.model_dir; return false; } ctx_ = executor_->Prepare(*inference_program_, 0); - if (config_._use_mkldnn) { - executor_->EnableMKLDNN(*inference_program_); - } executor_->CreateVariables(*inference_program_, sub_scope_ ? 
sub_scope_ : scope_.get(), 0); @@ -330,4 +327,10 @@ std::unique_ptr CreatePaddlePredictor< #endif } +template <> +std::unique_ptr CreatePaddlePredictor( + const NativeConfig &config) { + return CreatePaddlePredictor(config); +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h index ec801c58857e716241d28404510530e551ed25aa..6386d601262b3dac0e957fae991d23768b52f2c0 100644 --- a/paddle/fluid/inference/api/api_impl.h +++ b/paddle/fluid/inference/api/api_impl.h @@ -14,6 +14,12 @@ #pragma once +// logging.h and windows.h conflict +#define GLOG_NO_ABBREVIATED_SEVERITIES +// solve static linking error in windows +// https://github.com/google/glog/issues/301 +#define GOOGLE_GLOG_DLL_DECL + #include #include #include diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc index d9d6e139b8735c8f07c52f63c70b6b9805e03642..6c7e63971b2d93f58e219dbd93637c8d389deb7c 100644 --- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc +++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc @@ -25,10 +25,11 @@ using inference::analysis::Argument; using inference::Singleton; using inference::analysis::Analyzer; using framework::proto::ProgramDesc; +using paddle::contrib::MixedRTConfig; class TensorRTSubgraphPredictor : public NativePaddlePredictor { public: - explicit TensorRTSubgraphPredictor(const TensorRTConfig& config) + explicit TensorRTSubgraphPredictor(const MixedRTConfig& config) : NativePaddlePredictor(config), config_(config) {} bool Init(const std::shared_ptr& parent_scope) { @@ -115,13 +116,13 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { } private: - TensorRTConfig config_; + MixedRTConfig config_; }; template <> std::unique_ptr -CreatePaddlePredictor( - const TensorRTConfig& config) { +CreatePaddlePredictor( + const MixedRTConfig& config) { VLOG(3) << "create TensorRTSubgraphPredictor"; if (config.use_gpu) { // 1. 
GPU memory @@ -150,6 +151,13 @@ CreatePaddlePredictor( return std::move(predictor); } +template <> +std::unique_ptr CreatePaddlePredictor( + const MixedRTConfig& config) { + return CreatePaddlePredictor(config); +} + } // namespace paddle USE_TRT_CONVERTER(elementwise_add_weight); diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc index 9e7425eddd2df07ffe897f908aad360abe42117a..fc6310e90b0257bc84742fb617a00f5778bb1866 100644 --- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc +++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc @@ -20,6 +20,8 @@ namespace paddle { +using contrib::MixedRTConfig; + DEFINE_string(dirname, "", "Directory of the inference model."); void CompareTensorRTWithFluid(bool enable_tensorrt) { @@ -32,7 +34,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) { config0.fraction_of_gpu_memory = 0.3; config0.device = 0; - TensorRTConfig config1; + MixedRTConfig config1; config1.model_dir = FLAGS_dirname + "word2vec.inference.model"; config1.use_gpu = true; config1.fraction_of_gpu_memory = 0.3; @@ -42,7 +44,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) { auto predictor0 = CreatePaddlePredictor(config0); auto predictor1 = - CreatePaddlePredictor(config1); for (int batch_id = 0; batch_id < 1; batch_id++) { diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index afb46a7139f6ab8e6b3697fdc56fe1c78a05cd64..d4e6bb3e4a4ceb361ccd35121d0ecf84a764243e 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -1,13 +1,32 @@ cmake_minimum_required(VERSION 3.0) - project(cpp_inference_demo CXX C) +option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) +option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) + +macro(safe_set_static_flag) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) +endmacro() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") if (WIN32) -set(CMAKE_STATIC_LIBRARY_PREFIX "lib") + if (WITH_STATIC_LIB) + safe_set_static_flag() + add_definitions(-DSTATIC_LIB) + set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "/w") + set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} "/w") + endif() + set(CMAKE_STATIC_LIBRARY_PREFIX "lib") else() -set(CMAKE_STATIC_LIBRARY_PREFIX "") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + set(CMAKE_STATIC_LIBRARY_PREFIX "") endif() +message("flags" ${CMAKE_CXX_FLAGS}) if(NOT DEFINED PADDLE_LIB) message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") @@ -16,14 +35,18 @@ if(NOT DEFINED DEMO_NAME) message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name") endif() -option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) -option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) -option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static."
ON) if(WITH_GPU) - set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") + if(NOT WIN32) + set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") + else() + if(CUDA_LIB STREQUAL "") + set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64") + endif() + endif(NOT WIN32) endif() +include_directories("D:/Paddle/") include_directories("${PADDLE_LIB}") include_directories("${PADDLE_LIB}/third_party/install/protobuf/include") include_directories("${PADDLE_LIB}/third_party/install/glog/include") @@ -83,10 +106,18 @@ set(DEPS ${DEPS} ${MATH_LIB} ${MKLDNN_LIB} ${CMAKE_STATIC_LIBRARY_PREFIX}glog ${CMAKE_STATIC_LIBRARY_PREFIX}gflags ${CMAKE_STATIC_LIBRARY_PREFIX}protobuf ${EXTERNAL_LIB}) +# NOTE(dzhwinter) shlwapi is deprecated. +set(DEPS ${DEPS} libcmt shlwapi) endif(NOT WIN32) if(WITH_GPU) - set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) + if(NOT WIN32) + set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) + else() + set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} ) + endif() endif() target_link_libraries(${DEMO_NAME} ${DEPS}) diff --git a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc index 03ac79e9edf0d7ce6e167c3d34af5ba84bbc0e72..360f924810a570422db5a00b13939813fa73e2fa 100644 --- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc +++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc @@ -18,6 +18,8 @@ limitations under the License. */ #include #include + +#include #include #include //NOLINT #include "paddle/fluid/inference/paddle_inference_api.h" @@ -67,7 +69,8 @@ void Main(bool use_gpu) { 0.000932706}; const size_t num_elements = outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. - for (size_t i = 0; i < std::min(5UL, num_elements); i++) { + for (size_t i = 0; i < std::min(static_cast(5), num_elements); + i++) { PADDLE_ENFORCE(static_cast(outputs.front().data.data())[i], result[i]); } @@ -113,7 +116,8 @@ void MainThreads(int num_threads, bool use_gpu) { const size_t num_elements = outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. - for (size_t i = 0; i < std::min(5UL, num_elements); i++) { + for (size_t i = 0; i < std::min(static_cast(5), num_elements); + i++) { PADDLE_ENFORCE(static_cast(outputs.front().data.data())[i], result[i]); } diff --git a/paddle/fluid/inference/api/demo_ci/windows_inference.md b/paddle/fluid/inference/api/demo_ci/windows_inference.md new file mode 100644 index 0000000000000000000000000000000000000000..44b2586ad6d33ce7cbd2bb3080acc96b5e27f660 --- /dev/null +++ b/paddle/fluid/inference/api/demo_ci/windows_inference.md @@ -0,0 +1,19 @@ +# windows inference +This document describes Windows inference. Currently only static compilation is supported; it produces paddle_fluid.lib, which bundles all third-party dependencies except openblas.dll. + +1. Download the latest paddle_fluid.lib and openblas.dll, and put them in the same directory. + +2. Prepare a pre-trained model, for example one of the models in models; a model can be saved with the save_inference_model interface. Put the model files into that directory. + +3. Enter the Paddle/paddle/fluid/inference/api/demo_ci directory, create a build directory, and use cmake to generate the vs2015 solution files. +PADDLE_LIB is the folder containing the paddle_fluid.lib above, and CUDA_LIB is the x64 CUDA system library directory. +```shell + cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DWITH_MKL=OFF -DWITH_STATIC_LIB=ON -DCMAKE_BUILD_TYPE=Release -DDEMO_NAME=inference_icnet -DPADDLE_LIB=D:\to_the_paddle_fluid.lib -DCUDA_LIB=D:\tools\v8.0\lib\x64 +``` +Then open the generated project with vs2015, make sure it is linked statically with "/MT", and build the exe. Put openblas.dll into the directory of the exe. + +4. This exe is the build artifact and can be run on its own. + +## FAQ +1. cmake has to be downloaded manually and added to the system path. +2. Paths must not contain spaces; for example, a CUDA_LIB path under Program Files (x86) may fail. CUDA can be copied to a new location instead. diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index 55a07ca705f9fafa9ea223a867300bd14e10c364..2b4e5ed73704041e18bdbce32338405f3601e082 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -28,34 +28,61 @@ limitations under the License. */ namespace paddle { +// Data type. enum PaddleDType { FLOAT32, INT64, + // TODO(Superjomn) support more data types if needed. }; +/* + * Memory management for PaddleTensor. + * The PaddleBuf holds a buffer for data input or output. The memory can be + * allocated by the user or by PaddleBuf itself, but in any case, the PaddleBuf + * should be reused for better performance. + * + * For user allocated memory, the following API can be used: + * - PaddleBuf(void* data, size_t length) to set an external memory by + * specifying + * the memory address and length. + * - Reset(void* data, size_t length) to reset the PaddleBuf with an external + * memory. + * ATTENTION, for user allocated memory, deallocation should be done by users + * externally after the program finishes. The PaddleBuf won't do any allocation + * or deallocation. + * + * To have the PaddleBuf allocate and manage the memory: + * - PaddleBuf(size_t length) will allocate a memory of size `length`. + * - Resize(size_t length) resizes the memory to no less than `length`, ATTENTION + * if the allocated memory is larger than `length`, nothing will be done. + */ class PaddleBuf { public: - PaddleBuf() = default; - PaddleBuf(PaddleBuf&& other); - // Copy only available when memory is managed externally. - explicit PaddleBuf(const PaddleBuf&); - PaddleBuf& operator=(const PaddleBuf&); - PaddleBuf& operator=(PaddleBuf&&); - // Do not own the memory. - PaddleBuf(void* data, size_t length) - : data_(data), length_(length), memory_owned_{false} {} - // Own memory. + // PaddleBuf allocates memory internally, and manages it. explicit PaddleBuf(size_t length) : data_(new char[length]), length_(length), memory_owned_(true) {} - // Resize to `length` bytes. + // Set external memory, the PaddleBuf won't manage it. + PaddleBuf(void* data, size_t length) + : data_(data), length_(length), memory_owned_{false} {} + // Copy only available when memory is managed externally. + explicit PaddleBuf(const PaddleBuf&); + + // Resize the memory. void Resize(size_t length); - // Reset to external memory. + // Reset to external memory, with address and length set. void Reset(void* data, size_t length); + // Tell whether the buffer is empty. bool empty() const { return length_ == 0; } + // Get the memory address. void* data() const { return data_; } + // Get the memory length. size_t length() const { return length_; } ~PaddleBuf() { Free(); } + PaddleBuf& operator=(const PaddleBuf&); + PaddleBuf& operator=(PaddleBuf&&); + PaddleBuf() = default; + PaddleBuf(PaddleBuf&& other); private: void Free(); @@ -64,6 +91,7 @@ class PaddleBuf { bool memory_owned_{true}; }; +// Basic input and output data structure for PaddlePredictor.
struct PaddleTensor { PaddleTensor() = default; std::string name; // variable name. @@ -73,19 +101,8 @@ struct PaddleTensor { std::vector> lod; // Tensor+LoD equals LoDTensor }; -enum class PaddleEngineKind { - kNative = 0, // Use the native Fluid facility. - kAnakin, // Use Anakin for inference. - kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. - kAnalysis - // TODO(Superjomn) support following engines latter. - // kTensorRT, // Use TensorRT for inference. - // kAutoMixedAnakin, // Automatically mix Fluid with Anakin. -}; - /* - * A simple Inference API for Paddle. Currently this API can be used by - * non-sequence scenerios. + * A simple Inference API for Paddle. */ class PaddlePredictor { public: @@ -120,26 +137,53 @@ struct NativeConfig : public PaddlePredictor::Config { // GPU related fields. bool use_gpu{false}; int device{0}; - float fraction_of_gpu_memory{-1.f}; // Negative to notify initialization. - // NOTE: NOT use it, just for the internal test, will discard later - bool _use_mkldnn{false}; - // Specify the variable's name of each input. - bool specify_input_name{false}; + float fraction_of_gpu_memory{-1.f}; // Change to a float in (0,1] if needed. + // Specify the exact path of program and parameter files. std::string prog_file; std::string param_file; + + // Specify the variable's name of each input if input tensors don't follow the + // `feeds` and `fetches` of the phase `save_inference_model`. + bool specify_input_name{false}; }; -// Configurations for Anakin engine. -struct AnakinConfig : public PaddlePredictor::Config { - enum TargetType { NVGPU = 0, X86 }; - int device; - std::string model_file; - int max_batch_size{-1}; - TargetType target_type; +// A factory to help create different predictors. +// +// Usage: +// +// NativeConfig config; +// ... // change the configs. +// auto native_predictor = CreatePaddlePredictor(config); +// +// FOR EXTENSION DEVELOPER: +// Different predictors are designated by config type. Similar configs can be +// merged, but there shouldn't be a huge config containing different fields for +// more than one kind of predictors. +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + +// NOTE The following APIs are too trivial, we will discard them in the following +// versions. enum class PaddleEngineKind { + kNative = 0, // Use the native Fluid facility. + kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. + kAnalysis, // More optimization. + kAnakin // Use Anakin for inference, not mature yet. }; -struct TensorRTConfig : public NativeConfig { +template +std::unique_ptr CreatePaddlePredictor(const ConfigT& config); + +// == +// +// ----------------------------------------------------------------------------------- +// NOTE: The following APIs are not mature yet, we are still working on them. + +namespace contrib { + +// Accelerate GPU computation with TensorRT engine. +struct MixedRTConfig : public NativeConfig { // Determine whether a subgraph will be executed by TRT. int min_subgraph_size{1}; // While TensorRT allows an engine optimized for a given max batch size @@ -154,7 +198,6 @@ struct TensorRTConfig : public NativeConfig { // NOTE WIP, not stable yet. struct AnalysisConfig : public NativeConfig { - // enum class IrPassMode { kSystem, // Use system default passes, not customize. kInclude, // Specify the passes in `ir_passes`.
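The PaddleBuf comments and the factory usage note above imply the following end-to-end flow. The sketch below is illustrative only, not part of this patch: the model path, feed variable name, shape, and LoD are hypothetical, and it relies only on the declarations visible in this header (NativeConfig, PaddleTensor, PaddleBuf, CreatePaddlePredictor, PaddlePredictor::Run).

```cpp
#include <vector>

#include "paddle/fluid/inference/paddle_inference_api.h"

int main() {
  paddle::NativeConfig config;
  config.model_dir = "./word2vec.inference.model";  // hypothetical model path
  config.use_gpu = false;

  // Uses the plain CreatePaddlePredictor(config) overload added by this patch.
  auto predictor = paddle::CreatePaddlePredictor(config);

  // User-allocated buffer: PaddleBuf(void*, size_t) does not take ownership,
  // so `ids` must stay alive across Run() and is freed by us, not by PaddleBuf.
  std::vector<int64_t> ids = {1, 2, 3, 4};
  paddle::PaddleTensor input;
  input.name = "ids";  // hypothetical feed variable name
  input.shape = {4, 1};
  input.dtype = paddle::PaddleDType::INT64;
  input.data = paddle::PaddleBuf(ids.data(), ids.size() * sizeof(int64_t));
  input.lod = {{0, 4}};  // one sequence of length 4 (Tensor + LoD = LoDTensor)

  // Output buffers are allocated and managed by PaddleBuf itself, so they can
  // be reused across Run() calls for better performance, as the docs suggest.
  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run({input}, &outputs);
  return 0;
}
```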
@@ -165,18 +208,21 @@ struct AnalysisConfig : public NativeConfig { IrPassMode ir_mode{IrPassMode::kExclude}; // attention lstm fuse works only on some specific models, disable as default. std::vector ir_passes{"attention_lstm_fuse_pass"}; + + // NOTE this is just for internal development, please not use it. + bool _use_mkldnn{false}; }; -// A factory to help create different predictors. -// -// FOR EXTENSION DEVELOPER: -// Different predictors are designated by config type and engine kind. Similar -// configs can be merged, but there shouldn't be a huge config containing -// different fields for more than one kind of predictors. -// -// Similarly, each engine kind should map to a unique predictor implementation. -template -std::unique_ptr CreatePaddlePredictor(const ConfigT& config); +// Configurations for Anakin engine. +struct AnakinConfig : public PaddlePredictor::Config { + enum TargetType { NVGPU = 0, X86 }; + int device; + std::string model_file; + int max_batch_size{-1}; + TargetType target_type; +}; + +} // namespace contrib int PaddleDtypeSize(PaddleDType dtype); diff --git a/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc b/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc index 62e820b68c79a47d963bb174663bfc8c4ac22de3..cf97f064beddb6ede1d4716f323b4c5b46cb266d 100644 --- a/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc +++ b/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc @@ -22,10 +22,10 @@ DEFINE_string(model, "", "Directory of the inference model(mobile_v2)."); namespace paddle { -AnakinConfig GetConfig() { - AnakinConfig config; +contrib::AnakinConfig GetConfig() { + contrib::AnakinConfig config; // using AnakinConfig::X86 if you need to use cpu to do inference - config.target_type = AnakinConfig::NVGPU; + config.target_type = contrib::AnakinConfig::NVGPU; config.model_file = FLAGS_model; config.device = 0; config.max_batch_size = 1; @@ -33,9 +33,10 @@ AnakinConfig GetConfig() { } TEST(inference, anakin) { - AnakinConfig config = GetConfig(); + auto config = GetConfig(); auto predictor = - CreatePaddlePredictor(config); + CreatePaddlePredictor( + config); float data[1 * 3 * 224 * 224] = {1.0f}; PaddleTensor tensor; diff --git a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc index 98c74aaa562dce6618ccde8f11f4344eefd59ef2..82bc83988de688e46613e160b66943c89c4a0391 100644 --- a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc @@ -97,10 +97,10 @@ void Data::get_batch_data( namespace paddle { -AnakinConfig GetConfig() { - AnakinConfig config; +contrib::AnakinConfig GetConfig() { + contrib::AnakinConfig config; // using AnakinConfig::X86 if you need to use cpu to do inference - config.target_type = AnakinConfig::X86; + config.target_type = contrib::AnakinConfig::X86; config.model_file = FLAGS_model; config.device = 0; config.max_batch_size = 1000; // the max number of token @@ -121,9 +121,10 @@ void set_tensor(std::string name, std::vector shape, } void single_test() { - AnakinConfig config = GetConfig(); + auto config = GetConfig(); auto predictor = - CreatePaddlePredictor(config); + CreatePaddlePredictor( + config); int max_batch_size = 1000; std::string feature_file = FLAGS_datapath; diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc index 8cf230a51d05c3a141f7cfd4e30bf30f064f0989..59020545cd609961487cafc4a08c20951a02c8ce 100644 
--- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc @@ -95,7 +95,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } } -void SetConfig(AnalysisConfig *cfg) { +void SetConfig(contrib::AnalysisConfig *cfg) { cfg->prog_file = FLAGS_infer_model + "/__model__"; cfg->param_file = FLAGS_infer_model + "/param"; cfg->use_gpu = false; @@ -117,7 +117,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. TEST(Analyzer_Chinese_ner, profile) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; @@ -141,7 +141,7 @@ TEST(Analyzer_Chinese_ner, profile) { // Check the fuse status TEST(Analyzer_Chinese_ner, fuse_statis) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -155,7 +155,7 @@ TEST(Analyzer_Chinese_ner, fuse_statis) { // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_Chinese_ner, compare) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector> input_slots_all; diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index 14bdf76efc71b326bd130858ea246be81c9bd45c..3bf5383d8f35347c767d6caee83e0dcc5fb0a446 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -149,7 +149,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } } -void SetConfig(AnalysisConfig *cfg) { +void SetConfig(contrib::AnalysisConfig *cfg) { cfg->prog_file = FLAGS_infer_model + "/__model__"; cfg->param_file = FLAGS_infer_model + "/param"; cfg->use_gpu = false; @@ -172,7 +172,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. TEST(Analyzer_rnn1, profile) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; @@ -183,7 +183,7 @@ TEST(Analyzer_rnn1, profile) { // Check the fuse status TEST(Analyzer_rnn1, fuse_statis) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -198,7 +198,7 @@ TEST(Analyzer_rnn1, fuse_statis) { // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_rnn1, compare) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector> input_slots_all; @@ -208,7 +208,7 @@ TEST(Analyzer_rnn1, compare) { // Test Multi-Thread. 
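// The analyzer tests above and below are driven by command-line gflags
// (FLAGS_infer_model in SetConfig, FLAGS_batch_size / FLAGS_repeat /
// FLAGS_use_analysis from tester_helper.h). A sketch of a typical invocation,
// with a hypothetical binary name and model path:
//
//   ./test_analyzer_rnn1 --infer_model=/path/to/rnn1/model \
//       --batch_size=10 --repeat=3 --use_analysis=true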
TEST(Analyzer_rnn1, multi_thread) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 384a40a3f992d1a9734e3189b422be0ce6adb938..7189df775227680726a9d4840386280c5ad44c23 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -38,6 +38,8 @@ DEFINE_bool(use_analysis, true, namespace paddle { namespace inference { +using contrib::AnalysisConfig; + void CompareResult(const std::vector &outputs, const std::vector &ref_outputs) { EXPECT_GT(outputs.size(), 0UL); @@ -74,11 +76,11 @@ void CompareResult(const std::vector &outputs, } } -std::unique_ptr GetPrediction(AnalysisConfig config, - bool use_analysis = true) { +std::unique_ptr CreateTestPredictor( + const AnalysisConfig &config, bool use_analysis = true) { if (use_analysis) { - return CreatePaddlePredictor( - config); + return CreatePaddlePredictor(config); } else { return CreatePaddlePredictor( config); @@ -92,7 +94,7 @@ size_t GetSize(const PaddleTensor &out) { std::unordered_map GetFuseStatis(AnalysisConfig config, int *num_ops) { - auto predictor = GetPrediction(config); + auto predictor = CreateTestPredictor(config); AnalysisPredictor *analysis_predictor = dynamic_cast(predictor.get()); auto &fuse_statis = analysis_predictor->analysis_argument() @@ -113,11 +115,12 @@ std::unordered_map GetFuseStatis(AnalysisConfig config, } void TestOneThreadPrediction( - AnalysisConfig config, const std::vector> inputs, + const AnalysisConfig &config, + const std::vector> &inputs, std::vector *outputs, bool use_analysis = true) { int batch_size = FLAGS_batch_size; int num_times = FLAGS_repeat; - auto predictor = GetPrediction(config, use_analysis); + auto predictor = CreateTestPredictor(config, use_analysis); Timer timer; timer.tic(); for (int i = 0; i < num_times; i++) { @@ -130,7 +133,8 @@ void TestOneThreadPrediction( } void TestMultiThreadPrediction( - AnalysisConfig config, const std::vector> inputs, + const AnalysisConfig &config, + const std::vector> &inputs, std::vector *outputs, int num_threads, bool use_analysis = true) { int batch_size = FLAGS_batch_size; @@ -140,7 +144,7 @@ void TestMultiThreadPrediction( // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled // because AttentionLSTM's hard code nodeid will be damanged. 
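  // Until the TODO above is fixed, each thread simply gets its own predictor:
  // the loop below builds one predictor per thread before any worker starts,
  // so no predictor instance is shared across threads.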
for (int tid = 0; tid < num_threads; ++tid) { - predictors.emplace_back(GetPrediction(config, use_analysis)); + predictors.emplace_back(CreateTestPredictor(config, use_analysis)); } for (int tid = 0; tid < num_threads; ++tid) { threads.emplace_back([&, tid]() { @@ -164,8 +168,8 @@ void TestMultiThreadPrediction( } } -void TestPrediction(AnalysisConfig config, - const std::vector> inputs, +void TestPrediction(const AnalysisConfig &config, + const std::vector> &inputs, std::vector *outputs, int num_threads, bool use_analysis = FLAGS_use_analysis) { LOG(INFO) << "use_analysis: " << use_analysis; @@ -178,8 +182,8 @@ void TestPrediction(AnalysisConfig config, } void CompareNativeAndAnalysis( - AnalysisConfig config, - const std::vector> inputs) { + const AnalysisConfig &config, + const std::vector> &inputs) { std::vector native_outputs, analysis_outputs; TestOneThreadPrediction(config, inputs, &native_outputs, false); TestOneThreadPrediction(config, inputs, &analysis_outputs, true); diff --git a/paddle/fluid/operators/adam_op.h b/paddle/fluid/operators/adam_op.h index 5b27068c9e805146b8bce03f4f676ef0d4d16c53..4cb1f3a80e95bdda79e6451dc3cc87e899b11779 100644 --- a/paddle/fluid/operators/adam_op.h +++ b/paddle/fluid/operators/adam_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include // for sqrt in CPU and CUDA #include +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detail/safe_ref.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" @@ -306,26 +307,43 @@ class AdamOpKernel : public framework::OpKernel { VLOG(3) << "grad row size is 0!!"; return; } - // merge duplicated rows if any. - // The rows of grad_merge have been sorted inside MergeAdd functor - scatter::MergeAdd merge_func; - auto& grad_merge = *(ctx.scope() - .NewScope() - .Var("sparse_adam_grad_merge") - ->GetMutable()); - merge_func(ctx.template device_context(), grad, - &grad_merge); + + std::vector cpu_rows(grad.rows().begin(), grad.rows().end()); + bool is_strict_sorted = true; + for (size_t i = 1; i < cpu_rows.size(); ++i) { + if (cpu_rows[i - 1] >= cpu_rows[i]) { + is_strict_sorted = false; + break; + } + } + + const framework::SelectedRows* grad_merge_ptr; + if (is_strict_sorted) { + grad_merge_ptr = &grad; + } else { + // merge duplicated rows if any. + // The rows of grad_merge have been sorted inside MergeAdd functor + scatter::MergeAdd merge_func; + auto* grad_merge_var = const_cast(ctx.scope()) + .Var() + ->GetMutable(); + merge_func(ctx.template device_context(), grad, + grad_merge_var); + grad_merge_ptr = grad_merge_var; + } + + auto& grad_merge = *grad_merge_ptr; auto& grad_tensor = grad_merge.value(); const T* grad_data = grad_tensor.template data(); - int64_t* rows = nullptr; -// When compiled without CUDA, the CUDAMutableData() interface should not be + const int64_t* rows = nullptr; +// When compiled without CUDA, the CUDAData() interface should not be // provided. 
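    // For example, rows() == {0, 2, 5} is strictly increasing, so `grad` is
    // used as-is by the fast path above; rows() == {0, 2, 2, 5} contains a
    // duplicate, so MergeAdd first folds the duplicated rows into a single
    // entry. Below, the row pointer is taken from GPU or CPU memory depending
    // on the place.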
#if defined(PADDLE_WITH_CUDA) if (platform::is_gpu_place(ctx.GetPlace())) { - rows = grad_merge.mutable_rows()->CUDAMutableData(ctx.GetPlace()); + rows = grad_merge.rows().CUDAData(ctx.GetPlace()); } else { #endif - rows = grad_merge.mutable_rows()->data(); + rows = grad_merge.rows().data(); #if defined(PADDLE_WITH_CUDA) } diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index bc58612f9d3a2b433f362787135b6bb23b203f63..57817da71adfd80faad29a48b05ba2f326de6c07 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -94,8 +94,20 @@ class ConcatOpGrad : public framework::OperatorWithKernel { : OperatorWithKernel(type, inputs, outputs, attrs) {} void InferShape(framework::InferShapeContext *ctx) const override { - ctx->SetOutputsDim(framework::GradVarName("X"), ctx->GetInputsDim("X")); - ctx->ShareLoD("X", framework::GradVarName("X")); + auto in_x = "X"; + auto out_x_g_n = framework::GradVarName(in_x); + ctx->SetOutputsDim(out_x_g_n, ctx->GetInputsDim(in_x)); + auto &in_names = ctx->Inputs(in_x); + auto &out_names = ctx->Outputs(out_x_g_n); + PADDLE_ENFORCE_EQ( + in_names.size(), out_names.size(), + "The number of arguments in %s[%d] and %s[%d] is not equal.", in_x, + in_names.size(), out_x_g_n, out_names.size()); + for (size_t i = 0; i < in_names.size(); ++i) { + if (out_names[i] != framework::kEmptyVarName) { + ctx->ShareLoD(in_x, out_x_g_n, i, i); + } + } } }; diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index b98190d40a2afa684cfd29cc52fc29fac851cca7..4cc980b41b34894f9d915d4b325887548091c0eb 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -23,8 +23,6 @@ namespace operators { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; -static constexpr int kROISize = 4; - template bool GT_E(T a, T b) { return (a > b) || fabs(a - b) < 1e-4; diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc index 13be6c65be58314a75124106eb09b1300305baf0..bf4df4f600c14050b636b7ee6d7b6973b57adb94 100644 --- a/paddle/fluid/operators/scale_op.cc +++ b/paddle/fluid/operators/scale_op.cc @@ -46,9 +46,15 @@ class ScaleOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( **Scale operator** -Multiply the input tensor with a float scalar to scale the input tensor. +Apply scaling and bias addition to the input tensor. -$$Out = scale*X$$ +if bias_after_scale=True: + +$$Out = scale*X + bias$$ + +else: + +$$Out = scale*(X + bias)$$ )DOC"); AddAttr("scale", "The scaling factor of the scale operator.") .SetDefault(1.0); diff --git a/paddle/fluid/platform/cudnn_helper_test.cc b/paddle/fluid/platform/cudnn_helper_test.cc index 517df6863499f20d7b66d15ef114a689700be5b2..28edfd2e50237c887dbeb7ac73e1f990ce239a9c 100644 --- a/paddle/fluid/platform/cudnn_helper_test.cc +++ b/paddle/fluid/platform/cudnn_helper_test.cc @@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#define GLOG_NO_ABBREVIATED_SEVERITIES +#define GOOGLE_GLOG_DLL_DECL + #include "paddle/fluid/platform/cudnn_helper.h" #include diff --git a/paddle/fluid/platform/init.h b/paddle/fluid/platform/init.h index 0e30594672927253cc8083dcb88bb867d63ec729..992ca5e6f6a966a331616a698e3bebd2eee129d5 100644 --- a/paddle/fluid/platform/init.h +++ b/paddle/fluid/platform/init.h @@ -16,6 +16,9 @@ limitations under the License. */ #include #include +#define GLOG_NO_ABBREVIATED_SEVERITIES +#define GOOGLE_GLOG_DLL_DECL + #include "gflags/gflags.h" #include "glog/logging.h" diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 9e4a5ae8baaf7f2975c8060856f9eecab55f241c..7bbdf7de89cc932e0023952e3c8e102f92b06855 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -19,17 +19,8 @@ from .framework import * # import all class inside executor into fluid module from . import executor from .executor import * - from . import trainer -from .trainer import Trainer -from .trainer import BeginEpochEvent -from .trainer import EndEpochEvent -from .trainer import BeginStepEvent -from .trainer import EndStepEvent -from .trainer import CheckpointConfig - from . import inferencer -from .inferencer import Inferencer from . import io from . import evaluator diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 79904cec93d1732f9f3f25115869c63385bd6276..32b8f1189fd65ba1e8da5aeaf316fc0ae05af552 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -280,7 +280,7 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr): group_scale_name = self.group_name + "_scale" if group_scale_name not in self.context: group_norm_var = layers.sums(input=self.context[self.group_name]) - layers.sqrt(x=group_norm_var, out=group_norm_var) + group_norm_var = layers.sqrt(x=group_norm_var) clip_var = self.context[self.group_name + "_clip"] group_scale_var = layers.elementwise_div( x=clip_var, diff --git a/python/paddle/fluid/contrib/inferencer.py b/python/paddle/fluid/contrib/inferencer.py new file mode 100644 index 0000000000000000000000000000000000000000..b8d5f4ffeadca0a7b103682f175d50dc46fa258a --- /dev/null +++ b/python/paddle/fluid/contrib/inferencer.py @@ -0,0 +1,112 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import contextlib + +from .. import core + +from .. import executor +from .. import framework +from .. import io +from .. import parallel_executor +from .. import unique_name +from .trainer import check_and_get_place + +__all__ = ['Inferencer', ] + + +class Inferencer(object): + """ + Inferencer High Level API. + + Args: + infer_func (Python func): Infer function that will return predict Variable + param_path (str): The path where the inference model is saved by fluid.io.save_params + place (Place): place to do the inference + parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU. 
+ + Examples: + .. code-block:: python + + def inference_program(): + x = fluid.layers.data(name='x', shape=[13], dtype='float32') + y_predict = fluid.layers.fc(input=x, size=1, act=None) + return y_predict + + place = fluid.CPUPlace() + inferencer = fluid.Inferencer( + infer_func=inference_program, param_path="/tmp/model", place=place) + + """ + + def __init__(self, infer_func, param_path, place=None, parallel=False): + self.param_path = param_path + self.scope = core.Scope() + self.parallel = parallel + self.place = check_and_get_place(place) + + self.inference_program = framework.Program() + with framework.program_guard(self.inference_program): + with unique_name.guard(): + self.predict_var = infer_func() + + with self._prog_and_scope_guard(): + # load params from param_path into scope + io.load_params(executor.Executor(self.place), param_path) + + if parallel: + with self._prog_and_scope_guard(): + self.exe = parallel_executor.ParallelExecutor( + use_cuda=isinstance(self.place, core.CUDAPlace), + loss_name=self.predict_var.name) + else: + self.exe = executor.Executor(self.place) + + self.inference_program = self.inference_program.clone(for_test=True) + + def infer(self, inputs, return_numpy=True): + """ + Do Inference for Inputs + + Args: + inputs (map): a map of {"input_name": input_var} that will be feed into the inference program + return_numpy (bool): transform return value into numpy or not + + Returns: + Tensor or Numpy: the predict value of the inference model for the inputs + + Examples: + .. code-block:: python + + tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") + results = inferencer.infer({'x': tensor_x}) + """ + if not isinstance(inputs, dict): + raise ValueError( + "inputs should be a map of {'input_name': input_var}") + + with self._prog_and_scope_guard(): + results = self.exe.run(feed=inputs, + fetch_list=[self.predict_var.name], + return_numpy=return_numpy) + + return results + + @contextlib.contextmanager + def _prog_and_scope_guard(self): + with framework.program_guard(main_program=self.inference_program): + with executor.scope_guard(self.scope): + yield diff --git a/python/paddle/fluid/contrib/trainer.py b/python/paddle/fluid/contrib/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..8569e486f91786b5562e84dcdccf6d91da0612cc --- /dev/null +++ b/python/paddle/fluid/contrib/trainer.py @@ -0,0 +1,1258 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import contextlib +import os +import errno +import shutil +import six +import time + +from .. import core +from .. import data_feeder +from .. import executor +from .. import framework +from .. import io +# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module +from .. import optimizer as opt_module +from .. 
import parallel_executor
+from ..transpiler import distribute_transpiler
+
+__all__ = [
+    'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent',
+    'EndStepEvent', 'CheckpointConfig'
+]
+
+
+class BeginEpochEvent(object):
+    """
+    The beginning of a training epoch.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+    """
+
+    def __init__(self, epoch_id):
+        self.epoch = epoch_id
+
+
+class EndEpochEvent(object):
+    """
+    The end of a training epoch.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+    """
+
+    def __init__(self, epoch_id):
+        self.epoch = epoch_id
+
+
+class BeginStepEvent(object):
+    """
+    The beginning of a training step.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+        step_id(int): The current step ID.
+    """
+
+    def __init__(self, epoch_id, step_id):
+        self.epoch = epoch_id
+        self.step = step_id
+        self.fetch_metrics = True
+        """
+        If fetch_metrics is true, the metrics will be fetched at the
+        EndStepEvent. Default is True.
+        """
+
+
+class EndStepEvent(object):
+    """
+    The end of a training step.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+        step_id(int): The current step ID.
+        metrics(list): A list of fetched tensors. The order of this list is the
+            same as the :code:`train_func` returns.
+    """
+
+    def __init__(self, epoch_id, step_id, metrics):
+        self.epoch = epoch_id
+        self.step = step_id
+        self.metrics = metrics
+
+
+class CheckpointConfig(object):
+    """
+    Parameter object for :code:`save_checkpoint` and
+    :code:`fluid.Trainer`. Used to configure how to save checkpoints.
+
+    Args:
+        checkpoint_dir(str): Directory path to save checkpoints. Default is the
+            current directory.
+
+        max_num_checkpoints(int): The max number of local checkpoints.
+        epoch_interval(int): Save a checkpoint every this number of epochs.
+        step_interval(int): Save a checkpoint every this number of steps.
+
+    Examples:
+        >>> config = fluid.CheckpointConfig("./checkpoints")
+        >>> trainer = fluid.Trainer(train_func=train_program,
+        >>>                         place=place,
+        >>>                         optimizer_func=optimizer_func,
+        >>>                         checkpoint_config=config)
+        >>> trainer.train(...)
+    """
+
+    def __init__(self,
+                 checkpoint_dir=None,
+                 max_num_checkpoints=3,
+                 epoch_interval=1,
+                 step_interval=10):
+
+        assert epoch_interval >= 1
+        assert step_interval >= 1
+
+        self.checkpoint_dir = checkpoint_dir \
+            if checkpoint_dir is not None else os.getcwd()
+        self.max_num_checkpoints = max_num_checkpoints
+        self.epoch_interval = epoch_interval
+        self.step_interval = step_interval
+        self.epoch_id = 0
+        self.step_id = 0
+        self.load_serial = None
+        self.pserver_id = None
+        self.lookup_table_name = None
+
+
+def check_and_get_place(place):
+    """
+    Check the type of place or get the default place.
+
+    Args:
+        place(None|core.CUDAPlace|core.CPUPlace): the place that trainer will be executed on.
+
+    Raises:
+        TypeError: if the type is mismatched.
+
+    Returns:
+        the original place if it is not None.
+        If fluid is compiled with CUDA, returns CUDAPlace(0) by default.
+        Otherwise returns CPUPlace by default.
+    """
+    if place is None:
+        if core.is_compiled_with_cuda():
+            return core.CUDAPlace(0)
+        else:
+            return core.CPUPlace()
+    else:
+        if not isinstance(place, core.CUDAPlace) and not isinstance(
+                place, core.CPUPlace):
+            raise TypeError("Place should be either CUDAPlace or CPUPlace")
+        return place
+
+
+class Trainer(object):
+    """
+    A trainer wraps MultiGPU/MultiNode training loops and can be used to train a
+    simple neural network easily.
+
+    This API takes a :code:`train_func`. A :code:`train_func` is a function that
+    returns loss as its first return value. The remaining values can be fetched
+    from EndStepEvent.metrics
+
+    This API also takes an :code:`optimizer_func` that will return an optimizer
+    instance.
+
+    For example, to train an MLP for the MNIST dataset, a sample program is
+
+    >>> import paddle.fluid as fluid
+    >>>
+    >>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10):
+    >>>     hidden = image
+    >>>     for layer_size in layer_sizes:
+    >>>         hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation)
+    >>>     return fluid.layers.fc(input=hidden, size=num_classes, act="softmax")
+    >>>
+    >>> def train_mnist_mlp():
+    >>>     img = fluid.layers.data(name='image', shape=[784])
+    >>>     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    >>>     prediction = mlp(img)
+    >>>     return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label))
+    >>>
+    >>> def optimizer():
+    >>>     return fluid.optimizer.Adam()
+    >>>
+    >>> trainer = Trainer(train_func=train_mnist_mlp,
+    >>>                   optimizer_func=optimizer,
+    >>>                   place=fluid.CUDAPlace(0),
+    >>>                   parallel=True)
+    >>>
+    >>> def train_callback(event):
+    >>>     if isinstance(event, fluid.EndStepEvent):
+    >>>         print "Epoch ID", event.epoch, "Step ID",\
+    >>>             event.step, "AvgLoss", event.metrics[0]
+    >>>     elif isinstance(event, fluid.EndEpochEvent):
+    >>>         trainer.save_params("./model_{0}".format(event.epoch))
+    >>>
+    >>> trainer.train(num_epochs=100, event_handler=train_callback)
+
+    For more examples, please see :ref:`api_guide_high_level_api`.
+
+
+    Args:
+        train_func(callable): A function which will return loss. The loss must be
+            a scalar tensor.
+        optimizer_func(callable): A function that returns an Optimizer object.
+        place(CUDAPlace|CPUPlace): The device place of this trainer. If
+            :code:`parallel=True`, all CUDA Places will be used if :code:`place`
+            is a :code:`CUDAPlace`.
+        parallel(bool): True if using multiple devices.
+        checkpoint_config(CheckpointConfig): Configuration about how to save
+            checkpoints.
+    """
+
+    def __init__(self,
+                 train_func,
+                 optimizer_func,
+                 param_path=None,
+                 place=None,
+                 parallel=False,
+                 checkpoint_config=None):
+        self.__stop = False
+        self.parallel = parallel
+
+        # config for checkpoint
+        # only chief worker will save variables
+        self.trainer_id = 0
+        self.checkpoint_cfg = checkpoint_config
+        if self.checkpoint_cfg:
+            assert isinstance(self.checkpoint_cfg, CheckpointConfig)
+            serial = _get_latest_checkpoint_serial(
+                self.checkpoint_cfg.checkpoint_dir)
+            self.checkpoint_cfg.load_serial = serial if serial >= 0 else None
+
+        self.scope = core.Scope()
+
+        # 1. we need to generate a framework.Program by calling
+        # program_func. Reference: fluid.program_guard in
+        # test_word2vec.py
+
+        self.startup_program = framework.Program()
+        self.train_program = framework.Program()
+
+        with framework.program_guard(self.train_program, self.startup_program):
+            program_func_outs = train_func()
+            self.train_func_outputs = program_func_outs if isinstance(
+                program_func_outs, list) else [program_func_outs]
+            self.test_program = self.train_program.clone(for_test=True)
+
+            # The first element of program_func_outs is loss.
+            loss = self.train_func_outputs[0]
+
+            optimizer = optimizer_func()
+            if not isinstance(optimizer, opt_module.Optimizer):
+                raise TypeError(
+                    "The optimizer should be an instance of Optimizer")
+            optimize_ops, params_grads = optimizer.minimize(loss)
+
+        self.place = check_and_get_place(place)
+
+        self._dist_transpile_if_necessary(optimize_ops, params_grads)
+
+        # 2.
move the default_main_program to self.program and run the + # default_startup program on an empty core.Scope() + # Run startup program + with self._prog_and_scope_guard(): + exe = executor.Executor(place) + exe.run(self.startup_program) + + if self.checkpoint_cfg and self.checkpoint_cfg.load_serial is not None: + self._load_checkpoint() + + if param_path and os.path.isdir(param_path): + with self._prog_and_scope_guard(): + # load params from param_path into scope + io.load_persistables( + executor=exe, + dirname=param_path, + main_program=self.startup_program) + + def _transpile_nccl2_dist(self): + # PADDLE_TRAINER_IPS + if "PADDLE_TRAINER_IPS" not in os.environ: + self.nccl_id_var = None + else: + self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + port = os.getenv("PADDLE_PSERVER_PORT") + worker_ips = os.getenv("PADDLE_TRAINER_IPS") + worker_endpoints = [] + for ip in worker_ips.split(","): + worker_endpoints.append(':'.join([ip, port])) + self.num_trainers = len(worker_endpoints) + current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port + worker_endpoints.remove(current_endpoint) + # TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id + # in ParallelExecutor to start + # distributed training using NCCL2 + self.nccl_id_var = self.startup_program.global_block().create_var( + name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW) + self.startup_program.global_block().append_op( + type="gen_nccl_id", + inputs={}, + outputs={"NCCLID": self.nccl_id_var}, + attrs={ + "endpoint": current_endpoint, + "endpoint_list": worker_endpoints, + "trainer_id": self.trainer_id + }) + + def _dist_transpile_if_necessary(self, optimize_ops, params_grads): + self._transpile_nccl2_dist() + if self.nccl_id_var != None: + return + + if "PADDLE_TRAINING_ROLE" not in os.environ: + return + + # the port of all pservers, needed by both trainer and pserver + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + # comma separated ips of all pservers, needed by trainer and + # pserver + pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "") + eplist = [] + for ip in pserver_ips.split(","): + eplist.append(':'.join([ip, port])) + pserver_endpoints = ",".join(eplist) + # total number of workers/trainers in the job, needed by + # trainer and pserver + trainers = int(os.getenv("PADDLE_TRAINERS")) + # the IP of the local machine, needed by pserver only + current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port + # the unique trainer id, starting from 0, needed by trainer + # only + self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) + + # the role, should be either PSERVER or TRAINER + training_role = os.getenv("PADDLE_TRAINING_ROLE") + with self._prog_and_scope_guard(): + t = distribute_transpiler.DistributeTranspiler() + t.transpile( + self.trainer_id, pservers=pserver_endpoints, trainers=trainers) + if training_role == "PSERVER": + if self.checkpoint_cfg: + pserver_id = eplist.index(current_endpoint) + self.checkpoint_cfg.pserver_id = pserver_id + if t.has_distributed_lookup_table: + self.checkpoint_cfg.lookup_table_name = t.table_name + + self.train_program = t.get_pserver_program(current_endpoint) + self.startup_program = t.get_startup_program(current_endpoint, + self.train_program) + elif training_role == "TRAINER": + self.train_program = t.get_trainer_program() + else: + raise ValueError( + 'TRAINING_ROLE environment variable must be either TRAINER or PSERVER' + ) + + def stop(self): + """ + stop training + """ + self.__stop = True + + def train(self, num_epochs, 
event_handler, reader=None, feed_order=None): + """ + Start the train loop to train the model. + + Args: + num_epochs(int): The number of epoch. An epoch will process all data in reader + event_handler(callable): The event handler. A function with type (ev:Event)->void + reader(callable): A reader creator object. See also + :ref:`api_guide_python_reader` . + feed_order(list): Feeding order of reader. None will following the defining + order in program + + Returns: + None + """ + training_role = os.getenv("PADDLE_TRAINING_ROLE", "") + if training_role == "PSERVER": + with self._prog_and_scope_guard(): + exe = executor.Executor(self.place) + exe.run() + return + if self.parallel: + self._train_by_parallel_executor(num_epochs, event_handler, reader, + feed_order) + else: + self._train_by_executor(num_epochs, event_handler, reader, + feed_order) + + def test(self, reader, feed_order): + """ + Test the model on given test data + + Args: + reader(callable): The reader that yields test data. + feed_order(list): Feeding order of reader. None will following the + defining order in program + """ + + return self._test_by_executor(reader, feed_order, + self.train_func_outputs) + + def save_params(self, param_path): + """ + Save all parameters into :code:`param_path`. + + Args: + param_path(str): The path to save parameters. + + Returns: + None + """ + with self._prog_and_scope_guard(): + exe = executor.Executor(self.place) + io.save_persistables(exe, dirname=param_path) + + def save_inference_model(self, param_path, feeded_var_names, + target_var_indexes): + """ + Save model for cpp inference into :code:`param_path`. + + Args: + param_path(str): The path to save parameters. + feeded_var_names(list(str)): The name of the vars that you + need to feed in before run program. + target_var_indexes(list(int)): the index of target var that + you need to return in trainer.train_func. + Returns: + None + """ + with self._prog_and_scope_guard(): + exe = executor.Executor(self.place) + target_vars = [ + self.train_func_outputs[index] for index in target_var_indexes + ] + io.save_inference_model(param_path, feeded_var_names, target_vars, + exe) + + @contextlib.contextmanager + def _prog_and_scope_guard(self): + with framework.program_guard( + main_program=self.train_program, + startup_program=self.startup_program): + with executor.scope_guard(self.scope): + yield + + def _train_by_executor(self, num_epochs, event_handler, reader, feed_order): + """ + Train by Executor and single device. 
+ + Args: + num_epochs: + event_handler: + reader: + feed_order: + + Returns: + + """ + with self._prog_and_scope_guard(): + feed_var_list = build_feed_var_list(self.train_program, feed_order) + feeder = data_feeder.DataFeeder( + feed_list=feed_var_list, place=self.place) + exe = executor.Executor(self.place) + reader = feeder.decorate_reader(reader, multi_devices=False) + self._train_by_any_executor(event_handler, exe, num_epochs, reader) + + def _train_by_any_executor(self, event_handler, exe, num_epochs, reader): + if self.checkpoint_cfg: + epochs = [ + epoch_id for epoch_id in range(num_epochs) + if epoch_id >= self.checkpoint_cfg.epoch_id + ] + else: + epochs = [epoch_id for epoch_id in range(num_epochs)] + + for epoch_id in epochs: + event_handler(BeginEpochEvent(epoch_id)) + for step_id, data in enumerate(reader()): + if self.__stop: + if self.checkpoint_cfg: + self._clean_checkpoint() + return + + if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \ + and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id: + continue + + begin_event = BeginStepEvent(epoch_id, step_id) + event_handler(begin_event) + if begin_event.fetch_metrics: + metrics = exe.run(feed=data, + fetch_list=[ + var.name + for var in self.train_func_outputs + ]) + else: + metrics = exe.run(feed=data, fetch_list=[]) + + if self.checkpoint_cfg: + self._save_checkpoint(epoch_id, step_id) + event_handler(EndStepEvent(epoch_id, step_id, metrics)) + event_handler(EndEpochEvent(epoch_id)) + if self.checkpoint_cfg: + self._clean_checkpoint() + + def _test_by_executor(self, reader, feed_order, fetch_list): + with executor.scope_guard(self.scope): + feed_var_list = build_feed_var_list(self.test_program, feed_order) + feeder = data_feeder.DataFeeder( + feed_list=feed_var_list, place=self.place) + exe = executor.Executor(self.place) + accumulated = len(fetch_list) * [0] + count = 0 + for data in reader(): + outs = exe.run(program=self.test_program, + feed=feeder.feed(data), + fetch_list=fetch_list) + accumulated = [x[0] + x[1][0] for x in zip(accumulated, outs)] + count += 1 + + return [x / count for x in accumulated] + + def _train_by_parallel_executor(self, num_epochs, event_handler, reader, + feed_order): + with self._prog_and_scope_guard(): + pe = self._get_or_create_parallel_executor() + feed_var_list = build_feed_var_list(self.train_program, feed_order) + feeder = data_feeder.DataFeeder( + feed_list=feed_var_list, place=self.place) + reader = feeder.decorate_reader(reader, multi_devices=True) + self._train_by_any_executor(event_handler, pe, num_epochs, reader) + + def _get_parallel_executor(self): + return getattr(self, 'parallel_executor', None) + + def _get_or_create_parallel_executor(self): + if self._get_parallel_executor() is None: + self.parallel_executor = parallel_executor.ParallelExecutor( + use_cuda=isinstance(self.place, core.CUDAPlace), + loss_name=self.train_func_outputs[0].name) + return self._get_parallel_executor() + + def _clean_checkpoint(self): + assert self.checkpoint_cfg + clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir) + + def _get_checkpoint_load_args(self): + """ + epoch_id and step_id are runtime arguments, they are not variables, will load them independently. + """ + return ["epoch_id", "step_id"] + + def _get_checkpoint_save_args(self, epoch_id, step_id): + """ + epoch_id and step_id are runtime arguments, they are not variables, will save them independently. 
+        """
+        trainer_args = {}
+        trainer_args["epoch_id"] = epoch_id
+        trainer_args["step_id"] = step_id
+        return trainer_args
+
+    def _save_checkpoint(self, epoch_id, step_id):
+        assert self.checkpoint_cfg
+
+        if epoch_id % self.checkpoint_cfg.epoch_interval == 0 \
+                and step_id % self.checkpoint_cfg.step_interval == 0:
+            exe = executor.Executor(self.place)
+            save_checkpoint(
+                executor=exe,
+                checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
+                trainer_id=self.trainer_id,
+                trainer_args=self._get_checkpoint_save_args(epoch_id, step_id),
+                main_program=self.train_program,
+                max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints)
+
+    def _load_checkpoint(self):
+        with self._prog_and_scope_guard():
+            exe = executor.Executor(self.place)
+            load_checkpoint(
+                executor=exe,
+                checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
+                main_program=self.startup_program)
+
+            if not self.checkpoint_cfg.pserver_id:
+                load_trainer_args = self._get_checkpoint_load_args()
+                trainer_args = load_checkpoint(
+                    executor=exe,
+                    checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
+                    main_program=self.startup_program,
+                    role_id=self.trainer_id,
+                    is_trainer=True,
+                    load_trainer_args=load_trainer_args)
+
+                if len(trainer_args) != 2:
+                    raise ValueError(
+                        "the returned trainer_args length does not equal _get_checkpoint_load_args"
+                    )
+                self.checkpoint_cfg.epoch_id = int(trainer_args[0])
+                self.checkpoint_cfg.step_id = int(trainer_args[1])
+            else:
+                if self.checkpoint_cfg.lookup_table_name:
+                    load_checkpoint(
+                        executor=exe,
+                        checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
+                        main_program=self.startup_program,
+                        role_id=self.checkpoint_cfg.pserver_id,
+                        is_trainer=False,
+                        load_trainer_args=None,
+                        load_lookup_table=self.checkpoint_cfg.lookup_table_name)
+
+
+def build_feed_var_list(program, feed_order):
+    if not isinstance(program, framework.Program):
+        raise TypeError("The 'program' should be an object of Program")
+
+    if isinstance(feed_order, list):
+        feed_var_list = [
+            program.global_block().var(var_name) for var_name in feed_order
+        ]
+    else:
+        if not isinstance(feed_order, dict):
+            raise TypeError(
+                "The 'feed_order' should be either None, list or dict.")
+        if not sorted(feed_order.values()) == list(range(len(feed_order))):
+            raise ValueError(
+                "The values of 'feed_order' should be a permutation of [0, len(feed_order))"
+            )
+        sorted_pair_list = sorted(
+            six.iteritems(feed_order), key=lambda item: item[1])
+        feed_var_list = [
+            program.global_block().var(pair[0]) for pair in sorted_pair_list
+        ]
+    return feed_var_list
+
+
+# move Checkpoint APIs from io.py to trainer.py, make all of them private.
+SUCCESS_MARK_FILENAME = "_SUCCESS"
+CHECKPOINT_PREFIX = "checkpoint"
+MODEL_DIR = "__model__"
+LOOKUP_TABLE_DIR = "__lookup_table__"
+TRAINER_PREFIX = "trainer"
+CHECKPOINT_SEPARATOR = "_"
+
+
+def save_checkpoint(executor,
+                    checkpoint_dir,
+                    trainer_id,
+                    main_program,
+                    trainer_args=None,
+                    max_num_checkpoints=3,
+                    lookup_table=None,
+                    pserver_endpoints=None):
+    """
+    This function filters out all checkpoint variables from the given
+    main_program and then saves these variables to the `checkpoint_dir`
+    directory.
+
+    In the training process, we generally save a checkpoint in each
+    iteration. So there might be a lot of checkpoints in the
+    `checkpoint_dir`. To avoid them taking too much disk space, the
+    `max_num_checkpoints` argument is introduced to limit the total number
+    of checkpoints. If the number of existing checkpoints is greater than
+    `max_num_checkpoints`, the oldest ones will be deleted.
+
+    A variable is a checkpoint variable and will be saved if it meets
+    all the following conditions:
+    1. It's persistable.
+    2. Its type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
+    3. Its name contains no "@GRAD" nor ".trainer_" nor ".block".
+
+    Args:
+        executor(Executor): The executor to run for saving the checkpoint.
+        checkpoint_dir(str): The folder in which to save checkpoints.
+        trainer_id(int): current trainer id; if the id is equal to 0, the
+            trainer is chief.
+        trainer_args(dict|None): Current training arguments, such as 'epoch_id'
+            and 'step_id'.
+            Default: None
+        main_program(Program): The program whose checkpoint variables will
+            be saved.
+        max_num_checkpoints(int): The max total number of existing
+            checkpoints.
+            Default: 3
+        lookup_table(string|None): the lookup table name; when using a
+            distributed lookup table, it can be obtained from
+            DistributeTranspiler.table_name
+        pserver_endpoints(list|None): the parameter server ip:port list;
+            when using a distributed lookup table, it can be obtained from the
+            distribute arguments.
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: If `checkpoint_dir` is None.
+        AssertionError: If `trainer_args` is not a dict.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            path = "./checkpoints"
+            prog = fluid.default_main_program()
+            trainer_args = {"epoch_id": 200,
+                            "step_id": 20} # just an example
+            table_name = "share_w"
+            ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
+
+            save_checkpoint(executor=exe,
+                            checkpoint_dir=path,
+                            trainer_id=0,
+                            trainer_args=trainer_args,
+                            main_program=prog,
+                            max_num_checkpoints=3,
+                            lookup_table=table_name,
+                            pserver_endpoints=ps_endpoints)
+    """
+    if checkpoint_dir is None:
+        raise ValueError("'checkpoint_dir' should not be None")
+
+    if main_program is None:
+        raise ValueError('main_program should not be None.')
+
+    if trainer_args:
+        assert isinstance(trainer_args, dict)
+
+    is_chief = trainer_id == 0
+
+    _make_chekcpoint_dirs(checkpoint_dir)
+    serial = _get_latest_checkpoint_serial(checkpoint_dir) + 1
+    cur_dir = _get_serial_dir(checkpoint_dir, serial)
+
+    _save_trainer_args(cur_dir, trainer_id, trainer_args)
+
+    if is_chief:
+        _save_persist_vars_without_grad(executor, cur_dir, main_program)
+
+    if is_chief and lookup_table and pserver_endpoints:
+        _save_pserver_vars_by_notify(executor, cur_dir, lookup_table,
+                                     pserver_endpoints)
+
+    _scroll_delete(checkpoint_dir, max_num_checkpoints)
+
+
+def load_checkpoint(executor,
+                    checkpoint_dir,
+                    main_program,
+                    role_id=0,
+                    is_trainer=True,
+                    load_trainer_args=None,
+                    load_lookup_table=None):
+    """
+    This function filters out all checkpoint variables from the given
+    main_program and then tries to load these variables from the
+    `checkpoint_dir` directory.
+
+    In the training process, we generally save a checkpoint in each
+    iteration. So there may be more than one checkpoint in the
+    `checkpoint_dir` (each checkpoint has its own sub folder); the latest
+    serial is located automatically and that checkpoint is the one loaded.
+
+    A variable is a checkpoint variable and will be loaded if it meets
+    all the following conditions:
+    1. It's persistable.
+    2. Its type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
+    3. Its name contains no "@GRAD" nor ".trainer_" nor ".block".
+
+    Args:
+        executor(Executor): The executor to run for loading the checkpoint.
+        checkpoint_dir(str): The folder where all checkpoints are.
+        main_program(Program): The program whose checkpoint variables will
+            be loaded.
+        role_id(int): the trainer id or the parameter server id.
+        is_trainer(bool): True for a trainer and False for a parameter server.
+        load_trainer_args(list|None): the list of trainer args to load.
+        load_lookup_table(str|None): the lookup table name.
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: If `checkpoint_dir` is None.
+        ValueError: If `main_program` is None.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            path = "./checkpoints"
+            prog = fluid.default_main_program()
+            load_checkpoint(executor=exe, checkpoint_dir=path,
+                            main_program=prog)
+
+            # In this example, `load_checkpoint` function
+            # will first filter out all checkpoint variables in the default
+            # main program, and then try to load these variables from the
+            # latest serial folder, e.g. "./checkpoints/checkpoint_9/__model__".
+    """
+
+    if checkpoint_dir is None:
+        raise ValueError("'checkpoint_dir' should not be None")
+
+    serial = _get_latest_checkpoint_serial(checkpoint_dir)
+
+    # there is nothing to be loaded
+    if serial is None or serial < 0:
+        return
+
+    if main_program is None:
+        raise ValueError('main_program should not be None.')
+
+    if is_trainer and load_trainer_args is None:
+        cur_dir = _get_serial_dir(checkpoint_dir, serial)
+        _load_persist_vars_without_grad(executor, cur_dir, main_program, True)
+        return
+
+    if is_trainer and load_trainer_args:
+        return _load_trainer_args(checkpoint_dir, serial, role_id,
+                                  load_trainer_args)
+
+    if not is_trainer and load_lookup_table:
+        _load_lookup_table_vars(executor, checkpoint_dir, main_program, role_id,
+                                load_lookup_table)
+
+
+def clean_checkpoint(checkpoint_dir, delete_dir=False):
+    """
+    Clean the checkpoint dir. When training exits normally,
+    the trainer calls clean_checkpoint to delete the checkpoint directory saved before.
+    delete_dir only works when the directory is empty; otherwise, an OSError is raised.
+
+    : param checkpoint_dir
+    : param delete_dir
+    """
+
+    if checkpoint_dir is None:
+        raise ValueError("'checkpoint_dir' should not be None")
+    _scroll_delete(checkpoint_dir, max_num_checkpoints=0)
+
+    if delete_dir and not os.listdir(checkpoint_dir):
+        os.rmdir(checkpoint_dir)
+
+
+def _load_persist_vars_without_grad(executor,
+                                    dirname,
+                                    program,
+                                    has_model_dir=False):
+    """
+    This function filters out all checkpoint variables from the given
+    program and then tries to load these variables from the given directory.
+
+    A variable is a checkpoint variable if it meets all the following
+    conditions:
+    1. It's persistable.
+    2. Its type is not FEED_MINIBATCH nor FETCH_LIST nor RAW.
+    3. Its name contains no "@GRAD" nor ".trainer_" nor ".block".
+
+    Args:
+        executor(Executor): The executor to run for loading variables.
+        dirname(str): The directory path.
+        program(Program): The program whose checkpoint variables will
+            be loaded.
+        has_model_dir(bool): if True, the function loads variables
+            from a sub directory named '__model__'.
+            Default: False
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            _load_persist_vars_without_grad(executor=exe,
+                    dirname=param_path, program=prog, has_model_dir=True)
+
+            # In this example, `_load_persist_vars_without_grad` function
+            # will first filter out all checkpoint variables in the default
+            # main program, and then try to load these variables from the
+            # folder "./my_paddle_model/__model__".
+ """ + + if has_model_dir: + dirname = _get_model_dir(dirname) + + io.load_vars( + executor, + dirname=dirname, + main_program=program, + predicate=_is_checkpoint_var, + filename=None) + + +def _load_lookup_table_vars(executor, dirname, program, pserver_id, table_name): + """ + The parameter server will load lookup table's local file in + selectedrows variable. + + Args: + executor(Executor): The executor to run for loading persistable variables + dirname(str): The directory path + main_program(Program): Find the variable named table_name in main_program + pserver_id(int): the serial number in pserver_endpoints list + table_name(str): lookup table name + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + dirname = "./checkpoints/checkpoint_9/" + prog = fluid.default_main_program() + pserver_id = 1 + table_name = "share_w" + _load_lookup_table_vars(executor=exe, + dirname=dirname, program=prog, pserver_id=pserver_id, + table_name=table_name) + """ + + for var in program.list_vars(): + if var.name == table_name: + lookup_table_var = var + break + + assert lookup_table_var is not None + + lookup_table_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) + table_file = table_name + CHECKPOINT_SEPARATOR + str(pserver_id) + + load_prog = framework.Program() + load_block = load_prog.global_block() + + load_block.append_op( + type='load', + inputs={}, + outputs={'Out': [lookup_table_var]}, + attrs={'file_path': os.path.join(lookup_table_dir, table_file)}) + + executor.run(load_prog) + + +def _save_persist_vars_without_grad(executor, dirname, program): + """ + This function filters out all checkpoint variables from the give + program and then save these variables to a sub-folder '__model__' of + the given directory. + + A variable is a checkpoint variable if it meets all following + conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". + + Args: + executor(Executor): The executor to run for saving variables. + dirname(str): The directory path. + program(Program): The program whose checkpoint variables will + be saved. + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + _save_persist_vars_without_grad(executor=exe, + dirname=param_path, program=prog) + + # In this example, `_save_persist_vars_without_grad` function + # will first filters out all checkpoint variables in the default + # main program, and then saves these variables to the folder + # "./my_paddle_model/__model__". + """ + cur_dir = _get_model_dir(dirname) + io.save_vars( + executor, + dirname=cur_dir, + main_program=program, + vars=None, + predicate=_is_checkpoint_var, + filename=None) + _write_success(cur_dir) + + +def _save_pserver_vars_by_notify(executor, dirname, lookup_table, + ps_endpoint_list): + """ + This function will send checkpoint notify message from Trainer 0 + to all the pservers. + The checkpoint notify message contains lookup table name, + the absolute path on pserver to save lookup_table. + + Args: + executor(Executor): The executor to run for send checkpoint notify. + dirname(str): The folder where to save checkpoints. + lookup_table(string): the lookup table name, when use distribute + lookup table, we can get lookup table name by DistributeTranspiler. + table_name + ps_endpoint_list(list): the parameter server ip:port list. 
+            when using a distributed lookup table, it can be obtained from the
+            distribute arguments.
+    Return:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            table_name = "share_w"
+            ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
+
+            _save_pserver_vars_by_notify(executor=exe,
+                    dirname=param_path, lookup_table=table_name,
+                    ps_endpoint_list=ps_endpoints)
+    """
+    cur_dir = _get_lookuptable_dir(dirname)
+
+    checkpoint_notify_program = framework.Program()
+    checkpoint_notify_block = checkpoint_notify_program.global_block()
+
+    attrs = {}
+    attrs['epmap'] = ps_endpoint_list
+    attrs['dir'] = cur_dir
+    attrs['lookup_table'] = lookup_table
+
+    checkpoint_notify_block.append_op(
+        type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs)
+    executor.run(checkpoint_notify_program)
+
+
+def _save_trainer_args(dirname, trainer_id, trainer_args):
+    assert isinstance(trainer_args, dict)
+
+    cur_dir = _get_trainer_dir(dirname, trainer_id)
+
+    for name, value in six.iteritems(trainer_args):
+        args_file = os.path.join(cur_dir, name)
+        with open(args_file, 'w') as f:
+            f.write(str(value))
+    _write_success(cur_dir)
+
+
+def _load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args):
+    """
+    The trainer will load some args from its own directory,
+    such as epoch_id and step_id.
+
+    Args:
+        checkpoint_dir(str): The folder where all checkpoints are.
+        serial(int): The serial of the checkpoint you would like to load.
+        trainer_id(int): current trainer id.
+        trainer_args(list): the list of trainer args to load.
+    Return:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            param_path = "./checkpoint/"
+            serial = 7
+            trainer_id = 2
+            trainer_args = ["epoch_id", "step_id"]
+
+            _load_trainer_args(checkpoint_dir=param_path, serial=serial,
+                    trainer_id=trainer_id, trainer_args=trainer_args)
+    """
+    assert isinstance(trainer_args, list)
+
+    cur_dir = _get_serial_dir(checkpoint_dir, serial)
+    cur_dir = _get_trainer_dir(cur_dir, trainer_id)
+
+    ret_values = []
+
+    for arg in trainer_args:
+        cur_file = os.path.join(cur_dir, arg)
+        with open(cur_file, 'r') as f:
+            contents = f.read()
+        ret_values.append(contents.strip())
+    return ret_values
+
+
+def _is_checkpoint_var(var):
+    """
+    the checkpoint will not save or load all the variables.
+    vars whose type is FEED_MINIBATCH/FETCH_LIST/RAW or whose name contains @GRAD are discarded.
+
+    : param var(Variable)
+    """
+    if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
+            var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
+            var.desc.type() == core.VarDesc.VarType.RAW:
+        return False
+    # @GRAD are named for gradient variables, checkpoint will not save them.
+    if "@GRAD" in var.name:
+        return False
+    # .trainer_ are named for distribute train variables, checkpoint will not save them.
+    if ".trainer_" in var.name:
+        return False
+
+    # .block is named for distribute train variables, checkpoint will not save them.
+    if ".block" in var.name:
+        return False
+
+    return var.persistable
+
+
+def _make_chekcpoint_dirs(dirs):
+    """
+    _make_chekcpoint_dirs creates the local directory directly; if the directory already exists, it is ignored.
+ """ + assert dirs is not None + + if os.path.isfile(dirs): + raise OSError(errno.ENOTDIR, "dirs path shoule be a Directory.", dirs) + + if not os.path.isdir(dirs): + try: + os.makedirs(dirs) + except OSError as err: + if err.errno != errno.EEXIST: + raise err + + +def _get_dir_serial(dirname): + _, serial = dirname.split(CHECKPOINT_SEPARATOR) + + try: + serial_num = int(serial) + except ValueError: + serial_num = -1 + return serial_num + + +def _get_serial_dir(dirname, serial): + serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial) + serial_dir = os.path.join(dirname, serial_folder) + _make_chekcpoint_dirs(serial_dir) + + return serial_dir + + +def _get_model_dir(dirname): + model_dir = os.path.join(dirname, MODEL_DIR) + _make_chekcpoint_dirs(model_dir) + return model_dir + + +def _get_lookuptable_dir(dirname): + lookuptable_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) + _make_chekcpoint_dirs(lookuptable_dir) + return lookuptable_dir + + +def _get_trainer_dir(dirname, trainer_id): + trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id) + trainer_dir = os.path.join(dirname, trainer_folder) + _make_chekcpoint_dirs(trainer_dir) + return trainer_dir + + +def _scroll_delete(dirname, max_num_checkpoints=3): + dirs = os.listdir(dirname) + serial_map = {} + for serial in dirs: + serial_num = _get_dir_serial(serial) + serial_map[serial_num] = serial + + if len(list(serial_map.keys())) <= max_num_checkpoints: + return + + serials = list(serial_map.keys()) + serials.sort(reverse=True) + serials = serials[max_num_checkpoints:] + for serial in serials: + cur_dir = _get_serial_dir(dirname, serial) + try: + shutil.rmtree(cur_dir) + except OSError as err: + if err.errno != errno.ENOENT: + raise err + + +def _write_success(dirname): + """ + write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct. + + : param dirname + """ + success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME) + with open(success_file, 'a') as f: + now = time.ctime() + f.write(now) + + +def _get_latest_checkpoint_serial(checkpoint_dir): + """ + get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory + + : param checkpoint_dir + """ + if not checkpoint_dir: + return -1 + + def has_success(checkpoint_dir, cur_dir): + """ + is _SUCCESS in this dir + """ + + serial = _get_dir_serial(cur_dir) + if serial == -1 or not os.path.isdir( + os.path.join(checkpoint_dir, cur_dir)): + return -1 + + success_path = os.path.join( + _get_serial_dir(checkpoint_dir, serial), MODEL_DIR, + SUCCESS_MARK_FILENAME) + if os.path.isfile(success_path): + return serial + + if not os.path.isdir(checkpoint_dir): + return -1 + + current_dir = -1 + dirs = os.listdir(checkpoint_dir) + for cur_dir in dirs: + success_num = has_success(checkpoint_dir, cur_dir) + if success_num > current_dir: + current_dir = success_num + return current_dir diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index d7e5e4704858c08a21a2e7505facb63a93a4c010..fc61bcbea66de07350ee778abb16e81f8f8bc8db 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -38,7 +38,6 @@ from . 
import unique_name __all__ = [ 'Program', 'Operator', - 'Parameter', 'default_startup_program', 'default_main_program', 'program_guard', @@ -489,7 +488,8 @@ class OpProtoHolder(object): def generated_op_attr_names(): return { core.op_proto_and_checker_maker.kOpRoleAttrName(), - core.op_proto_and_checker_maker.kOpRoleVarAttrName() + core.op_proto_and_checker_maker.kOpRoleVarAttrName(), + core.op_proto_and_checker_maker.kOpNameScopeAttrName() } diff --git a/python/paddle/fluid/inferencer.py b/python/paddle/fluid/inferencer.py index a9b94a20720615dbfca97749463f27dbc88ac64f..7bdd430f985bd0b3818f6ef305ce2d7d8976106b 100644 --- a/python/paddle/fluid/inferencer.py +++ b/python/paddle/fluid/inferencer.py @@ -12,101 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function - -import contextlib - -from . import core - -from . import executor -from . import framework -from . import io -from . import parallel_executor -from . import unique_name -from .trainer import check_and_get_place - -__all__ = ['Inferencer', ] - - -class Inferencer(object): - """ - Inferencer High Level API. - - Args: - infer_func (Python func): Infer function that will return predict Variable - param_path (str): The path where the inference model is saved by fluid.io.save_params - place (Place): place to do the inference - parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU. - - Examples: - .. code-block:: python - - def inference_program(): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) - return y_predict - - place = fluid.CPUPlace() - inferencer = fluid.Inferencer( - infer_func=inference_program, param_path="/tmp/model", place=place) - - """ - - def __init__(self, infer_func, param_path, place=None, parallel=False): - self.param_path = param_path - self.scope = core.Scope() - self.parallel = parallel - self.place = check_and_get_place(place) - - self.inference_program = framework.Program() - with framework.program_guard(self.inference_program): - with unique_name.guard(): - self.predict_var = infer_func() - - with self._prog_and_scope_guard(): - # load params from param_path into scope - io.load_params(executor.Executor(self.place), param_path) - - if parallel: - with self._prog_and_scope_guard(): - self.exe = parallel_executor.ParallelExecutor( - use_cuda=isinstance(self.place, core.CUDAPlace), - loss_name=self.predict_var.name) - else: - self.exe = executor.Executor(self.place) - - self.inference_program = self.inference_program.clone(for_test=True) - - def infer(self, inputs, return_numpy=True): - """ - Do Inference for Inputs - - Args: - inputs (map): a map of {"input_name": input_var} that will be feed into the inference program - return_numpy (bool): transform return value into numpy or not - - Returns: - Tensor or Numpy: the predict value of the inference model for the inputs - - Examples: - .. 
code-block:: python - - tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") - results = inferencer.infer({'x': tensor_x}) - """ - if not isinstance(inputs, dict): - raise ValueError( - "inputs should be a map of {'input_name': input_var}") - - with self._prog_and_scope_guard(): - results = self.exe.run(feed=inputs, - fetch_list=[self.predict_var.name], - return_numpy=return_numpy) - - return results - - @contextlib.contextmanager - def _prog_and_scope_guard(self): - with framework.program_guard(main_program=self.inference_program): - with executor.scope_guard(self.scope): - yield +# NOTE: inferencer is moved into fluid.contrib.inferencer. +__all__ = [] diff --git a/python/paddle/fluid/layers/layer_function_generator.py b/python/paddle/fluid/layers/layer_function_generator.py index 8963d74de014d69c590276d5ff7080111f614230..8c11921d9bde0920f33368837302d39f36f45556 100644 --- a/python/paddle/fluid/layers/layer_function_generator.py +++ b/python/paddle/fluid/layers/layer_function_generator.py @@ -23,7 +23,10 @@ from ..proto import framework_pb2 from ..framework import OpProtoHolder, Variable from ..layer_helper import LayerHelper -__all__ = ['deprecated', 'generate_layer_fn', 'autodoc', 'templatedoc'] +__all__ = [ + 'deprecated', 'generate_layer_fn', 'generate_layer_fn_noattr', 'autodoc', + 'templatedoc' +] def _convert_(name): @@ -58,7 +61,7 @@ def escape_math(text): _two_dollar_pattern_.sub(r"!!\1!!", text))) -def _generate_doc_string_(op_proto): +def _generate_doc_string_(op_proto, additional_args_lines=None): """ Generate docstring by OpProto @@ -98,6 +101,13 @@ def _generate_doc_string_(op_proto): buf.write(escape_math(each_attr.comment)) buf.write('\n') + if additional_args_lines is not None: + for line in additional_args_lines: + line = line.strip() + buf.write('    ') + buf.write(line) + buf.write('\n') + if len(op_proto.outputs) != 0: buf.write('\nReturns:\n') buf.write('    ') @@ -205,6 +215,29 @@ def generate_layer_fn(op_type): return func +def generate_layer_fn_noattr(op_type): + """Register the Python layer for an Operator without Attribute. + + Args: + op_type: The name of the operator to be created. + + This function takes in the operator type (sigmoid, exp, tanh, etc.) and + creates the operator functionality. + + """ + op_proto = OpProtoHolder.instance().get_op_proto(op_type) + + def func(x, name=None): + helper = LayerHelper(op_type, **locals()) + output = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op(type=op_type, inputs={"X": x}, outputs={"Out": output}) + return output + + func.__name__ = op_type + func.__doc__ = _generate_doc_string_(op_proto) + return func + + def deprecated(func_or_class): """ Deprecated warning decorator. It will result a warning message.
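# Editor's sketch (not part of the diff): generate_layer_fn_noattr registers a
# thin wrapper that appends the op with a single input X, a single output Out,
# and no attributes, so call sites are unchanged by the refactor. A minimal
# usage sketch, assuming a build with this patch applied:
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[16], dtype='float32')
y = fluid.layers.sigmoid(x)  # produced by generate_layer_fn_noattr('sigmoid')
z = fluid.layers.exp(x, name='exp_out')  # 'name' is the only extra parameter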
diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 2b947ca9e87af2a0a7b224cb55f3409e17118bed..dfd801a098d6451dbdb20d9ba44187d1e3f8a91a 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -68,7 +68,7 @@ def noam_decay(d_model, warmup_steps): a = global_step**-0.5 b = (warmup_steps**-1.5) * global_step - lr_value = (d_model**-0.5) * ops.elementwise_min(a, b) + lr_value = (d_model**-0.5) * nn.elementwise_min(a, b) return lr_value @@ -241,7 +241,7 @@ def polynomial_decay(learning_rate, else: decay_steps_var = tensor.fill_constant( shape=[1], dtype='float32', value=float(decay_steps)) - global_step = ops.elementwise_min(x=global_step, y=decay_steps_var) + global_step = nn.elementwise_min(x=global_step, y=decay_steps_var) decayed_lr = (learning_rate - end_learning_rate) * \ ((1 - global_step / decay_steps) ** power) + end_learning_rate diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index f896cfa04b3e0d89daaa1bd7fd893b5892a09a4e..6e0f3de4141aff3efb98b6d1162823a13f83a7bb 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -20,9 +20,9 @@ from __future__ import print_function import numpy as np from ..layer_helper import LayerHelper from ..initializer import Normal, Constant -from ..framework import Variable +from ..framework import Variable, OpProtoHolder from ..param_attr import ParamAttr -from .layer_function_generator import autodoc, templatedoc +from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_ from .tensor import concat from . import utils from .. import unique_name @@ -107,7 +107,16 @@ __all__ = [ 'log', 'crop', 'rank_loss', + 'elu', + 'relu6', + 'pow', + 'stanh', + 'hard_sigmoid', + 'swish', 'prelu', + 'brelu', + 'leaky_relu', + 'soft_relu', 'flatten', 'sequence_mask', 'stack', @@ -116,6 +125,14 @@ __all__ = [ 'sequence_enumerate', 'expand', 'sequence_concat', + 'scale', + 'elementwise_add', + 'elementwise_div', + 'elementwise_sub', + 'elementwise_mul', + 'elementwise_max', + 'elementwise_min', + 'elementwise_pow', ] @@ -3605,7 +3622,7 @@ def matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None): attrs={ 'transpose_X': transpose_x, 'transpose_Y': transpose_y, - 'alpha': alpha, + 'alpha': float(alpha), }) return out @@ -5895,6 +5912,148 @@ def pad2d(input, return out +@templatedoc() +def elu(x, alpha=1.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + alpha(${alpha_type}|1.0): ${alpha_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('elu', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='elu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}) + return out + + +@templatedoc() +def relu6(x, threshold=6.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + threshold(${threshold_type}|6.0): ${threshold_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. 
+ + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('relu6', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='relu6', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) + return out + + +@templatedoc() +def pow(x, factor=1.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + factor(${factor_type}|1.0): ${factor_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('pow', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='pow', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'factor': factor}) + return out + + +@templatedoc() +def stanh(x, scale_a=2.0 / 3.0, scale_b=1.7159, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + scale_a(${scale_a_type}|2.0 / 3.0): ${scale_a_comment} + scale_b(${scale_b_type}|1.7159): ${scale_b_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('stanh', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='stanh', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'scale_a': scale_a, + 'scale_b': scale_b}) + return out + + +@templatedoc() +def hard_sigmoid(x, slope=0.2, offset=0.5, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + slope(${slope_type}|0.2): ${slope_comment} + offset(${offset_type}|0.5): ${offset_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('hard_sigmoid', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='hard_sigmoid', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'slope': slope, + 'offset': offset}) + return out + + +@templatedoc() +def swish(x, beta=1.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + beta(${beta_type}|1.0): ${beta_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('swish', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='swish', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'beta': beta}) + return out + + def prelu(x, mode, param_attr=None, name=None): """ Equation: @@ -5948,6 +6107,74 @@ def prelu(x, mode, param_attr=None, name=None): return out + +@templatedoc() +def brelu(x, t_min=0.0, t_max=24.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + t_min(${t_min_type}|0.0): ${t_min_comment} + t_max(${t_max_type}|24.0): ${t_max_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically.
+ Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('brelu', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='brelu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'t_min': t_min, + 't_max': t_max}) + return out + + +@templatedoc() +def leaky_relu(x, alpha=0.02, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + alpha(${alpha_type}|0.02): ${alpha_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('leaky_relu', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='leaky_relu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'alpha': alpha}) + return out + + +@templatedoc() +def soft_relu(x, threshold=40.0, name=None): + """ + ${comment} + Args: + x(${x_type}): ${x_comment} + threshold(${threshold_type}|40.0): ${threshold_comment} + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + Returns: + output(${out_type}): ${out_comment} + """ + helper = LayerHelper('soft_relu', **locals()) + out = helper.create_tmp_variable(dtype=x.dtype) + helper.append_op( + type='soft_relu', + inputs={'X': x}, + outputs={'Out': out}, + attrs={'threshold': threshold}) + return out + + def flatten(x, axis=1, name=None): """ **Flatten layer** @@ -6234,3 +6461,105 @@ def expand(x, expand_times, name=None): outputs={'Out': out}, attrs={'expand_times': expand_times}) return out + + +def _elementwise_op(helper): + op_type = helper.layer_type + x = helper.kwargs.get('x', None) + y = helper.kwargs.get('y', None) + assert x is not None, 'x cannot be None in {}'.format(op_type) + assert y is not None, 'y cannot be None in {}'.format(op_type) + axis = helper.kwargs.get('axis', -1) + use_mkldnn = helper.kwargs.get('use_mkldnn', False) + name = helper.kwargs.get('name', None) + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) + + helper.append_op( + type=op_type, + inputs={'X': x, + 'Y': y}, + outputs={'Out': out}, + attrs={'axis': axis, + 'use_mkldnn': use_mkldnn}) + return helper.append_activation(out) + + +@templatedoc() +def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): + """ + ${comment} + + Args: + x(${x_type}): ${x_comment} + scale(${scale_type}): ${scale_comment} + bias(${bias_type}): ${bias_comment} + bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment} + act(basestring|None): Activation applied to the output. + name(basestring|None): Name of the output. 
+ + Returns: + out(${out_type}): ${out_comment} + """ + + helper = LayerHelper('scale', **locals()) + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) + + helper.append_op( + type='scale', + inputs={'X': x}, + outputs={'Out': out}, + attrs={ + 'scale': float(scale), + 'bias': float(bias), + 'bias_after_scale': bias_after_scale + }) + return helper.append_activation(out) + + +def elementwise_add(x, y, axis=-1, use_mkldnn=False, act=None, name=None): + return _elementwise_op(LayerHelper('elementwise_add', **locals())) + + +def elementwise_div(x, y, axis=-1, use_mkldnn=False, act=None, name=None): + return _elementwise_op(LayerHelper('elementwise_div', **locals())) + + +def elementwise_sub(x, y, axis=-1, use_mkldnn=False, act=None, name=None): + return _elementwise_op(LayerHelper('elementwise_sub', **locals())) + + +def elementwise_mul(x, y, axis=-1, use_mkldnn=False, act=None, name=None): + return _elementwise_op(LayerHelper('elementwise_mul', **locals())) + + +def elementwise_max(x, y, axis=-1, use_mkldnn=False, act=None, name=None): + return _elementwise_op(LayerHelper('elementwise_max', **locals())) + + +def elementwise_min(x, y, axis=-1, use_mkldnn=False, act=None, name=None): + return _elementwise_op(LayerHelper('elementwise_min', **locals())) + + +def elementwise_pow(x, y, axis=-1, use_mkldnn=False, act=None, name=None): + return _elementwise_op(LayerHelper('elementwise_pow', **locals())) + + +for func in [ + elementwise_add, elementwise_div, elementwise_sub, elementwise_mul, + elementwise_max, elementwise_min, elementwise_pow +]: + op_proto = OpProtoHolder.instance().get_op_proto(func.__name__) + func.__doc__ = _generate_doc_string_( + op_proto, + additional_args_lines=[ + "act (basestring|None): Activation applied to the output.", + "name (basestring|None): Name of the output." + ]) diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 129252653dc139b7405626e6fd410704a4ad06d9..7867bfe00e25711643eab1ab8d0141dbbad3da52 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -13,9 +13,9 @@ # limitations under the License. 
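# Editor's sketch (not part of the diff): the elementwise ops and scale moved
# into nn.py above are now plain layers that also accept an inline activation
# and an output name; 'add_relu' below is just an illustrative name.
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[3], dtype='float32')
y = fluid.layers.data(name='y', shape=[3], dtype='float32')
z = fluid.layers.elementwise_add(x, y, act='relu', name='add_relu')
s = fluid.layers.scale(x, scale=2.0, bias=1.0, bias_after_scale=True)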
from __future__ import print_function -from .layer_function_generator import generate_layer_fn +from .layer_function_generator import generate_layer_fn, generate_layer_fn_noattr -__activations__ = [ +__activations_noattr__ = [ 'sigmoid', 'logsigmoid', 'exp', @@ -33,29 +33,12 @@ __activations__ = [ 'square', 'softplus', 'softsign', - 'brelu', - 'leaky_relu', - 'soft_relu', - 'elu', - 'relu6', - 'pow', - 'stanh', - 'hard_sigmoid', - 'swish', ] __all__ = [ 'mean', 'mul', - 'scale', 'sigmoid_cross_entropy_with_logits', - 'elementwise_add', - 'elementwise_div', - 'elementwise_sub', - 'elementwise_mul', - 'elementwise_max', - 'elementwise_min', - 'elementwise_pow', 'clip', 'clip_by_norm', 'logical_and', @@ -70,11 +53,21 @@ __all__ = [ 'slice', 'shape', 'maxout', -] + __activations__ +] for _OP in set(__all__): globals()[_OP] = generate_layer_fn(_OP) +# This is a hot fix for some unittests that use: +# fluid.layers.scale(x=x, scale=10.0, out=out_var) +# e.g.: test_program_code.py, test_dist_train.py +globals()['_scale'] = generate_layer_fn('scale') + +__all__ += __activations_noattr__ + +for _OP in set(__activations_noattr__): + globals()[_OP] = generate_layer_fn_noattr(_OP) + __all__ += ["uniform_random"] _uniform_random_ = generate_layer_fn('uniform_random') diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index 051fe84364639ca6028326c0cb02b204a02531af..06513801dd8b34d366f9632f6943c8046872c31b 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -21,6 +21,7 @@ __all__ = [ "sequence_conv_pool", "glu", "scaled_dot_product_attention", + "img_conv_group", ] diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py index e1368a3392a9cab3e82eff0a73eb225a52aa03bf..87f3b7502e26d3e6a437985f99d7897b060e101e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import contextlib import numpy import unittest @@ -57,11 +67,11 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname, inference_model_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func) def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): if event.step == 10: test_metrics = trainer.test( reader=test_reader, feed_order=['x', 'y']) @@ -91,7 +101,7 @@ def infer(use_cuda, inference_program, params_dirname=None): return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 10 diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py index
de276755bb1eb2746cc780575a40357255223809..d744a00242422defb360590b193e07c6f811dcb9 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py @@ -14,11 +14,22 @@ from __future__ import print_function +import sys + import paddle import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.core as core import numpy -import six import os import cifar10_small_test_set @@ -106,7 +117,7 @@ def train(use_cuda, train_program, parallel, params_dirname): paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False) def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): avg_cost, accuracy = trainer.test( reader=test_reader, feed_order=['pixel', 'label']) @@ -118,7 +129,7 @@ def train(use_cuda, train_program, parallel, params_dirname): return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, optimizer_func=optimizer_func, place=place, @@ -133,7 +144,7 @@ def train(use_cuda, train_program, parallel, params_dirname): def infer(use_cuda, inference_program, parallel, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place, diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py index dd547f3448ae55c07b6c09f9de4ac08d8ec5ee88..82294d4b26fe64e6cddc81f9ba3480caf5b51620 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py @@ -14,11 +14,22 @@ from __future__ import print_function +import sys + import paddle import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.core as core import numpy -import six import os import cifar10_small_test_set @@ -83,7 +94,7 @@ def train(use_cuda, train_program, parallel, params_dirname): paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False) def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): avg_cost, accuracy = trainer.test( reader=test_reader, feed_order=['pixel', 'label']) @@ -95,7 +106,7 @@ def train(use_cuda, train_program, parallel, params_dirname): return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func, @@ -110,7 +121,7 @@ def train(use_cuda, train_program, parallel, params_dirname): def infer(use_cuda, 
inference_program, parallel, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place, diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py index ec4e1c768c7f2a2421ac409a2eecc0100c086a6a..9e155a59145db88dab27576a4a67a5d450bcfc9d 100755 --- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import numpy as np WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict() @@ -149,7 +159,7 @@ def optimize_func(): def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimize_func) feed_order = [ @@ -164,7 +174,7 @@ def train(use_cuda, train_program, params_dirname): # place) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.conll05.test(), batch_size=BATCH_SIZE) avg_cost_set = trainer.test( @@ -184,7 +194,7 @@ def train(use_cuda, train_program, params_dirname): if math.isnan(float(avg_cost)): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 1: # Run 2 iterations to speed CI @@ -204,7 +214,7 @@ def train(use_cuda, train_program, params_dirname): def infer(use_cuda, inference_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( inference_program, param_path=params_dirname, place=place) # Setup input by creating LoDTensor to represent sequence of words. diff --git a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py index 560f1189581f631dc6a3470cf8f22f902ca26f26..b597dcf801dc5ad4b5957875634018cfdcd0b83b 100644 --- a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py @@ -13,17 +13,28 @@ # limitations under the License. 
from __future__ import print_function + import contextlib +import sys import numpy as np import paddle import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.framework as framework import paddle.fluid.layers as pd from paddle.fluid.executor import Executor from functools import partial import unittest -import os dict_size = 30000 source_dict_dim = target_dict_dim = dict_size @@ -198,12 +209,12 @@ def train(use_cuda, is_sparse, is_local=True): ] def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): print('pass_id=' + str(event.epoch) + ' batch=' + str(event.step)) if event.step == 10: trainer.stop() - trainer = fluid.Trainer( + trainer = Trainer( train_func=partial(train_program, is_sparse), place=place, optimizer_func=optimizer_func) diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py index 973308498bec3cddde2ef651751ad5d0c9f84503..ce183883e3bddd8633dd9c393ee358ba6210ea61 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py @@ -14,14 +14,22 @@ from __future__ import print_function -import argparse +import sys + import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.core as core import paddle -import six -import sys import numpy -import unittest import math import sys import os @@ -68,14 +76,14 @@ def optimizer_func(): def train(use_cuda, train_program, parallel, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func, parallel=parallel) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) avg_cost, acc = trainer.test( @@ -91,7 +99,7 @@ def train(use_cuda, train_program, parallel, params_dirname): event.epoch + 1, avg_cost, acc)) if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print( ("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, @@ -112,7 +120,7 @@ def train(use_cuda, train_program, parallel, params_dirname): def infer(use_cuda, inference_program, parallel, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place, diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py 
b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py index cb4aeb430e1a9662a183084c0cdacc41c5a8ec11..45a5ff34af00f2dbe69bd4f08a50626d6ca814f8 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py @@ -14,14 +14,22 @@ from __future__ import print_function -import argparse +import sys + import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.core as core import paddle -import six -import sys import numpy -import unittest import math import sys import os @@ -55,14 +63,14 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname, parallel): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func, parallel=parallel) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) avg_cost, acc = trainer.test( @@ -94,7 +102,7 @@ def train(use_cuda, train_program, params_dirname, parallel): def infer(use_cuda, inference_program, parallel, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place, diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py index 9e2767783bb6748cfc8f95567627068d7532a8c8..82193737967b2bebdd17cef8752eeb9cec2e85ce 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py @@ -19,6 +19,16 @@ import sys import numpy as np import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.layers as layers import paddle.fluid.nets as nets @@ -164,7 +174,7 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func) feed_order = [ @@ -173,7 +183,7 @@ def train(use_cuda, train_program, params_dirname): ] def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): test_reader = paddle.batch( paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) avg_cost_set = trainer.test( @@ -208,7 +218,7 @@ def train(use_cuda, train_program, params_dirname): def infer(use_cuda, inference_program, params_dirname): place = 
fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( inference_program, param_path=params_dirname, place=place) # Use the first data from paddle.dataset.movielens.test() as input. diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py index 097c2a468fca558106aba2f24c332256189d9076..14719774b9d90c2e96d8f6134469502241a5f1f2 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * from functools import partial import numpy as np @@ -72,13 +82,13 @@ def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - trainer = fluid.Trainer( + trainer = Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) avg_cost, acc = trainer.test( @@ -96,7 +106,7 @@ def train(use_cuda, train_program, params_dirname): event.epoch + 1, avg_cost, acc)) if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 1: # Run 2 iterations to speed CI @@ -119,7 +129,7 @@ def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py index 5f74cd142590abb93f8846bc831a9f5e3dd2f311..62fbba6fe1a62da6a93d50abc074bf5d794cf458 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * from functools import partial import 
numpy as np @@ -87,13 +97,13 @@ def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - trainer = fluid.Trainer( + trainer = Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) avg_cost, acc = trainer.test( @@ -111,7 +121,7 @@ def train(use_cuda, train_program, params_dirname): event.epoch + 1, avg_cost, acc)) if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 1: # Run 2 iterations to speed CI @@ -134,7 +144,7 @@ def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py index 284a6ca168636377699c287236c491352566909b..7523ad3fef17f61b1bde1fc687761cc6b86c3d9e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * from functools import partial import numpy as np @@ -79,13 +89,13 @@ def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - trainer = fluid.Trainer( + trainer = Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE, @@ -105,7 +115,7 @@ def train(use_cuda, train_program, params_dirname): event.epoch + 1, avg_cost, acc)) if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 1: # Run 2 iterations to speed CI @@ -129,7 +139,7 @@ def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - inferencer = fluid.Inferencer( + inferencer = Inferencer( 
infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place) diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py index 1c7cf3199a07c3f65d967eda70a481b1bd1b1638..e4c0cc5429d3fe891034161d90fadfa9dd078b0b 100644 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import numpy as np import math import sys @@ -95,7 +105,7 @@ def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): outs = trainer.test( reader=test_reader, feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw']) @@ -109,7 +119,7 @@ def train(use_cuda, train_program, params_dirname): if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, optimizer_func=optimizer_func, place=place) trainer.train( @@ -121,7 +131,7 @@ def train(use_cuda, train_program, params_dirname): def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place) # Setup inputs by creating 4 LoDTensors representing 4 words. 
Here each word diff --git a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py index 7c3ed0916845d0a0cc0c462ff00927b91f546b21..c2b089694ea2f329e67ad6c50def26caa454720e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/test_dist_se_resnext.py @@ -25,13 +25,14 @@ class TestDistSeResneXt2x2(TestDistBase): self.check_with_place("dist_se_resnext.py", delta=1e-7) -class TestDistseResnXt2x2WithMemopt(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._mem_opt = True - - def test_dist_train(self): - self.check_with_place("dist_se_resnext.py", delta=1e-7) +# TODO(typhoonzero): fix this test +# class TestDistseResnXt2x2WithMemopt(TestDistBase): +# def _setup_config(self): +# self._sync_mode = True +# self._mem_opt = True + +# def test_dist_train(self): +# self.check_with_place("dist_se_resnext.py", delta=1e-7) class TestDistSeResneXt2x2Async(TestDistBase): diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 083525ccf54d389b60c4aaa9f8c6223f07c773cd..d0875d9ea442d0e88dfd958e5948b26225416df2 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -27,6 +27,7 @@ import paddle.fluid.layers as layers from paddle.fluid.layers.io import ListenAndServ from paddle.fluid.layers.io import Recv from paddle.fluid.layers.io import Send +import paddle.fluid.layers.ops as ops from paddle.fluid import core @@ -89,7 +90,7 @@ class TestSendOp(unittest.TestCase): name="X", append_batch_size=False) fluid.initializer.Constant(value=1.0)(x, main.global_block()) - layers.scale(x=x, scale=10.0, out=out_var) + ops._scale(x=x, scale=10.0, out=out_var) self.server_exe = fluid.Executor(place) self.server_exe.run(main) diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index a198b25520f97ce23b9c1ebb9cd82fc458222d73..1b7f0745cd758a7534dec9f48132e9cb44a994e6 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -659,5 +659,25 @@ class TestLoadSliceVar(TranspilerTest): pserver2._slice_vars_and_attrs[idx][2].shape)) +class TestNCCL2Transpile(TranspilerTest): + def test_nccl2_transpile(self): + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + self.net_conf() + + config = fluid.DistributeTranspilerConfig() + config.mode = "nccl2" + t = fluid.DistributeTranspiler(config=config) + t.transpile( + 0, + trainers="127.0.0.1:6174,127.0.0.1:6175", + current_endpoint="127.0.0.1:6174", + startup_program=startup) + print([op.type for op in startup.global_block().ops]) + self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id") + self.assertIsNotNone(startup.global_block().vars.get("NCCLID")) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 6855a0e2c0437429fe35b1dd97188716945a743b..f474cdae2054531d44724e0e3e0e58a35fb8ddcd 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -573,6 +573,158 @@ class TestBook(unittest.TestCase): self.assertIsNotNone(out) print(str(program)) + def test_brelu(self): + program = Program() + with 
program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.brelu(input, t_min=1.0, t_max=20.0, name='brelu') + self.assertIsNotNone(out) + print(str(program)) + + def test_leaky_relu(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.leaky_relu(input, alpha=0.1, name='leaky_relu') + self.assertIsNotNone(out) + print(str(program)) + + def test_soft_relu(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.soft_relu(input, threshold=30.0, name='soft_relu') + self.assertIsNotNone(out) + print(str(program)) + + def test_sigmoid(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.sigmoid(input, name='sigmoid') + self.assertIsNotNone(out) + print(str(program)) + + def test_logsigmoid(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.logsigmoid(input, name='logsigmoid') + self.assertIsNotNone(out) + print(str(program)) + + def test_exp(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.exp(input, name='exp') + self.assertIsNotNone(out) + print(str(program)) + + def test_tanh(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.tanh(input, name='tanh') + self.assertIsNotNone(out) + print(str(program)) + + def test_tanh_shrink(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.tanh_shrink(input, name='tanh_shrink') + self.assertIsNotNone(out) + print(str(program)) + + def test_sqrt(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.sqrt(input, name='sqrt') + self.assertIsNotNone(out) + print(str(program)) + + def test_abs(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.abs(input, name='abs') + self.assertIsNotNone(out) + print(str(program)) + + def test_ceil(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.ceil(input, name='ceil') + self.assertIsNotNone(out) + print(str(program)) + + def test_floor(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.floor(input, name='floor') + self.assertIsNotNone(out) + print(str(program)) + + def test_cos(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.cos(input, name='cos') + self.assertIsNotNone(out) + print(str(program)) + + def test_sin(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.sin(input, name='sin') + self.assertIsNotNone(out) + print(str(program)) + + def test_round(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.round(input, name='round') + self.assertIsNotNone(out) + 
print(str(program)) + + def test_reciprocal(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.reciprocal(input, name='reciprocal') + self.assertIsNotNone(out) + print(str(program)) + + def test_square(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.square(input, name='square') + self.assertIsNotNone(out) + print(str(program)) + + def test_softplus(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.softplus(input, name='softplus') + self.assertIsNotNone(out) + print(str(program)) + + def test_softsign(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.softsign(input, name='softsign') + self.assertIsNotNone(out) + print(str(program)) + def test_roi_perspective_transform(self): program = Program() with program_guard(program): @@ -606,6 +758,14 @@ class TestBook(unittest.TestCase): out = layers.expand(x, [1, 2]) print(str(program)) + def test_softshrink(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.softshrink(input, name='softshrink') + self.assertIsNotNone(out) + print(str(program)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_program_code.py b/python/paddle/fluid/tests/unittests/test_program_code.py index e9c2b928617dce3904ca119896ca81454256e82e..27b22ba9392b63c0ccd7904ff03d737b977cc9fc 100644 --- a/python/paddle/fluid/tests/unittests/test_program_code.py +++ b/python/paddle/fluid/tests/unittests/test_program_code.py @@ -25,6 +25,7 @@ import paddle.fluid.layers as layers from paddle.fluid.layers.io import ListenAndServ from paddle.fluid.layers.io import Recv from paddle.fluid.layers.io import Send +import paddle.fluid.layers.ops as ops from paddle.fluid.transpiler.details import program_to_code @@ -52,7 +53,7 @@ class TestProgram2Code(unittest.TestCase): name="X", append_batch_size=False) fluid.initializer.Constant(value=1.0)(x, main.global_block()) - layers.scale(x=x, scale=10.0, out=out_var) + ops._scale(x=x, scale=10.0, out=out_var) program_to_code(main) diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index 30cdfe4ad2c9892184862b70ff49417ce5a08516..b495b6699b5d02ca8c466c984820be5c497d626e 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -12,1247 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function - -import contextlib -import os -import errno -import shutil -import six -import time - -from . import core -from . import data_feeder -from . import executor -from . import framework -from . import io -# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module -from . import optimizer as opt_module -from . import parallel_executor -from .transpiler import distribute_transpiler - -__all__ = [ - 'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent', - 'EndStepEvent', 'CheckpointConfig' -] - - -class BeginEpochEvent(object): - """ - The begin of a training epoch. - - Args: - epoch_id(int): The current epoch ID. 
- """ - - def __init__(self, epoch_id): - self.epoch = epoch_id - - -class EndEpochEvent(object): - """ - The end of a training epoch. - - Args: - epoch_id(int): The current epoch ID. - """ - - def __init__(self, epoch_id): - self.epoch = epoch_id - - -class BeginStepEvent(object): - """ - The begin of a training epoch. - - Args: - epoch_id(int): The current epoch ID. - step_id(int): The current step ID. - """ - - def __init__(self, epoch_id, step_id): - self.epoch = epoch_id - self.step = step_id - self.fetch_metrics = True - """ - If fetch_metrics is true, the metrics will be fetched at the - EndStepEvent. Default is True. - """ - - -class EndStepEvent(object): - """ - The end of a training step. - - Args: - epoch_id(int): The current epoch ID. - step_id(int): The current step ID. - metrics(list): A list of fetched tensor. The order of this list is same - as the :code:`train_func` returns. - """ - - def __init__(self, epoch_id, step_id, metrics): - self.epoch = epoch_id - self.step = step_id - self.metrics = metrics - - -class CheckpointConfig(object): - """ - Parameter object for :code:`save_checkpoint` and - :code:`fluid.Trainer`. Used to configuration how to save checkpoint. - - Args: - checkpoint_dir(str): Directory path to save check point. Default is the - current directory. - - max_num_checkpoints(int): The max number of local check points. - epoch_interval(int): Every number of epoch to save check point. - step_interval(int): Every number of step to save check point. - - Examples: - >>> config = fluid.CheckpointConfig("./checkpoints") - >>> trainer = fluid.Trainer(train_func=train_program, - >>> place=place, - >>> optimizer_func=optimizer_func, - >>> checkpoint_config=config) - >>> trainer.train(...) - """ - - def __init__(self, - checkpoint_dir=None, - max_num_checkpoints=3, - epoch_interval=1, - step_interval=10): - - assert epoch_interval >= 1 - assert step_interval >= 1 - - self.checkpoint_dir = checkpoint_dir \ - if checkpoint_dir is not None else os.getcwd() - self.max_num_checkpoints = max_num_checkpoints - self.epoch_interval = epoch_interval - self.step_interval = step_interval - self.epoch_id = 0 - self.step_id = 0 - self.load_serial = None - self.pserver_id = None - self.lookup_table_name = None - - -def check_and_get_place(place): - """ - Check the type of place or get the default place - Args: - place(None|core.CUDAPlace|core.CPUPlace): the place that trainer will be executed on. - - Raises: - TypeError if the type mismatched. - - Returns: - the original place if it is not None. - if fluid is compiled with CUDA, returns CUDAPlace(0) by default. - Otherwise returns CPUPlace by default. - """ - if place is None: - if core.is_compiled_with_cuda(): - return core.CUDAPlace(0) - else: - return core.CPUPlace() - else: - if not isinstance(place, core.CUDAPlace) and not isinstance( - place, core.CPUPlace): - raise TypeError("Place should be either CUDAPlace or CPUPlace") - return place - - -class Trainer(object): - """ - A trainer wraps MultiGPU/MultiNode training loops and can be used to train a - simple neural network easily. - - This API takes a :code:`train_func`. A :code:`train_func` is a function that - return loss as it first return value. The reset value can be fetched by - EndStepEvent.metrics - - This API also takes a :code:`optimizer_func` that will return an optimizer - instance. 
- - For example, to train a MLP for MNIST dataset, the sample program is - - >>> import paddle.fluid as fluid - >>> - >>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10): - >>> hidden = image - >>> for layer_size in layer_sizes: - >>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation) - >>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax") - >>> - >>> def train_mnist_mlp(): - >>> img = fluid.layers.data(name='image', shape=[784]) - >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') - >>> prediction = mlp(img) - >>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label)) - >>> - >>> def optimizer(): - >>> return fluid.optimizer.Adam() - >>> - >>> trainer = Trainer(train_func=train_mnist_mlp, - >>> optimizer_func=optimizer, - >>> place=fluid.CUDAPlace(0), - >>> parallel=True) - >>> - >>> def train_callback(event): - >>> if isinstance(event, fluid.EndStepEvent): - >>> print "Epoch ID", event.epoch, "Step ID",\ - >>> event.step, "AvgLoss", event.metrics[0] - >>> elif isinstance(event, fluid.EndEpochEvent): - >>> trainer.save_params("./model_{0}".format(event.epoch)) - >>> - >>> trainer.train(num_epochs=100, event_handler=train_callback) - - For more example, please see :ref:`api_guide_high_level_api`. - - - Args: - train_func(callable): A function which will return loss. The loss must be - a scalar tensor. - optimizer_func(callable): A function that returns an Optimizer object. - place(CUDAPlace|CPUPlace): The device place of this trainer. If - :code:`parallel=True,` all CUDA Places will be used if :code:`place` - is a :code:`CUDAPlace`. - parallel(bool): True if use multiple devices. - checkpoint_config(CheckpointConfig): Configuration about how to save - checkpoints. - """ - - def __init__(self, - train_func, - optimizer_func, - param_path=None, - place=None, - parallel=False, - checkpoint_config=None): - self.__stop = False - self.parallel = parallel - - # config for checkpoint - # only chief worker will save variables - self.trainer_id = 0 - self.checkpoint_cfg = checkpoint_config - if self.checkpoint_cfg: - assert isinstance(self.checkpoint_cfg, CheckpointConfig) - serial = _get_latest_checkpoint_serial( - self.checkpoint_cfg.checkpoint_dir) - self.checkpoint_cfg.load_serial = serial if serial >= 0 else None - - self.scope = core.Scope() - - # 1. we need to generate a framework.Program by calling - # program_func. Reference: fluid.program_guard in - # test_word2vec.py - - self.startup_program = framework.Program() - self.train_program = framework.Program() - - with framework.program_guard(self.train_program, self.startup_program): - program_func_outs = train_func() - self.train_func_outputs = program_func_outs if isinstance( - program_func_outs, list) else [program_func_outs] - self.test_program = self.train_program.clone(for_test=True) - - # The first element of program_func_outs is loss. - loss = self.train_func_outputs[0] - - optimizer = optimizer_func() - if not isinstance(optimizer, opt_module.Optimizer): - raise TypeError( - "The optimizer should be an instance of Optimizer") - optimize_ops, params_grads = optimizer.minimize(loss) - - self.place = check_and_get_place(place) - - self._dist_transpile_if_necessary(optimize_ops, params_grads) - - # 2. 
move the default_main_program to self.program and run the - # default_startup program on an empty core.Scope() - # Run startup program - with self._prog_and_scope_guard(): - exe = executor.Executor(place) - exe.run(self.startup_program) - - if self.checkpoint_cfg and self.checkpoint_cfg.load_serial is not None: - self._load_checkpoint() - - if param_path and os.path.isdir(param_path): - with self._prog_and_scope_guard(): - # load params from param_path into scope - io.load_persistables( - executor=exe, - dirname=param_path, - main_program=self.startup_program) - - def _transpile_nccl2_dist(self): - # PADDLE_TRAINER_IPS - if "PADDLE_TRAINER_IPS" not in os.environ: - self.nccl_id_var = None - else: - self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) - port = os.getenv("PADDLE_PSERVER_PORT") - worker_ips = os.getenv("PADDLE_TRAINER_IPS") - worker_endpoints = [] - for ip in worker_ips.split(","): - worker_endpoints.append(':'.join([ip, port])) - self.num_trainers = len(worker_endpoints) - current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port - worker_endpoints.remove(current_endpoint) - # TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id - # in ParallelExecutor to start - # distributed training using NCCL2 - self.nccl_id_var = self.startup_program.global_block().create_var( - name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW) - self.startup_program.global_block().append_op( - type="gen_nccl_id", - inputs={}, - outputs={"NCCLID": self.nccl_id_var}, - attrs={ - "endpoint": current_endpoint, - "endpoint_list": worker_endpoints, - "trainer_id": self.trainer_id - }) - - def _dist_transpile_if_necessary(self, optimize_ops, params_grads): - self._transpile_nccl2_dist() - if self.nccl_id_var != None: - return - - if "PADDLE_TRAINING_ROLE" not in os.environ: - return - - # the port of all pservers, needed by both trainer and pserver - port = os.getenv("PADDLE_PSERVER_PORT", "6174") - # comma separated ips of all pservers, needed by trainer and - # pserver - pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "") - eplist = [] - for ip in pserver_ips.split(","): - eplist.append(':'.join([ip, port])) - pserver_endpoints = ",".join(eplist) - # total number of workers/trainers in the job, needed by - # trainer and pserver - trainers = int(os.getenv("PADDLE_TRAINERS")) - # the IP of the local machine, needed by pserver only - current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port - # the unique trainer id, starting from 0, needed by trainer - # only - self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) - - # the role, should be either PSERVER or TRAINER - training_role = os.getenv("PADDLE_TRAINING_ROLE") - with self._prog_and_scope_guard(): - t = distribute_transpiler.DistributeTranspiler() - t.transpile( - self.trainer_id, pservers=pserver_endpoints, trainers=trainers) - if training_role == "PSERVER": - if self.checkpoint_cfg: - pserver_id = eplist.index(current_endpoint) - self.checkpoint_cfg.pserver_id = pserver_id - if t.has_distributed_lookup_table: - self.checkpoint_cfg.lookup_table_name = t.table_name - - self.train_program = t.get_pserver_program(current_endpoint) - self.startup_program = t.get_startup_program(current_endpoint, - self.train_program) - elif training_role == "TRAINER": - self.train_program = t.get_trainer_program() - else: - raise ValueError( - 'TRAINING_ROLE environment variable must be either TRAINER or PSERVER' - ) - - def stop(self): - """ - stop training - """ - self.__stop = True - - def train(self, num_epochs, 
event_handler, reader=None, feed_order=None): - """ - Start the train loop to train the model. - - Args: - num_epochs(int): The number of epoch. An epoch will process all data in reader - event_handler(callable): The event handler. A function with type (ev:Event)->void - reader(callable): A reader creator object. See also - :ref:`api_guide_python_reader` . - feed_order(list): Feeding order of reader. None will following the defining - order in program - - Returns: - None - """ - training_role = os.getenv("PADDLE_TRAINING_ROLE", "") - if training_role == "PSERVER": - with self._prog_and_scope_guard(): - exe = executor.Executor(self.place) - exe.run() - return - if self.parallel: - self._train_by_parallel_executor(num_epochs, event_handler, reader, - feed_order) - else: - self._train_by_executor(num_epochs, event_handler, reader, - feed_order) - - def test(self, reader, feed_order): - """ - Test the model on given test data - - Args: - reader(callable): The reader that yields test data. - feed_order(list): Feeding order of reader. None will following the - defining order in program - """ - - return self._test_by_executor(reader, feed_order, - self.train_func_outputs) - - def save_params(self, param_path): - """ - Save all parameters into :code:`param_path`. - - Args: - param_path(str): The path to save parameters. - - Returns: - None - """ - with self._prog_and_scope_guard(): - exe = executor.Executor(self.place) - io.save_persistables(exe, dirname=param_path) - - def save_inference_model(self, param_path, feeded_var_names, - target_var_indexes): - """ - Save model for cpp inference into :code:`param_path`. - - Args: - param_path(str): The path to save parameters. - feeded_var_names(list(str)): The name of the vars that you - need to feed in before run program. - target_var_indexes(list(int)): the index of target var that - you need to return in trainer.train_func. - Returns: - None - """ - with self._prog_and_scope_guard(): - exe = executor.Executor(self.place) - target_vars = [ - self.train_func_outputs[index] for index in target_var_indexes - ] - io.save_inference_model(param_path, feeded_var_names, target_vars, - exe) - - @contextlib.contextmanager - def _prog_and_scope_guard(self): - with framework.program_guard( - main_program=self.train_program, - startup_program=self.startup_program): - with executor.scope_guard(self.scope): - yield - - def _train_by_executor(self, num_epochs, event_handler, reader, feed_order): - """ - Train by Executor and single device. 
- - Args: - num_epochs: - event_handler: - reader: - feed_order: - - Returns: - - """ - with self._prog_and_scope_guard(): - feed_var_list = build_feed_var_list(self.train_program, feed_order) - feeder = data_feeder.DataFeeder( - feed_list=feed_var_list, place=self.place) - exe = executor.Executor(self.place) - reader = feeder.decorate_reader(reader, multi_devices=False) - self._train_by_any_executor(event_handler, exe, num_epochs, reader) - - def _train_by_any_executor(self, event_handler, exe, num_epochs, reader): - if self.checkpoint_cfg: - epochs = [ - epoch_id for epoch_id in range(num_epochs) - if epoch_id >= self.checkpoint_cfg.epoch_id - ] - else: - epochs = [epoch_id for epoch_id in range(num_epochs)] - - for epoch_id in epochs: - event_handler(BeginEpochEvent(epoch_id)) - for step_id, data in enumerate(reader()): - if self.__stop: - if self.checkpoint_cfg: - self._clean_checkpoint() - return - - if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \ - and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id: - continue - - begin_event = BeginStepEvent(epoch_id, step_id) - event_handler(begin_event) - if begin_event.fetch_metrics: - metrics = exe.run(feed=data, - fetch_list=[ - var.name - for var in self.train_func_outputs - ]) - else: - metrics = exe.run(feed=data, fetch_list=[]) - - if self.checkpoint_cfg: - self._save_checkpoint(epoch_id, step_id) - event_handler(EndStepEvent(epoch_id, step_id, metrics)) - event_handler(EndEpochEvent(epoch_id)) - if self.checkpoint_cfg: - self._clean_checkpoint() - - def _test_by_executor(self, reader, feed_order, fetch_list): - with executor.scope_guard(self.scope): - feed_var_list = build_feed_var_list(self.test_program, feed_order) - feeder = data_feeder.DataFeeder( - feed_list=feed_var_list, place=self.place) - exe = executor.Executor(self.place) - accumulated = len(fetch_list) * [0] - count = 0 - for data in reader(): - outs = exe.run(program=self.test_program, - feed=feeder.feed(data), - fetch_list=fetch_list) - accumulated = [x[0] + x[1][0] for x in zip(accumulated, outs)] - count += 1 - - return [x / count for x in accumulated] - - def _train_by_parallel_executor(self, num_epochs, event_handler, reader, - feed_order): - with self._prog_and_scope_guard(): - pe = self._get_or_create_parallel_executor() - feed_var_list = build_feed_var_list(self.train_program, feed_order) - feeder = data_feeder.DataFeeder( - feed_list=feed_var_list, place=self.place) - reader = feeder.decorate_reader(reader, multi_devices=True) - self._train_by_any_executor(event_handler, pe, num_epochs, reader) - - def _get_parallel_executor(self): - return getattr(self, 'parallel_executor', None) - - def _get_or_create_parallel_executor(self): - if self._get_parallel_executor() is None: - self.parallel_executor = parallel_executor.ParallelExecutor( - use_cuda=isinstance(self.place, core.CUDAPlace), - loss_name=self.train_func_outputs[0].name) - return self._get_parallel_executor() - - def _clean_checkpoint(self): - assert self.checkpoint_cfg - clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir) - - def _get_checkpoint_load_args(self): - """ - epoch_id and step_id are runtime arguments, they are not variables, will load them independently. - """ - return ["epoch_id", "step_id"] - - def _get_checkpoint_save_args(self, epoch_id, step_id): - """ - epoch_id and step_id are runtime arguments, they are not variables, will save them independently. 
- """ - trainer_args = {} - trainer_args["epoch_id"] = epoch_id - trainer_args["step_id"] = step_id - return trainer_args - - def _save_checkpoint(self, epoch_id, step_id): - assert self.checkpoint_cfg - - if epoch_id % self.checkpoint_cfg.epoch_interval == 0 \ - and step_id % self.checkpoint_cfg.step_interval == 0: - exe = executor.Executor(self.place) - save_checkpoint( - executor=exe, - checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, - trainer_id=self.trainer_id, - trainer_args=self._get_checkpoint_save_args(epoch_id, step_id), - main_program=self.train_program, - max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints) - - def _load_checkpoint(self): - with self._prog_and_scope_guard(): - exe = executor.Executor(self.place) - load_checkpoint( - executor=exe, - checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, - main_program=self.startup_program) - - if not self.checkpoint_cfg.pserver_id: - load_trainer_args = self._get_checkpoint_load_args() - trainer_args = load_checkpoint( - executor=exe, - checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, - main_program=self.startup_program, - role_id=self.trainer_id, - is_trainer=True, - load_trainer_args=load_trainer_args) - - if len(trainer_args) != 2: - raise ValueError( - "the return trainer_args length do not equal _get_checkpoint_load_args" - ) - self.checkpoint_cfg.epoch_id = int(trainer_args[0]) - self.checkpoint_cfg.step_id = int(trainer_args[1]) - else: - if self.checkpoint_cfg.lookup_table_name: - load_checkpoint( - executor=exe, - checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, - main_program=self.startup_program, - role_id=self.checkpoint_cfg.pserver_id, - is_trainer=False, - load_trainer_args=None, - load_lookup_table=self.checkpoint_cfg.lookup_table_name) - - -def build_feed_var_list(program, feed_order): - if not isinstance(program, framework.Program): - raise TypeError("The 'program' should be an object of Program") - - if isinstance(feed_order, list): - feed_var_list = [ - program.global_block().var(var_name) for var_name in feed_order - ] - else: - if not isinstance(feed_order, dict): - raise TypeError( - "The 'feed_order' should be either None, list or dict.") - if not sorted(feed_order.values()) == list(range(len(feed_order))): - raise ValueError( - "The values of 'feed_order' should be a permutation of [0, len(feed_order))" - ) - sorted_pair_list = sorted( - six.iteritems(feed_order), key=lambda item: item[1]) - feed_var_list = [ - program.global_block().var(pair[0]) for pair in sorted_pair_list - ] - return feed_var_list - - -# move Checkpoint APIs from io.py to trainer.py, make all of them are private. -SUCCESS_MARK_FILENAME = "_SUCCESS" -CHECKPOINT_PREFIX = "checkpoint" -MODEL_DIR = "__model__" -LOOKUP_TABLE_DIR = "__lookup_table__" -TRAINER_PREFIX = "trainer" -CHECKPOINT_SEPARATOR = "_" - - -def save_checkpoint(executor, - checkpoint_dir, - trainer_id, - main_program, - trainer_args=None, - max_num_checkpoints=3, - lookup_table=None, - pserver_endpoints=None): - """ - This function filters out all checkpoint variables from the give - main_program and then saves these variables to the `checkpoint_dir` - directory. - - In the training precess, we generally save a checkpoint in each - iteration. So there might be a lot of checkpoints in the - `checkpoint_dir`. To avoid them taking too much disk space, the - `max_num_checkpoints` are introduced to limit the total number of - checkpoints. If the number of existing checkpints is greater than - the `max_num_checkpoints`, oldest ones will be scroll deleted. 
- - A variable is a checkpoint variable and will be saved if it meets - all following conditions: - 1. It's persistable. - 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. - 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - - Args: - executor(Executor): The executor to run for save checkpoint. - checkpoint_dir(str): The folder where to save checkpoints. - trainer_id(int): currect trainer id, if id is equal to 0, the trainer - is chief. - trainer_args(dict|None): Current training arguments. Such as 'epoch_id' - and 'step_id'. - Defaut: None - main_program(Program): The program whose checkpoint variables will - be saved. - max_num_checkpoints(int): The max number of total number of existing - checkpoints. - Default: 3 - lookup_table(string|None): the lookup table name, when use distribute - lookup table, we can get lookup table name by DistributeTranspiler. - table_name - pserver_endpoints(list|None): the parameter server ip:port list. - when use distribute lookup table, we can get pserver_endpoints by - distribute arguments. - - Returns: - None - - Raises: - ValueError: If `checkpoint_dir` is None. - AssertionError: If `trainer_args` is not a dict. - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - path = "./checkpoints" - prog = fluid.default_main_program() - trainer_args = {"epoch_id": 200, - "step_id": 20} # just an example - table_name = "share_w" - ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"] - - save_checkpoint(executor=exe, - checkpoint_dir=path, - trainer_id=0, - trainer_args=trainer_args, - main_program=prog, - max_num_checkpoints=3, - lookup_table=table_name, - pserver_endpoints = ps_endpoints) - """ - if checkpoint_dir is None: - raise ValueError("'checkpoint_dir' should not be None") - - if main_program is None: - raise ValueError('main_program should not be None.') - - if trainer_args: - assert isinstance(trainer_args, dict) - - is_chief = trainer_id == 0 - - _make_chekcpoint_dirs(checkpoint_dir) - serial = _get_latest_checkpoint_serial(checkpoint_dir) + 1 - cur_dir = _get_serial_dir(checkpoint_dir, serial) - - _save_trainer_args(cur_dir, trainer_id, trainer_args) - - if is_chief: - _save_persist_vars_without_grad(executor, cur_dir, main_program) - - if is_chief and lookup_table and pserver_endpoints: - _save_pserver_vars_by_notify(executor, cur_dir, lookup_table, - pserver_endpoints) - - _scroll_delete(checkpoint_dir, max_num_checkpoints) - - -def load_checkpoint(executor, - checkpoint_dir, - main_program, - role_id=0, - is_trainer=True, - load_trainer_args=None, - load_lookup_table=None): - """ - This function filters out all checkpoint variables from the give - main_program and then try to load these variables from the - `checkpoint_dir` directory. - - In the training precess, we generally save a checkpoint in each - iteration. So there are more than one checkpoint in the - `checkpoint_dir` (each checkpoint has its own sub folder), use - `serial` to specify which serial of checkpoint you would like to - load. - - A variable is a checkpoint variable and will be loaded if it meets - all following conditions: - 1. It's persistable. - 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. - 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - - Args: - executor(Executor): The executor to run for loading checkpoint. - checkpoint_dir(str): The folder where all checkpoints are. - serial(int): The serial of checkpoint you would like to load. 
- main_program(Program): The program whose checkpoint variables will - be loaded. - role_id(int): the trainer id or the parameter server id. - is_trainer(bool): trainer is True and parameter server is False. - load_trainer_args(list|None): list about load trainer args. - load_lookup_table(str|None): the lookup table name - - Returns: - None - - Raises: - ValueError: If `checkpoint_dir` is None. - ValueError: If `main_program` is None. - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - path = "./checkpoints" - prog = fluid.default_main_program() - load_checkpoint(executor=exe, checkpoint_dir=path, - serial=9, main_program=prog) - - # In this example, `load_checkpoint` function - # will first filters out all checkpoint variables in the default - # main program, and then try to load these variables form the - # folder "./checkpoints/checkpoint_9/__model__". - """ - - if checkpoint_dir is None: - raise ValueError("'checkpoint_dir' should not be None") - - serial = _get_latest_checkpoint_serial(checkpoint_dir) - - # there are nothing need to be loaded - if serial is None or serial < 0: - return - - if main_program is None: - raise ValueError('main_program should not be None.') - - if is_trainer and load_trainer_args is None: - cur_dir = _get_serial_dir(checkpoint_dir, serial) - _load_persist_vars_without_grad(executor, cur_dir, main_program, True) - return - - if is_trainer and load_trainer_args: - return _load_trainer_args(checkpoint_dir, serial, role_id, - load_trainer_args) - - if not is_trainer and load_lookup_table: - _load_lookup_table_vars(executor, checkpoint_dir, main_program, role_id, - load_lookup_table) - - -def clean_checkpoint(checkpoint_dir, delete_dir=False): - """ - clean the checkpoint dir, when the train exits normally, - the trainer will call clean_checkpoint to delete checkpoint directory saved before. - delete_dir only works when the directory is empty, otherwise, OSError is raised. - - : param checkpoint_dir - : param delete_dir - """ - - if checkpoint_dir is None: - raise ValueError("'checkpoint_dir' should not be None") - _scroll_delete(checkpoint_dir, max_num_checkpoints=0) - - if delete_dir and not os.listdir(checkpoint_dir): - os.rmdir(checkpoint_dir) - - -def _load_persist_vars_without_grad(executor, - dirname, - program, - has_model_dir=False): - """ - This function filters out all checkpoint variables from the give - program and then trys to load these variables from the given directory. - - A variable is a checkpoint variable if it meets all following - conditions: - 1. It's persistable. - 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. - 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - - Args: - executor(Executor): The executor to run for loading variables. - dirname(str): The directory path. - program(Program): The program whose checkpoint variables will - be loaded. - has_model_dir(bool): if True, the function loads variables - from a sub directory named '__model__'. - Default: False - - Returns: - None - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - param_path = "./my_paddle_model" - prog = fluid.default_main_program() - _load_persist_vars_without_grad(executor=exe, - dirname=param_path, program=prog, has_model_dir=True) - - # In this example, `_load_persist_vars_without_grad` function - # will first filters out all checkpoint variables in the default - # main program, and then trys to load these variables form the - # folder "./my_paddle_model/__model__". 
- """ - - if has_model_dir: - dirname = _get_model_dir(dirname) - - io.load_vars( - executor, - dirname=dirname, - main_program=program, - predicate=_is_checkpoint_var, - filename=None) - - -def _load_lookup_table_vars(executor, dirname, program, pserver_id, table_name): - """ - The parameter server will load lookup table's local file in - selectedrows variable. - - Args: - executor(Executor): The executor to run for loading persistable variables - dirname(str): The directory path - main_program(Program): Find the variable named table_name in main_program - pserver_id(int): the serial number in pserver_endpoints list - table_name(str): lookup table name - - Returns: - None - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - dirname = "./checkpoints/checkpoint_9/" - prog = fluid.default_main_program() - pserver_id = 1 - table_name = "share_w" - _load_lookup_table_vars(executor=exe, - dirname=dirname, program=prog, pserver_id=pserver_id, - table_name=table_name) - """ - - for var in program.list_vars(): - if var.name == table_name: - lookup_table_var = var - break - - assert lookup_table_var is not None - - lookup_table_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) - table_file = table_name + CHECKPOINT_SEPARATOR + str(pserver_id) - - load_prog = framework.Program() - load_block = load_prog.global_block() - - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [lookup_table_var]}, - attrs={'file_path': os.path.join(lookup_table_dir, table_file)}) - - executor.run(load_prog) - - -def _save_persist_vars_without_grad(executor, dirname, program): - """ - This function filters out all checkpoint variables from the give - program and then save these variables to a sub-folder '__model__' of - the given directory. - - A variable is a checkpoint variable if it meets all following - conditions: - 1. It's persistable. - 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. - 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - - Args: - executor(Executor): The executor to run for saving variables. - dirname(str): The directory path. - program(Program): The program whose checkpoint variables will - be saved. - - Returns: - None - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - param_path = "./my_paddle_model" - prog = fluid.default_main_program() - _save_persist_vars_without_grad(executor=exe, - dirname=param_path, program=prog) - - # In this example, `_save_persist_vars_without_grad` function - # will first filters out all checkpoint variables in the default - # main program, and then saves these variables to the folder - # "./my_paddle_model/__model__". - """ - cur_dir = _get_model_dir(dirname) - io.save_vars( - executor, - dirname=cur_dir, - main_program=program, - vars=None, - predicate=_is_checkpoint_var, - filename=None) - _write_success(cur_dir) - - -def _save_pserver_vars_by_notify(executor, dirname, lookup_table, - ps_endpoint_list): - """ - This function will send checkpoint notify message from Trainer 0 - to all the pservers. - The checkpoint notify message contains lookup table name, - the absolute path on pserver to save lookup_table. - - Args: - executor(Executor): The executor to run for send checkpoint notify. - dirname(str): The folder where to save checkpoints. - lookup_table(string): the lookup table name, when use distribute - lookup table, we can get lookup table name by DistributeTranspiler. - table_name - ps_endpoint_list(list): the parameter server ip:port list. 
- when use distribute lookup table, we can get ps_endpoint_list by - distribute arguments. - Return: - None - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - param_path = "./my_paddle_model" - prog = fluid.default_main_program() - table_name = "share_w" - ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"] - - _save_pserver_vars_by_notify(executor=exe, - dirname=param_path, lookup_table=table_name, - ps_endpoint_list=ps_endpoints) - """ - cur_dir = _get_lookuptable_dir(dirname) - - checkpoint_notify_program = framework.Program() - checkpoint_notify_block = checkpoint_notify_program.global_block() - - attrs = {} - attrs['epmap'] = ps_endpoint_list - attrs['dir'] = cur_dir - attrs['lookup_table'] = lookup_table - - checkpoint_notify_block.append_op( - type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs) - executor.run(checkpoint_notify_program) - - -def _save_trainer_args(dirname, trainer_id, trainer_args): - assert isinstance(trainer_args, dict) - - cur_dir = _get_trainer_dir(dirname, trainer_id) - - for name, value in six.iteritems(trainer_args): - args_file = os.path.join(cur_dir, name) - with open(args_file, 'w') as f: - f.write(str(value)) - _write_success(cur_dir) - - -def _load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args): - """ - trainer will load some args from it's independent directory, - such as epoch_id and step_id. - - Args: - checkpoint_dir(str): The folder where all checkpoints are. - serial(int): The serial of checkpoint you would like to load. - trainer_id(int): current trainer id. - trainer_args(list): list about load trainer args - Return: - None - - Examples: - .. code-block:: python - - param_path = "./checkpoint/" - serial = 7 - trainer_id = 2 - trainer_args = ["epoch_id", "step_id"] - - _load_trainer_args(checkpoint_dir=param_path, serial=serial, - trainer_id=trainer_id, trainer_args=trainer_args) - """ - assert isinstance(trainer_args, list) - - cur_dir = _get_serial_dir(checkpoint_dir, serial) - cur_dir = _get_trainer_dir(cur_dir, trainer_id) - - ret_values = [] - - for arg in trainer_args: - cur_file = os.path.join(cur_dir, arg) - with open(cur_file, 'r') as f: - contents = f.read() - ret_values.append(contents.strip()) - return ret_values - - -def _is_checkpoint_var(var): - """ - the checkpoint will not save or load all the variables. - var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded. - - : param var(Variable) - """ - if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ - var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \ - var.desc.type() == core.VarDesc.VarType.RAW: - return False - # @GRAD are named for gradient variables, checkpoint will not save it. - if "@GRAD" in var.name: - return False - # .trainer_ are named for distribute train variables, checkpoint will not save it. - if ".trainer_" in var.name: - return False - - # .block is named for distribute train variables, checkpoint will not save it. - if ".block" in var.name: - return False - - return var.persistable - - -def _make_chekcpoint_dirs(dirs): - """ - _make_chekcpoint_dirs will makdir local directory directly, when the directory is exist, it will igore it. 
- """ - assert dirs is not None - - if os.path.isfile(dirs): - raise OSError(errno.ENOTDIR, "dirs path shoule be a Directory.", dirs) - - if not os.path.isdir(dirs): - try: - os.makedirs(dirs) - except OSError as err: - if err.errno != errno.EEXIST: - raise err - - -def _get_dir_serial(dirname): - _, serial = dirname.split(CHECKPOINT_SEPARATOR) - - try: - serial_num = int(serial) - except ValueError: - serial_num = -1 - return serial_num - - -def _get_serial_dir(dirname, serial): - serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial) - serial_dir = os.path.join(dirname, serial_folder) - _make_chekcpoint_dirs(serial_dir) - - return serial_dir - - -def _get_model_dir(dirname): - model_dir = os.path.join(dirname, MODEL_DIR) - _make_chekcpoint_dirs(model_dir) - return model_dir - - -def _get_lookuptable_dir(dirname): - lookuptable_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) - _make_chekcpoint_dirs(lookuptable_dir) - return lookuptable_dir - - -def _get_trainer_dir(dirname, trainer_id): - trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id) - trainer_dir = os.path.join(dirname, trainer_folder) - _make_chekcpoint_dirs(trainer_dir) - return trainer_dir - - -def _scroll_delete(dirname, max_num_checkpoints=3): - dirs = os.listdir(dirname) - serial_map = {} - for serial in dirs: - serial_num = _get_dir_serial(serial) - serial_map[serial_num] = serial - - if len(list(serial_map.keys())) <= max_num_checkpoints: - return - - serials = list(serial_map.keys()) - serials.sort(reverse=True) - serials = serials[max_num_checkpoints:] - for serial in serials: - cur_dir = _get_serial_dir(dirname, serial) - try: - shutil.rmtree(cur_dir) - except OSError as err: - if err.errno != errno.ENOENT: - raise err - - -def _write_success(dirname): - """ - write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct. - - : param dirname - """ - success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME) - with open(success_file, 'a') as f: - now = time.ctime() - f.write(now) - - -def _get_latest_checkpoint_serial(checkpoint_dir): - """ - get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory - - : param checkpoint_dir - """ - if not checkpoint_dir: - return -1 - - def has_success(checkpoint_dir, cur_dir): - """ - is _SUCCESS in this dir - """ - - serial = _get_dir_serial(cur_dir) - if serial == -1 or not os.path.isdir( - os.path.join(checkpoint_dir, cur_dir)): - return -1 - - success_path = os.path.join( - _get_serial_dir(checkpoint_dir, serial), MODEL_DIR, - SUCCESS_MARK_FILENAME) - if os.path.isfile(success_path): - return serial - - if not os.path.isdir(checkpoint_dir): - return -1 - - current_dir = -1 - dirs = os.listdir(checkpoint_dir) - for cur_dir in dirs: - success_num = has_success(checkpoint_dir, cur_dir) - if success_num > current_dir: - current_dir = success_num - return current_dir +# NOTE: Trainer is moved into fluid.contrib.trainer. 
+__all__ = []
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index 3f8c7b844a9fdc8404560ba4c78f9d328af2852a..43071def7a906e585909e50e4c0c52c56d981cde 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -136,6 +136,8 @@ class DistributeTranspilerConfig(object):
     slice_var_up = True
     split_method = None
     min_block_size = 8192
+    # supported modes: pserver, nccl2
+    mode = "pserver"
     print_log = False
 
 
@@ -144,27 +146,30 @@ class DistributeTranspiler(object):
     **DistributeTranspiler**
 
     Convert the fluid program to distributed data-parallelism programs.
+    Supports two modes: pserver mode and nccl2 mode.
 
-    The main_program will be transformed to use a remote parameter server
-    to do parameter optimization. And the optimization graph will be put
-    into a parameter server program.
+    In pserver mode, the main_program will be transformed to use a remote
+    parameter server to do parameter optimization. And the optimization
+    graph will be put into a parameter server program.
+
+    In nccl2 mode, the transpiler will append a NCCL_ID broadcasting
+    op in startup_program to share the NCCL_ID across the job nodes.
+    After transpile_nccl2 is called, you ***must*** pass the trainer_id and
+    num_trainers arguments to ParallelExecutor to enable NCCL2 distributed
+    mode.
 
     Examples:
         .. code-block:: python
 
-           # Define your model before these codes.
-           port = os.getenv("PADDLE_PSERVER_PORT", "6174")
-           pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
-           eplist = []
-           for ip in pserver_ips.split(","):
-                eplist.append(':'.join([ip, port]))
-           pserver_endpoints = ",".join(eplist)
-           trainers = int(os.getenv("PADDLE_TRAINERS"))
-           current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
-           trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
+           # for pserver mode
+           pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+           trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+           current_endpoint = "192.168.0.1:6174"
+           trainer_id = 0
+           trainers = 4
            role = os.getenv("PADDLE_TRAINING_ROLE")
 
-           t = distribute_transpiler.DistributeTranspiler()
+           t = fluid.DistributeTranspiler()
            t.transpile(
                trainer_id, pservers=pserver_endpoints, trainers=trainers)
            if role == "PSERVER":
@@ -173,6 +178,18 @@ class DistributeTranspiler(object):
                                                                 pserver_program)
            elif role == "TRAINER":
                trainer_program = t.get_trainer_program()
+
+           # for nccl2 mode
+           config = fluid.DistributeTranspilerConfig()
+           config.mode = "nccl2"
+           t = fluid.DistributeTranspiler(config=config)
+           t.transpile(trainer_id, trainers=trainer_endpoints,
+                       current_endpoint=current_endpoint)
+           exe = fluid.ParallelExecutor(
+               use_cuda,
+               loss_name=loss_var.name,
+               num_trainers=len(trainer_endpoints.split(",")),
+               trainer_id=trainer_id
+           )
     """
 
     def __init__(self, config=None):
@@ -190,13 +207,41 @@ class DistributeTranspiler(object):
         assert (self.config.min_block_size >= 8192)
         assert (self.config.split_method.__bases__[0] == PSDispatcher)
 
+    def _transpile_nccl2(self,
+                         trainer_id,
+                         trainers,
+                         current_endpoint,
+                         startup_program=None):
+        if not startup_program:
+            startup_program = default_startup_program()
+        if trainer_id >= 0:
+            worker_endpoints = trainers.split(",")
+            # send NCCL_ID to others or recv from trainer 0
+            worker_endpoints.remove(current_endpoint)
+
+            nccl_id_var = startup_program.global_block().create_var(
+                name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
+            startup_program.global_block().append_op(
+                type="gen_nccl_id",
+                inputs={},
+                outputs={"NCCLID": nccl_id_var},
+                attrs={
+                    "endpoint": current_endpoint,
+                    "endpoint_list": worker_endpoints,
+                    "trainer_id": trainer_id
+                })
+            return nccl_id_var
+        else:
+            raise ValueError("trainer_id must be >= 0")
+
     def transpile(self,
                   trainer_id,
                   program=None,
                   pservers="127.0.0.1:6174",
                   trainers=1,
                   sync_mode=True,
-                  startup_program=None):
+                  startup_program=None,
+                  current_endpoint="127.0.0.1:6174"):
         """
         Run the transpiler.
 
@@ -207,10 +252,15 @@ class DistributeTranspiler(object):
                 default is fluid.default_main_program().
             pservers (str): comma separated ip:port string for the pserver
                 list.
-            trainers (int): number of trainers in the distributed job.
+            trainers (int|str): in pserver mode this is the number of
+                trainers; in nccl2 mode this is a comma separated string
+                of trainer endpoints.
             sync_mode (bool): Do sync training or not, default is True.
             startup_program (Program|None): startup_program to transpile,
                 default is fluid.default_main_program().
+            current_endpoint (str): the current endpoint, which must be
+                passed when transpiling in nccl2 distributed mode. In
+                pserver mode this argument is not used.
         """
         if program is None:
             program = default_main_program()
@@ -220,6 +270,15 @@ class DistributeTranspiler(object):
         self.startup_program = startup_program
         self.origin_startup_program = self.startup_program.clone()
 
+        if self.config.mode == "nccl2":
+            assert (isinstance(trainers, str))
+            self._transpile_nccl2(
+                trainer_id,
+                trainers,
+                current_endpoint,
+                startup_program=startup_program)
+            return
+
         self.trainer_num = trainers
         self.sync_mode = sync_mode
         self.trainer_id = trainer_id
@@ -1082,7 +1141,7 @@ to transpile() call.")
                 if self.sync_mode else []
             },
             attrs={
-                "sync_mode": False,
+                "sync_mode": self.sync_mode,
                 "epmap": pserver_endpoints,
                 RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE,
                 OP_ROLE_VAR_ATTR_NAME: [