diff --git a/cmake/configure.cmake b/cmake/configure.cmake index ce1857582bd3e8ab3077158384beaae36a83a4b2..e9852f00b1835adec31373f58ac538f9685251e0 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -62,8 +62,26 @@ if(NOT CMAKE_CROSSCOMPILING) endif() if(WIN32) - # windows stupid compile option for all targets. + # windows header option for all targets. add_definitions(-D_XKEYCHECK_H) + # Use response files instead of absolute paths to reduce the cmake link command length. + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1) + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1) + SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1) + SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1) + SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@") + SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@") + + # Specify the program to use when building static libraries + SET(CMAKE_C_CREATE_STATIC_LIBRARY " lib ") + SET(CMAKE_CXX_CREATE_STATIC_LIBRARY " lib ") + + # set definition for the dll export + if (NOT MSVC) + message(FATAL_ERROR "The Windows build only supports MSVC, which is the host compiler required by NVIDIA's nvcc.") + endif(NOT MSVC) endif(WIN32) if(NOT WITH_GOLANG) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index a67512578147fc7223714dbc4cd124b831fb4775..5bf82b4ddf10a646ca540ac4ee2cfd3d3bc6cf58 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -110,6 +110,20 @@ function(find_fluid_modules TARGET_NAME) endif() endfunction(find_fluid_modules) +# find all third_party modules used by the paddle static library, +# to reduce the dependencies when building the inference libs. +set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY) +function(find_fluid_thirdparties TARGET_NAME) + get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE) + string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path}) + string(FIND "${__target_path}" "third_party" pos) + if(pos GREATER 1) + get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THIRD_PARTY) + set(fluid_third_partys ${fluid_third_partys} ${TARGET_NAME}) + set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY "${fluid_third_partys}") + endif() +endfunction(find_fluid_thirdparties) + function(merge_static_libs TARGET_NAME) set(libs ${ARGN}) list(REMOVE_DUPLICATES libs) @@ -204,18 +218,13 @@ function(merge_static_libs TARGET_NAME) foreach(lib ${libs}) # Get the file names of the libraries to be merged - #if(NOT $<TARGET_FILE:${lib}> MATCHES "lib.*\\.lib") - # message("library" ${lib}) - # set(libfiles ${libfiles} lib$<TARGET_FILE:${lib}>) - #else() set(libfiles ${libfiles} $<TARGET_FILE:${lib}>) - #endif() endforeach() - - # windows cmd return error in clean env. - # COMMAND del "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib" + # msvc will put the library under "/Release/xxx.lib" by default + # COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib" add_custom_command(TARGET ${TARGET_NAME} POST_BUILD - COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.lib ${libfiles} + COMMAND cmake -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}" + COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/lib${TARGET_NAME}.lib ${libfiles} ) endif(WIN32) endfunction(merge_static_libs) diff --git a/doc/README.md b/doc/README.md index 77aa2a5322057d582435e83f38476833d1f73c48..998a39f10699af6d1a391f177a5cf03c9ae170fd 100644 --- a/doc/README.md +++ b/doc/README.md @@ -2,6 +2,6 @@ Thanks for reading PaddlePaddle documentation.
-Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [Fluiddoc Repo](https://github.com/PaddlePaddle/Paddle) and updated in Fluiddoc Repo. +Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [FluidDoc Repo](https://github.com/PaddlePaddle/FluidDoc) and updated there. -Please turn to Fluiddoc Repo for the latest documentation. +Please turn to FluidDoc Repo for the latest documentation. diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index c4a65bb13e25c550b3cdd08a0f1c8d11bee5abe8..d3583cf894991624f537a4073f14aacc470aadd0 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -22,9 +22,6 @@ paddle.fluid.Operator.rename_input ArgSpec(args=['self', 'old_name', 'new_name'] paddle.fluid.Operator.rename_output ArgSpec(args=['self', 'old_name', 'new_name'], varargs=None, keywords=None, defaults=None) paddle.fluid.Operator.set_attr ArgSpec(args=['self', 'name', 'val'], varargs=None, keywords=None, defaults=None) paddle.fluid.Operator.to_string ArgSpec(args=['self', 'throw_on_error'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Parameter.__init__ ArgSpec(args=['self', 'block', 'shape', 'dtype'], varargs=None, keywords='kwargs', defaults=None) -paddle.fluid.Parameter.astype ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Parameter.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,)) paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.program_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) @@ -35,29 +32,16 @@ paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)) paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None) paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) -paddle.fluid.Trainer.__init__ ArgSpec(args=['self', 'train_func', 'optimizer_func', 'param_path', 'place', 'parallel', 'checkpoint_config'], varargs=None, keywords=None, defaults=(None, None, False, None)) -paddle.fluid.Trainer.save_inference_model ArgSpec(args=['self', 'param_path', 'feeded_var_names', 'target_var_indexes'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Trainer.save_params ArgSpec(args=['self', 'param_path'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Trainer.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Trainer.test ArgSpec(args=['self', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=None) -paddle.fluid.Trainer.train ArgSpec(args=['self', 'num_epochs', 'event_handler', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=(None, None)) -paddle.fluid.BeginEpochEvent.__init__ ArgSpec(args=['self', 'epoch_id'], varargs=None, keywords=None, defaults=None) -paddle.fluid.EndEpochEvent.__init__ ArgSpec(args=['self', 'epoch_id'], varargs=None, keywords=None, defaults=None) -paddle.fluid.BeginStepEvent.__init__ ArgSpec(args=['self', 'epoch_id', 'step_id'], varargs=None, keywords=None, 
defaults=None) -paddle.fluid.EndStepEvent.__init__ ArgSpec(args=['self', 'epoch_id', 'step_id', 'metrics'], varargs=None, keywords=None, defaults=None) -paddle.fluid.CheckpointConfig.__init__ ArgSpec(args=['self', 'checkpoint_dir', 'max_num_checkpoints', 'epoch_interval', 'step_interval'], varargs=None, keywords=None, defaults=(None, 3, 1, 10)) -paddle.fluid.Inferencer.__init__ ArgSpec(args=['self', 'infer_func', 'param_path', 'place', 'parallel'], varargs=None, keywords=None, defaults=(None, False)) -paddle.fluid.Inferencer.infer ArgSpec(args=['self', 'inputs', 'return_numpy'], varargs=None, keywords=None, defaults=(True,)) paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)) -paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None)) +paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')) paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0)) paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DistributeTranspilerConfig.__init__ -paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords='kwargs', defaults=(None, None, None, None, None, 1, 0, None)) +paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None)) paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True)) paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.GradientScaleStrategy, arg0: int) -> None @@ -178,14 +162,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None)) paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], 
varargs=None, keywords=None, defaults=(None,)) -paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None)) -paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) -paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None)) +paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None, None)) +paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) +paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None)) paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)) paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)) paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None) @@ -265,12 +249,12 @@ paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defa paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', 
defaults=None) paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) -paddle.fluid.layers.softshrink ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None) paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.tanh ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.tanh_shrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.layers.softshrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.sqrt ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.abs ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.ceil ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)) @@ -336,7 +320,7 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=[' paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,)) -paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None)) +paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174')) paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0)) paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None) @@ -350,6 +334,7 @@ paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'fi paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max')) paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)) paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)) +paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn', 'use_mkldnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', 
True, False)) paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None)) paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)) paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None)) @@ -389,7 +374,7 @@ paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> Non paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)) -paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim'], varargs=None, keywords='kwargs', defaults=(None,)) +paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)) paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True)) paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None) diff --git a/paddle/fluid/framework/eigen.h b/paddle/fluid/framework/eigen.h index 4ea1df655df005ba7585fb67fb0a3c3411a76418..2b265a773fe967f5b2ab38ce795b0f599d859c2a 100644 --- a/paddle/fluid/framework/eigen.h +++ b/paddle/fluid/framework/eigen.h @@ -13,6 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #pragma once +// logging.h and windows.h conflict +#define GLOG_NO_ABBREVIATED_SEVERITIES +// solve static linking error in windows +// https://github.com/google/glog/issues/301 +#define GOOGLE_GLOG_DLL_DECL #include "paddle/fluid/framework/tensor.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -46,11 +51,13 @@ struct EigenTensor { using ConstType = Eigen::TensorMap>; - static Type From(Tensor& tensor, DDim dims) { + static Type From(Tensor& tensor, DDim dims) { // NOLINT return Type(tensor.data(), EigenDim::From(dims)); } - static Type From(Tensor& tensor) { return From(tensor, tensor.dims_); } + static Type From(Tensor& tensor) { // NOLINT + return From(tensor, tensor.dims_); + } // NOLINT static ConstType From(const Tensor& tensor, DDim dims) { return ConstType(tensor.data(), EigenDim::From(dims)); @@ -64,7 +71,8 @@ struct EigenTensor { template struct EigenMatrix : public EigenTensor { - static typename EigenMatrix::Type Reshape(Tensor& tensor, int num_col_dims) { + static typename EigenMatrix::Type Reshape(Tensor& tensor, // NOLINT + int num_col_dims) { int rank = tensor.dims_.size(); PADDLE_ENFORCE(num_col_dims > 0 && num_col_dims < rank, "`num_col_dims` must be between (0, rank_of_tensor)."); @@ -86,11 +94,12 @@ template struct EigenVector : public EigenTensor { // Flatten reshapes a Tensor into an EigenVector. - static typename EigenVector::Type Flatten(Tensor& tensor) { + static typename EigenVector::Type Flatten(Tensor& tensor) { // NOLINT return EigenVector::From(tensor, {product(tensor.dims_)}); } - static typename EigenVector::ConstType Flatten(const Tensor& tensor) { + static typename EigenVector::ConstType Flatten( + const Tensor& tensor) { // NOLINT return EigenVector::From(tensor, {product(tensor.dims_)}); } }; @@ -104,7 +113,7 @@ struct EigenScalar { using ConstType = Eigen::TensorMap< Eigen::TensorFixedSize, MajorType, IndexType>>; - static Type From(Tensor& tensor) { return Type(tensor.data()); } + static Type From(Tensor& tensor) { return Type(tensor.data()); } // NOLINT static ConstType From(const Tensor& tensor) { return ConstType(tensor.data()); diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc index 09c5ec59d66445bdbd5349447b125be89cb2efdf..d7df6389cfd595324e284e0da10f65213ccee80f 100644 --- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass.cc @@ -26,8 +26,6 @@ std::unique_ptr ConvReLUFusePass::ApplyImpl( PADDLE_ENFORCE(graph.get()); FusePassBase::Init("conv_relu_mkldnn_fuse", graph.get()); - std::unordered_set nodes2delete; - GraphPatternDetector gpd; auto* conv_input = gpd.mutable_pattern() ->NewNode("conv_relu_mkldnn_fuse/conv_input") @@ -42,36 +40,20 @@ std::unique_ptr ConvReLUFusePass::ApplyImpl( Graph* g) { VLOG(4) << "handle ConvReLU fuse"; GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, - conv_relu_pattern); // Filter - GET_IR_NODE_FROM_SUBGRAPH(conv_bias, conv_bias, conv_relu_pattern); // Bias - GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp + conv_relu_pattern); // Filter + GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_relu_pattern); // CONV op GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, conv_relu_pattern); // Out GET_IR_NODE_FROM_SUBGRAPH(relu, relu, conv_relu_pattern); // ReLU op - // Create an ConvReLU Node. 
- OpDesc desc; - std::string conv_relu_i_in = subgraph.at(conv_input)->Name(); - std::string conv_relu_w_in = conv_weight->Name(); - std::string conv_relu_b_in = conv_bias->Name(); - std::string conv_relu_out = relu_out->Name(); - desc.SetInput("Input", std::vector({conv_relu_i_in})); - desc.SetInput("Filter", std::vector({conv_relu_w_in})); - desc.SetInput("Bias", std::vector({conv_relu_b_in})); - desc.SetOutput("Output", std::vector({conv_relu_out})); - desc.SetType("conv2d"); - for (auto& attr : conv->Op()->GetAttrMap()) { - desc.SetAttr(attr.first, attr.second); - } - desc.SetAttr("fuse_relu", true); - auto conv_relu_node = g->CreateOpNode(&desc); // OpDesc will be copied. - GraphSafeRemoveNodes(graph.get(), {conv, relu, conv_out}); + // Transform Conv node into ConvReLU node. + OpDesc* desc = conv->Op(); + desc->SetOutput("Output", std::vector({relu_out->Name()})); + desc->SetAttr("fuse_relu", true); + GraphSafeRemoveNodes(graph.get(), {relu, conv_out}); PADDLE_ENFORCE(subgraph.count(conv_input)); - IR_NODE_LINK_TO(subgraph.at(conv_input), conv_relu_node); - IR_NODE_LINK_TO(conv_weight, conv_relu_node); - IR_NODE_LINK_TO(conv_bias, conv_relu_node); - IR_NODE_LINK_TO(conv_relu_node, relu_out); + IR_NODE_LINK_TO(conv, relu_out); found_conv_relu_count++; }; diff --git a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc index 82b5fa1886098ca3b19c147c307d3f2fc3ba03d6..9dd780ec89ab991d6d99cb66fa2a9b683be2b9ca 100644 --- a/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/conv_relu_mkldnn_fuse_pass_tester.cc @@ -85,16 +85,13 @@ TEST(ConvReLUFusePass, basic) { for (auto* node : graph->Nodes()) { if (node->IsOp() && node->Op()->Type() == "conv2d") { - if (node->Op()->HasAttr("use_mkldnn")) { - bool use_mkldnn = boost::get(node->Op()->GetAttr("use_mkldnn")); - if (use_mkldnn) { - if (node->Op()->HasAttr("fuse_relu")) { - bool fuse_relu = boost::get(node->Op()->GetAttr("fuse_relu")); - if (fuse_relu) { - ++conv_relu_count; - } - } - } + auto* op = node->Op(); + ASSERT_TRUE(op->HasAttr("use_mkldnn")); + EXPECT_TRUE(boost::get(op->GetAttr("use_mkldnn"))); + ASSERT_TRUE(op->HasAttr("fuse_relu")); + bool fuse_relu = boost::get(op->GetAttr("fuse_relu")); + if (fuse_relu) { + ++conv_relu_count; } } } diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index ef5113819696238d4e06b826bf43064b0f368dea..6d2c51b0e9bed8461f6491b84a36a3bf6663a138 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -638,11 +638,6 @@ PDNode *patterns::ConvReLU::operator()( ->AsInput() ->assert_is_persistable_var() ->assert_is_op_input("conv2d", "Filter"); - // Bias - auto *conv_bias_var = pattern->NewNode(conv_bias_repr()) - ->AsInput() - ->assert_is_persistable_var() - ->assert_is_op_input("conv2d", "Bias"); // intermediate variable, will be removed in the IR after fuse. 
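// (After ConvReLUFusePass runs, the matched conv op is reused as the fused op:
// its Output is rewired to relu_out, fuse_relu is set to true, and the relu op
// together with this intermediate variable is removed from the graph.)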
auto *conv_out_var = pattern->NewNode(conv_out_repr()) ->AsIntermediate() @@ -653,8 +648,7 @@ PDNode *patterns::ConvReLU::operator()( ->AsOutput() ->assert_is_op_output("relu"); - conv_op->LinksFrom({conv_input, conv_weight_var, conv_bias_var}) - .LinksTo({conv_out_var}); + conv_op->LinksFrom({conv_input, conv_weight_var}).LinksTo({conv_out_var}); relu_op->LinksFrom({conv_out_var}).LinksTo({relu_out_var}); return relu_out_var; } diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.h b/paddle/fluid/framework/ir/graph_pattern_detector.h index 46950ed877cda7cd83ead4e9aa9a3aaae5d5ecfa..69b486c29d8bd1102a8372f5041051c25ce19359 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.h +++ b/paddle/fluid/framework/ir/graph_pattern_detector.h @@ -379,7 +379,7 @@ struct PatternBase { // op: conv + relu // named nodes: // conv_input, conv_weight, -// conv_bias, conv_out, conv, +// conv_out, conv, // relu_out, relu struct ConvReLU : public PatternBase { ConvReLU(PDPattern* pattern, const std::string& name_scope) @@ -392,7 +392,6 @@ struct ConvReLU : public PatternBase { PATTERN_DECL_NODE(relu); // declare variable node's name PATTERN_DECL_NODE(conv_weight); - PATTERN_DECL_NODE(conv_bias); PATTERN_DECL_NODE(conv_out); PATTERN_DECL_NODE(relu_out); }; diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index df2a7a27ca4a6011b214202ac9bf4f30dc482ece..2663c9be41a834523fb896b490e7e75df256de05 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -132,7 +132,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namesope.") .SetDefault(""); - + AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(), + "Callstack for Op Creation.") + .SetDefault({}); Validate(); } diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h index 4ed3cc45d66849267ef4945a03da1db76b53e4ea..f13196959705bad473a6f7b3ef88f8faa8abe2b8 100644 --- a/paddle/fluid/framework/op_proto_maker.h +++ b/paddle/fluid/framework/op_proto_maker.h @@ -46,6 +46,7 @@ class OpProtoAndCheckerMaker { static const char *OpRoleAttrName() { return "op_role"; } static const char *OpRoleVarAttrName() { return "op_role_var"; } static const char *OpNamescopeAttrName() { return "op_namescope"; } + static const char *OpCreationCallstackAttrName() { return "op_callstack"; } void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker); diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h index e7dfa608b48f89a2155e43c7e63e31154675cd38..ef2eb334a4e7f3f482ba6d62d3f325f109c69302 100644 --- a/paddle/fluid/framework/op_registry.h +++ b/paddle/fluid/framework/op_registry.h @@ -23,6 +23,11 @@ limitations under the License.
*/ #include #include +#if defined(_WIN32) +#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc: logging.h conflicts with windows.h +#define GOOGLE_GLOG_DLL_DECL +#endif + #include "glog/logging.h" // For VLOG() #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/details/op_registry.h" @@ -241,22 +246,20 @@ struct OpKernelRegistrarFunctorEx diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc #include - #include - +#include +#include +#include #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/platform/profiler.h" @@ -137,19 +142,48 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { } void OperatorBase::Run(const Scope& scope, const platform::Place& place) { - VLOG(4) << place << " " << DebugStringEx(&scope); - if (platform::is_gpu_place(place)) { + try { + if (VLOG_IS_ON(4)) { + VLOG(4) << place << " " << DebugStringEx(&scope); + } + if (platform::is_gpu_place(place)) { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("Cannot run operator on place %s", place); + PADDLE_THROW("Cannot run operator on place %s", place); #else - auto dev_id = boost::get<platform::CUDAPlace>(place).device; - platform::SetDeviceId(dev_id); + auto dev_id = boost::get<platform::CUDAPlace>(place).device; + platform::SetDeviceId(dev_id); #endif + } + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + platform::RecordEvent record_event(Type(), pool.Get(place)); + RunImpl(scope, place); + if (VLOG_IS_ON(3)) { + VLOG(3) << place << " " << DebugStringEx(&scope); + } + } catch (platform::EnforceNotMet exception) { + if (Attrs().count("sub_block") != 0) { + throw exception; + } + + auto& callstack = Attr<std::vector<std::string>>( + OpProtoAndCheckerMaker::OpCreationCallstackAttrName()); + + if (callstack.empty()) { + throw exception; + } + std::ostringstream sout; + sout << "Invoke operator " << Type() << " error.\n"; + sout << "Python Callstacks: \n"; + for (auto& line : callstack) { + sout << line; + } + sout << "C++ Callstacks: \n"; + sout << exception.err_str_; + exception.err_str_ = sout.str(); + throw exception; + } catch (...) { + std::rethrow_exception(std::current_exception()); } - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - platform::RecordEvent record_event(Type(), pool.Get(place)); - RunImpl(scope, place); - VLOG(3) << place << " " << DebugStringEx(&scope); } bool OperatorBase::HasInputs(const std::string& name) const { @@ -177,7 +211,7 @@ const std::vector<std::string>& OperatorBase::Inputs( } bool OperatorBase::HasOutputs(const std::string& name) const { - if (outputs_.find(name) != outputs_.end()) { + if (outputs_.end() != outputs_.find(name)) { return true; } else { return false; diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 1040eb882baea624e972faf4af3094119df72308..626b50edfd39424473be33e9f8baec5970471477 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -20,6 +20,8 @@ limitations under the License.
*/ #include #include #include +#define GLOG_NO_ABBREVIATED_SEVERITIES +#define GOOGLE_GLOG_DLL_DECL #include "glog/logging.h" // For VLOG #include "paddle/fluid/framework/attribute.h" diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 684e0ce0e292d852d4601ebd1ccd920382e42c8b..1032aadcbda4f1b05841e08e1abe7c737c3aeb9c 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -71,15 +71,15 @@ bool AnalysisPredictor::Init( inference_program_ = paddle::inference::Load( executor_.get(), scope_.get(), config_.prog_file, config_.param_file); } else { - LOG(ERROR) << "fail to load inference model."; + LOG(ERROR) << "fail to load inference model from " << config_.model_dir; return false; } OptimizeInferenceProgram(); - ctx_ = executor_->Prepare(*inference_program_, 0); if (config_._use_mkldnn) { executor_->EnableMKLDNN(*inference_program_); } + ctx_ = executor_->Prepare(*inference_program_, 0); VLOG(5) << "to create variables"; PADDLE_ENFORCE(scope_.get()); @@ -109,8 +109,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() { } argument_.origin_program_desc.reset( new ProgramDesc(*inference_program_->Proto())); - PADDLE_ENFORCE(config_.ir_mode == AnalysisConfig::IrPassMode::kExclude, - "Only kExclude is supported yet."); + PADDLE_ENFORCE( + config_.ir_mode == contrib::AnalysisConfig::IrPassMode::kExclude, + "Only kExclude is supported yet."); Analyzer().DisableIrPasses(config_.ir_passes).Run(&argument_); CHECK(argument_.transformed_program_desc); @@ -126,8 +127,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() { } template <> -std::unique_ptr CreatePaddlePredictor< - AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config) { +std::unique_ptr +CreatePaddlePredictor( + const contrib::AnalysisConfig& config) { VLOG(3) << "create AnalysisConfig"; if (config.use_gpu) { // 1. 
GPU memeroy @@ -154,4 +156,11 @@ std::unique_ptr CreatePaddlePredictor< return predictor; } +template <> +std::unique_ptr CreatePaddlePredictor( + const contrib::AnalysisConfig& config) { + return CreatePaddlePredictor(config); +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index e53925366e9214cd60422efe56884751297c15e5..aa00e8be5c28c2e3bfe74fa0bff2c72210bd106e 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -30,7 +30,7 @@ using framework::proto::ProgramDesc; */ class AnalysisPredictor : public NativePaddlePredictor { public: - explicit AnalysisPredictor(const AnalysisConfig& config) + explicit AnalysisPredictor(const contrib::AnalysisConfig& config) : NativePaddlePredictor(config), config_(config) {} bool Init(const std::shared_ptr& parent_scope); @@ -46,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor { Argument& analysis_argument() { return argument_; } private: - AnalysisConfig config_; + contrib::AnalysisConfig config_; Argument argument_; }; diff --git a/paddle/fluid/inference/api/api_anakin_engine.cc b/paddle/fluid/inference/api/api_anakin_engine.cc index 43b31269d2bd82c06e284e3599a3763da693a2af..2c4894fd887f2f509dc7ab88c367cea5c1aed99a 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.cc +++ b/paddle/fluid/inference/api/api_anakin_engine.cc @@ -31,21 +31,24 @@ namespace paddle { +using paddle::contrib::AnakinConfig; + template PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( - const AnakinConfig &config) { + const contrib::AnakinConfig &config) { CHECK(Init(config)); } template <> PaddleInferenceAnakinPredictor::PaddleInferenceAnakinPredictor( - const AnakinConfig &config) { + const contrib::AnakinConfig &config) { omp_set_dynamic(0); omp_set_num_threads(1); mkl_set_num_threads(1); CHECK(Init(config)); } template -bool PaddleInferenceAnakinPredictor::Init(const AnakinConfig &config) { +bool PaddleInferenceAnakinPredictor::Init( + const contrib::AnakinConfig &config) { if (!(graph_.load(config.model_file))) { VLOG(3) << "fail to load graph from " << config.model_file; return false; @@ -200,10 +203,11 @@ template class PaddleInferenceAnakinPredictor; // A factory to help create difference predictor. 
template <> -std::unique_ptr CreatePaddlePredictor< - AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) { +std::unique_ptr +CreatePaddlePredictor( + const contrib::AnakinConfig &config) { VLOG(3) << "Anakin Predictor create."; - if (config.target_type == AnakinConfig::NVGPU) { + if (config.target_type == contrib::AnakinConfig::NVGPU) { #ifdef PADDLE_WITH_CUDA VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ]."; std::unique_ptr x( @@ -213,7 +217,7 @@ std::unique_ptr CreatePaddlePredictor< LOG(ERROR) << "AnakinConfig::NVGPU could not used in ONLY-CPU environment"; return nullptr; #endif - } else if (config.target_type == AnakinConfig::X86) { + } else if (config.target_type == contrib::AnakinConfig::X86) { VLOG(3) << "Anakin Predictor create on [ Intel X86 ]."; std::unique_ptr x( new PaddleInferenceAnakinPredictor(config)); diff --git a/paddle/fluid/inference/api/api_anakin_engine.h b/paddle/fluid/inference/api/api_anakin_engine.h index dd08661880d8cc3a9f4401e9af91a3d10e6579b6..04536ea3a53bbbc9293d92e69a23567e4bfd84c0 100644 --- a/paddle/fluid/inference/api/api_anakin_engine.h +++ b/paddle/fluid/inference/api/api_anakin_engine.h @@ -29,6 +29,8 @@ limitations under the License. */ namespace paddle { +using contrib::AnakinConfig; + template class PaddleInferenceAnakinPredictor : public PaddlePredictor { public: diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 2e9e10139fa7008a46c3782960dfd44d3228cc26..dca4386b21b4a064c21b52218682321258f368c4 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/inference/api/api_impl.h" +#include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/timer.h" #include "paddle/fluid/platform/profiler.h" @@ -101,14 +102,11 @@ bool NativePaddlePredictor::Init( inference_program_ = paddle::inference::Load( executor_.get(), scope_.get(), config_.prog_file, config_.param_file); } else { - LOG(ERROR) << "fail to load inference model."; + LOG(ERROR) << "fail to load inference model from " << config_.model_dir; return false; } ctx_ = executor_->Prepare(*inference_program_, 0); - if (config_._use_mkldnn) { - executor_->EnableMKLDNN(*inference_program_); - } executor_->CreateVariables(*inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0); @@ -218,57 +216,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, template void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch, PaddleTensor *output) { - std::vector shape; - auto dims_i = fetch.dims(); - auto lod = fetch.lod(); - const T *output_ptr = fetch.data(); - auto num = fetch.numel(); - std::vector data; - if (0 == lod.size()) { - std::copy(output_ptr, output_ptr + num, std::back_inserter(data)); - for (int j = 0; j < dims_i.size(); ++j) { - shape.push_back(dims_i[j]); - } - } else { - // for batch detection - // image[0] -> output[0] shape {145, 6} - // image[1] -> output[1] shape {176, 6} - // then, - // the batch output shape {321, 6} - // the lod {{0, 145, 321}} - // so we should append output[0] to {176, 6} - size_t max_dim = 0; - for (size_t j = 1; j < lod[0].size(); j++) { - max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]); - } - size_t common_dim = lod[0].back() == 0 ? 
0 : num / lod[0].back(); - if (max_dim > 0) { - data.resize((lod[0].size() - 1) * max_dim * common_dim, 0); - } - for (size_t j = 1; j < lod[0].size(); j++) { - size_t start = lod[0][j - 1] * common_dim; - size_t end = lod[0][j] * common_dim; - if (end > start) { - std::copy(output_ptr + start, output_ptr + end, - data.begin() + (j - 1) * max_dim * common_dim); - } - } - shape.push_back(lod[0].size() - 1); - shape.push_back(max_dim); - for (int j = 1; j < dims_i.size(); ++j) { - shape.push_back(dims_i[j]); - } - } - - output->shape = shape; - auto &buffer = output->data; - if (buffer.empty() || buffer.length() < sizeof(T) * data.size()) { - buffer.Resize(sizeof(T) * data.size()); - } - std::memcpy(buffer.data(), data.data(), sizeof(T) * data.size()); - // copy LoD - for (const auto &level : fetch.lod()) { - output->lod.emplace_back(level); + // set shape. + auto shape = framework::vectorize(fetch.dims()); + output->shape.assign(shape.begin(), shape.end()); + // set data. + const T *data = fetch.data<T>(); + int num_elems = inference::VecReduceToInt(shape); + output->data.Resize(num_elems * sizeof(T)); + // The tensor produced by the fetch op should always be in CPU memory, so just + // copy. + memcpy(output->data.data(), data, num_elems * sizeof(T)); + // set lod + output->lod.clear(); + for (auto &level : fetch.lod()) { + output->lod.emplace_back(level.begin(), level.end()); } } @@ -330,4 +291,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< #endif } +template <> +std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>( + const NativeConfig &config) { + return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config); +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h index 6ecc32a700506214d44244e6a5612a574b46cc6b..6386d601262b3dac0e957fae991d23768b52f2c0 100644 --- a/paddle/fluid/inference/api/api_impl.h +++ b/paddle/fluid/inference/api/api_impl.h @@ -14,15 +14,22 @@ #pragma once +// logging.h and windows.h conflict +#define GLOG_NO_ABBREVIATED_SEVERITIES +// solve static linking error in windows +// https://github.com/google/glog/issues/301 +#define GOOGLE_GLOG_DLL_DECL + #include #include #include #include #include +#include "paddle/fluid/inference/api/paddle_inference_api.h" + #include "paddle/fluid/framework/ddim.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/io.h" #include "paddle/fluid/platform/init.h" #include "paddle/fluid/platform/profiler.h" diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc index 2b9be77e9f8856c1c4c581847b70dbc285a576d6..5ee6a5a93168f58770067f76ca7f6bb6f67b2965 100644 --- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc +++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine.cc @@ -25,10 +25,11 @@ using inference::analysis::Argument; using inference::Singleton; using inference::analysis::Analyzer; using framework::proto::ProgramDesc; +using paddle::contrib::MixedRTConfig; class TensorRTSubgraphPredictor : public NativePaddlePredictor { public: - explicit TensorRTSubgraphPredictor(const TensorRTConfig& config) + explicit TensorRTSubgraphPredictor(const MixedRTConfig& config) : NativePaddlePredictor(config), config_(config) {} bool Init(const std::shared_ptr<framework::Scope>& parent_scope) { @@ -121,13 +122,13 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { } private: - TensorRTConfig config_; + MixedRTConfig config_; }; template <> std::unique_ptr<PaddlePredictor>
-CreatePaddlePredictor( - const TensorRTConfig& config) { +CreatePaddlePredictor( + const MixedRTConfig& config) { VLOG(3) << "create TensorRTSubgraphPredictor"; if (config.use_gpu) { // 1. GPU memeroy @@ -156,6 +157,13 @@ CreatePaddlePredictor( return std::move(predictor); } +template <> +std::unique_ptr CreatePaddlePredictor( + const MixedRTConfig& config) { + return CreatePaddlePredictor(config); +} + } // namespace paddle USE_TRT_CONVERTER(elementwise_add_weight); diff --git a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc index 9e7425eddd2df07ffe897f908aad360abe42117a..fc6310e90b0257bc84742fb617a00f5778bb1866 100644 --- a/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc +++ b/paddle/fluid/inference/api/api_tensorrt_subgraph_engine_tester.cc @@ -20,6 +20,8 @@ namespace paddle { +using contrib::MixedRTConfig; + DEFINE_string(dirname, "", "Directory of the inference model."); void CompareTensorRTWithFluid(bool enable_tensorrt) { @@ -32,7 +34,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) { config0.fraction_of_gpu_memory = 0.3; config0.device = 0; - TensorRTConfig config1; + MixedRTConfig config1; config1.model_dir = FLAGS_dirname + "word2vec.inference.model"; config1.use_gpu = true; config1.fraction_of_gpu_memory = 0.3; @@ -42,7 +44,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) { auto predictor0 = CreatePaddlePredictor(config0); auto predictor1 = - CreatePaddlePredictor(config1); for (int batch_id = 0; batch_id < 1; batch_id++) { diff --git a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt index afb46a7139f6ab8e6b3697fdc56fe1c78a05cd64..d4e6bb3e4a4ceb361ccd35121d0ecf84a764243e 100644 --- a/paddle/fluid/inference/api/demo_ci/CMakeLists.txt +++ b/paddle/fluid/inference/api/demo_ci/CMakeLists.txt @@ -1,13 +1,32 @@ cmake_minimum_required(VERSION 3.0) - project(cpp_inference_demo CXX C) +option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF) +option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) + +macro(safe_set_static_flag) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) +endmacro() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") if (WIN32) -set(CMAKE_STATIC_LIBRARY_PREFIX "lib") + if (WITH_STATIC_LIB) + safe_set_static_flag() + add_definitions(-DSTATIC_LIB) + set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "/w") + set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} "/w") + endif() + set(CMAKE_STATIC_LIBRARY_PREFIX "lib") else() -set(CMAKE_STATIC_LIBRARY_PREFIX "") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + set(CMAKE_STATIC_LIBRARY_PREFIX "") endif() +message("flags" ${CMAKE_CXX_FLAGS}) if(NOT DEFINED PADDLE_LIB) message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib") @@ -16,14 +35,18 @@ if(NOT DEFINED DEMO_NAME) message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name") endif() -option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON) -option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." 
OFF) -option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON) if(WITH_GPU) - set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") + if(NOT WIN32) + set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library") + else() + if(CUDA_LIB STREQUAL "") + set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64") + endif() + endif(NOT WIN32) endif() +include_directories("D:/Paddle/") include_directories("${PADDLE_LIB}") include_directories("${PADDLE_LIB}/third_party/install/protobuf/include") include_directories("${PADDLE_LIB}/third_party/install/glog/include") @@ -83,10 +106,18 @@ set(DEPS ${DEPS} ${MATH_LIB} ${MKLDNN_LIB} ${CMAKE_STATIC_LIBRARY_PREFIX}glog ${CMAKE_STATIC_LIBRARY_PREFIX}gflags ${CMAKE_STATIC_LIBRARY_PREFIX}protobuf ${EXTERNAL_LIB}) +# NOTE(dzhwinter) shlwapi is deprecated. +set(DEPS ${DEPS} libcmt shlwapi) endif(NOT WIN32) if(WITH_GPU) - set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) + if(NOT WIN32) + set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) + else() + set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} ) + endif() endif() target_link_libraries(${DEMO_NAME} ${DEPS}) diff --git a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc index 03ac79e9edf0d7ce6e167c3d34af5ba84bbc0e72..360f924810a570422db5a00b13939813fa73e2fa 100644 --- a/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc +++ b/paddle/fluid/inference/api/demo_ci/simple_on_word2vec.cc @@ -18,6 +18,8 @@ limitations under the License. */ #include #include + +#include #include #include //NOLINT #include "paddle/fluid/inference/paddle_inference_api.h" @@ -67,7 +69,8 @@ void Main(bool use_gpu) { 0.000932706}; const size_t num_elements = outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. - for (size_t i = 0; i < std::min(5UL, num_elements); i++) { + for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements); + i++) { PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i], result[i]); } @@ -113,7 +116,8 @@ void MainThreads(int num_threads, bool use_gpu) { const size_t num_elements = outputs.front().data.length() / sizeof(float); // The outputs' buffers are in CPU memory. - for (size_t i = 0; i < std::min(5UL, num_elements); i++) { + for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements); + i++) { PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i], result[i]); } diff --git a/paddle/fluid/inference/api/demo_ci/windows_inference.md b/paddle/fluid/inference/api/demo_ci/windows_inference.md new file mode 100644 index 0000000000000000000000000000000000000000..44b2586ad6d33ce7cbd2bb3080acc96b5e27f660 --- /dev/null +++ b/paddle/fluid/inference/api/demo_ci/windows_inference.md @@ -0,0 +1,19 @@ +# Windows inference +This document introduces Windows inference. Currently only static compilation is provided; it builds paddle_fluid.lib, which bundles all third-party dependencies except openblas.dll. + +1. Download the latest paddle_fluid.lib and openblas.dll, and put them in the same directory. + +2. Prepare a pre-trained model, e.g. one of the models in models; a model can be saved with the save_inference_model interface. Put the model files into that directory. + +3. Enter the Paddle/paddle/fluid/inference/api/demo_ci directory, create a build directory in it, and then use cmake to generate the VS2015 solution file, +where PADDLE_LIB is the folder containing the paddle_fluid.lib mentioned above and CUDA_LIB is the directory of the x64 CUDA system libraries. +```shell + cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DWITH_MKL=OFF -DWITH_STATIC_LIB=ON -DCMAKE_BUILD_TYPE=Release -DDEMO_NAME=inference_icnet -DPADDLE_LIB=D:\to_the_paddle_fluid.lib -DCUDA_LIB=D:\tools\v8.0\lib\x64 +``` +Then open the generated project with VS2015, make sure static linking ("/MT") is used, and build the exe. Put openblas.dll into the directory of the exe. + +4. That exe is the build artifact of the project and is ready to run. + +## FAQ +1. cmake has to be downloaded manually and added to the system PATH. +2. Paths must not contain spaces; for example, a CUDA_LIB path under Program Files (x86) may cause errors. You can copy CUDA to a new location. diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index 8e359a67738c0df180933421b45f15b39fd0e78c..1fec2f96da0f9d978a3537b2d78e4ce5ef628c81 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -74,13 +74,17 @@ template <> std::string to_string<std::vector<std::vector<float>>>( const std::vector<std::vector<std::vector<float>>> &vec); +template <typename T> +int VecReduceToInt(const std::vector<T> &v) { + return std::accumulate(v.begin(), v.end(), 1, [](T a, T b) { return a * b; }); +} + template <typename T> static void TensorAssignData(PaddleTensor *tensor, const std::vector<std::vector<T>> &data) { // Assign buffer - int dim = std::accumulate(tensor->shape.begin(), tensor->shape.end(), 1, - [](int a, int b) { return a * b; }); - tensor->data.Resize(sizeof(T) * dim); + int num_elems = VecReduceToInt(tensor->shape); + tensor->data.Resize(sizeof(T) * num_elems); int c = 0; for (const auto &f : data) { for (T v : f) { @@ -89,7 +93,7 @@ static void TensorAssignData(PaddleTensor *tensor, } } -std::string DescribeTensor(const PaddleTensor &tensor) { +static std::string DescribeTensor(const PaddleTensor &tensor) { std::stringstream os; os << "Tensor [" << tensor.name << "]\n"; os << " - type: "; @@ -113,8 +117,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) { os << "\n"; os << " - data: "; - int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1, - [](int a, int b) { return a * b; }); + int dim = VecReduceToInt(tensor.shape); for (int i = 0; i < dim; i++) { os << static_cast<float *>(tensor.data.data())[i] << " "; } @@ -122,8 +125,8 @@ return os.str(); } -void PrintTime(int batch_size, int repeat, int num_threads, int tid, - double latency, int epoch = 1) { +static void PrintTime(int batch_size, int repeat, int num_threads, int tid, - double latency, int epoch = 1) { + double latency, int epoch = 1) { LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat << ", threads: " << num_threads << ", thread id: " << tid << ", latency: " << latency << "ms ======"; diff --git a/paddle/fluid/inference/api/paddle_inference_api.h b/paddle/fluid/inference/api/paddle_inference_api.h index d0527d714acdcda047ad9d47ccd0e3a8083c771f..01ea0d9c3ad37b3bcebe6853de77373810333776 100644 --- a/paddle/fluid/inference/api/paddle_inference_api.h +++ b/paddle/fluid/inference/api/paddle_inference_api.h @@ -28,34 +28,61 @@ limitations under the License. */ namespace paddle { +// Data type. enum PaddleDType { FLOAT32, INT64, + // TODO(Superjomn) support more data types if needed. }; +/* + * Memory management for PaddleTensor. + * The PaddleBuf holds a buffer for data input or output. The memory can be + * allocated by the user or by the PaddleBuf itself, but in any case, the + * PaddleBuf should be reused for better performance. + * + * For user-allocated memory, the following API can be used: + * - PaddleBuf(void* data, size_t length) to set an external memory by + * specifying the memory address and length. + * - Reset(void* data, size_t length) to reset the PaddleBuf with an external + * memory.
+ * ATTENTION: for user-allocated memory, deallocation must be done by the user + * externally after the program finishes. The PaddleBuf won't do any allocation + * or deallocation. + * + * To have the PaddleBuf allocate and manage the memory: + * - PaddleBuf(size_t length) will allocate a memory of size `length`. + * - Resize(size_t length) resizes the memory to no less than `length`. ATTENTION: + * if the already-allocated memory is larger than `length`, nothing will be done. + */ class PaddleBuf { public: - PaddleBuf() = default; - PaddleBuf(PaddleBuf&& other); - // Copy only available when memory is managed externally. - explicit PaddleBuf(const PaddleBuf&); - PaddleBuf& operator=(const PaddleBuf&); - PaddleBuf& operator=(PaddleBuf&&); - // Do not own the memory. - PaddleBuf(void* data, size_t length) - : data_(data), length_(length), memory_owned_{false} {} - // Own memory. + // PaddleBuf allocates memory internally and manages it. explicit PaddleBuf(size_t length) : data_(new char[length]), length_(length), memory_owned_(true) {} - // Resize to `length` bytes. + // Set external memory; the PaddleBuf won't manage it. + PaddleBuf(void* data, size_t length) + : data_(data), length_(length), memory_owned_{false} {} + // Copy only available when memory is managed externally. + explicit PaddleBuf(const PaddleBuf&); + + // Resize the memory. void Resize(size_t length); - // Reset to external memory. + // Reset to external memory, with address and length set. void Reset(void* data, size_t length); + // Tell whether the buffer is empty. bool empty() const { return length_ == 0; } + // Get the memory address. void* data() const { return data_; } + // Get the memory length. size_t length() const { return length_; } ~PaddleBuf() { Free(); } + PaddleBuf& operator=(const PaddleBuf&); + PaddleBuf& operator=(PaddleBuf&&); + PaddleBuf() = default; + PaddleBuf(PaddleBuf&& other); private: void Free(); @@ -64,6 +91,7 @@ class PaddleBuf { bool memory_owned_{true}; }; +// Basic input and output data structure for PaddlePredictor. struct PaddleTensor { PaddleTensor() = default; std::string name; // variable name. @@ -73,19 +101,8 @@ struct PaddleTensor { std::vector<std::vector<size_t>> lod; // Tensor+LoD equals LoDTensor }; -enum class PaddleEngineKind { - kNative = 0, // Use the native Fluid facility. - kAnakin, // Use Anakin for inference. - kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT. - kAnalysis - // TODO(Superjomn) support following engines latter. - // kTensorRT, // Use TensorRT for inference. - // kAutoMixedAnakin, // Automatically mix Fluid with Anakin. -}; /* - * A simple Inference API for Paddle. Currently this API can be used by - * non-sequence scenerios. + * A simple Inference API for Paddle. */ class PaddlePredictor { public: @@ -120,26 +137,53 @@ struct NativeConfig : public PaddlePredictor::Config { // GPU related fields. bool use_gpu{false}; int device{0}; - float fraction_of_gpu_memory{-1.f}; // Negative to notify initialization. - // NOTE: NOT use it, just for the internal test, will discard later - bool _use_mkldnn{false}; - // Specify the variable's name of each input. - bool specify_input_name{false}; + float fraction_of_gpu_memory{-1.f}; // Change to a float in (0,1] if needed. + // Specify the exact path of program and parameter files. std::string prog_file; std::string param_file; + + // Specify the variable's name of each input if input tensors don't follow the + // `feeds` and `fetches` of the `save_inference_model` phase.
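+ // (If set, each input PaddleTensor::name must match the corresponding feed
+ // variable name.)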
+  bool specify_input_name{false};
 };

-// Configurations for Anakin engine.
-struct AnakinConfig : public PaddlePredictor::Config {
-  enum TargetType { NVGPU = 0, X86 };
-  int device;
-  std::string model_file;
-  int max_batch_size{-1};
-  TargetType target_type;
+// A factory to help create different predictors.
+//
+// Usage:
+//
+//   NativeConfig config;
+//   ... // change the configs.
+//   auto native_predictor = CreatePaddlePredictor(config);
+//
+// FOR EXTENSION DEVELOPER:
+// Different predictors are designated by config type. Similar configs can be
+// merged, but there shouldn't be a huge config containing different fields for
+// more than one kind of predictor.
+template <typename ConfigT>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+
+// NOTE The following APIs are too trivial, we will discard them in future
+// versions.
+enum class PaddleEngineKind {
+  kNative = 0,         // Use the native Fluid facility.
+  kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
+  kAnalysis,           // More optimization.
+  kAnakin              // Use Anakin for inference, not mature yet.
 };

-struct TensorRTConfig : public NativeConfig {
+template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
+std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+
+// ==
+//
+// -----------------------------------------------------------------------------------
+// NOTE: The following APIs are not mature yet, we are still working on them.
+
+namespace contrib {
+
+// Accelerate GPU computation with TensorRT engine.
+struct MixedRTConfig : public NativeConfig {
   // Determine whether a subgraph will be executed by TRT.
   int min_subgraph_size{1};
   // While TensorRT allows an engine optimized for a given max batch size
@@ -162,7 +206,6 @@ struct TensorRTConfig : public NativeConfig {

 // NOTE WIP, not stable yet.
 struct AnalysisConfig : public NativeConfig {
-  //
   enum class IrPassMode {
     kSystem,   // Use system default passes, not customize.
     kInclude,  // Specify the passes in `ir_passes`.
@@ -173,18 +216,21 @@ struct AnalysisConfig : public NativeConfig {
   IrPassMode ir_mode{IrPassMode::kExclude};
   // attention lstm fuse works only on some specific models, disable as default.
   std::vector<std::string> ir_passes{"attention_lstm_fuse_pass"};
+
+  // NOTE this is just for internal development; please do not use it.
+  bool _use_mkldnn{false};
 };

-// A factory to help create different predictors.
-//
-// FOR EXTENSION DEVELOPER:
-// Different predictors are designated by config type and engine kind. Similar
-// configs can be merged, but there shouldn't be a huge config containing
-// different fields for more than one kind of predictors.
-//
-// Similarly, each engine kind should map to a unique predictor implementation.
-template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
-std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
+// Configurations for Anakin engine.
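+// A usage sketch (assuming the factory specialization below) is
+// anakin_mobilenet_tester.cc, which creates the predictor via
+// CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(config).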
+struct AnakinConfig : public PaddlePredictor::Config { + enum TargetType { NVGPU = 0, X86 }; + int device; + std::string model_file; + int max_batch_size{-1}; + TargetType target_type; +}; + +} // namespace contrib int PaddleDtypeSize(PaddleDType dtype); diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 9c057affca9c4b6b3fcd4574a587af1b78145b5c..925c3e6b879b56ab158e983032efbbc673357eba 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -58,6 +58,11 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_text_classification ${TEXT_CLASSIFICATION_INSTALL_DIR} analyzer_text_classification_tester.cc) +# seq_conv1 +set(SEQ_CONV1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_conv1") +download_model_and_data(${SEQ_CONV1_INSTALL_DIR} "seq_conv1_model.tar.gz" "seq_conv1_data.txt.tar.gz") +inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} analyzer_seq_conv1_tester.cc) + # ocr set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr") if (NOT EXISTS ${OCR_INSTALL_DIR}) diff --git a/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc b/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc index 62e820b68c79a47d963bb174663bfc8c4ac22de3..cf97f064beddb6ede1d4716f323b4c5b46cb266d 100644 --- a/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc +++ b/paddle/fluid/inference/tests/api/anakin_mobilenet_tester.cc @@ -22,10 +22,10 @@ DEFINE_string(model, "", "Directory of the inference model(mobile_v2)."); namespace paddle { -AnakinConfig GetConfig() { - AnakinConfig config; +contrib::AnakinConfig GetConfig() { + contrib::AnakinConfig config; // using AnakinConfig::X86 if you need to use cpu to do inference - config.target_type = AnakinConfig::NVGPU; + config.target_type = contrib::AnakinConfig::NVGPU; config.model_file = FLAGS_model; config.device = 0; config.max_batch_size = 1; @@ -33,9 +33,10 @@ AnakinConfig GetConfig() { } TEST(inference, anakin) { - AnakinConfig config = GetConfig(); + auto config = GetConfig(); auto predictor = - CreatePaddlePredictor(config); + CreatePaddlePredictor( + config); float data[1 * 3 * 224 * 224] = {1.0f}; PaddleTensor tensor; diff --git a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc index 98c74aaa562dce6618ccde8f11f4344eefd59ef2..82bc83988de688e46613e160b66943c89c4a0391 100644 --- a/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/anakin_rnn1_tester.cc @@ -97,10 +97,10 @@ void Data::get_batch_data( namespace paddle { -AnakinConfig GetConfig() { - AnakinConfig config; +contrib::AnakinConfig GetConfig() { + contrib::AnakinConfig config; // using AnakinConfig::X86 if you need to use cpu to do inference - config.target_type = AnakinConfig::X86; + config.target_type = contrib::AnakinConfig::X86; config.model_file = FLAGS_model; config.device = 0; config.max_batch_size = 1000; // the max number of token @@ -121,9 +121,10 @@ void set_tensor(std::string name, std::vector shape, } void single_test() { - AnakinConfig config = GetConfig(); + auto config = GetConfig(); auto predictor = - CreatePaddlePredictor(config); + CreatePaddlePredictor( + config); int max_batch_size = 1000; std::string 
feature_file = FLAGS_datapath; diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc index 8cf230a51d05c3a141f7cfd4e30bf30f064f0989..59020545cd609961487cafc4a08c20951a02c8ce 100644 --- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc @@ -95,7 +95,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } } -void SetConfig(AnalysisConfig *cfg) { +void SetConfig(contrib::AnalysisConfig *cfg) { cfg->prog_file = FLAGS_infer_model + "/__model__"; cfg->param_file = FLAGS_infer_model + "/param"; cfg->use_gpu = false; @@ -117,7 +117,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. TEST(Analyzer_Chinese_ner, profile) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; @@ -141,7 +141,7 @@ TEST(Analyzer_Chinese_ner, profile) { // Check the fuse status TEST(Analyzer_Chinese_ner, fuse_statis) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -155,7 +155,7 @@ TEST(Analyzer_Chinese_ner, fuse_statis) { // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_Chinese_ner, compare) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector> input_slots_all; diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index 14bdf76efc71b326bd130858ea246be81c9bd45c..3bf5383d8f35347c767d6caee83e0dcc5fb0a446 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -149,7 +149,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } } -void SetConfig(AnalysisConfig *cfg) { +void SetConfig(contrib::AnalysisConfig *cfg) { cfg->prog_file = FLAGS_infer_model + "/__model__"; cfg->param_file = FLAGS_infer_model + "/param"; cfg->use_gpu = false; @@ -172,7 +172,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. TEST(Analyzer_rnn1, profile) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; @@ -183,7 +183,7 @@ TEST(Analyzer_rnn1, profile) { // Check the fuse status TEST(Analyzer_rnn1, fuse_statis) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -198,7 +198,7 @@ TEST(Analyzer_rnn1, fuse_statis) { // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_rnn1, compare) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector> input_slots_all; @@ -208,7 +208,7 @@ TEST(Analyzer_rnn1, compare) { // Test Multi-Thread. TEST(Analyzer_rnn1, multi_thread) { - AnalysisConfig cfg; + contrib::AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..2f71ed46ffc9fd5f853f5b5b46de1446d28b9e69 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc @@ -0,0 +1,199 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+
+struct DataRecord {
+  std::vector<std::vector<int64_t>> title1_all, title2_all, title3_all, l1_all;
+  std::vector<std::vector<int64_t>> title1, title2, title3, l1;
+  std::vector<size_t> title1_lod, title2_lod, title3_lod, l1_lod;
+  size_t batch_iter{0};
+  size_t batch_size{1};
+  size_t num_samples;  // total number of samples
+  DataRecord() = default;
+  explicit DataRecord(const std::string &path, int batch_size = 1)
+      : batch_size(batch_size) {
+    Load(path);
+  }
+  DataRecord NextBatch() {
+    DataRecord data;
+    size_t batch_end = batch_iter + batch_size;
+    // NOTE skip the final batch if not enough data is provided.
+    if (batch_end <= title1_all.size()) {
+      data.title1_all.assign(title1_all.begin() + batch_iter,
+                             title1_all.begin() + batch_end);
+      data.title2_all.assign(title2_all.begin() + batch_iter,
+                             title2_all.begin() + batch_end);
+      data.title3_all.assign(title3_all.begin() + batch_iter,
+                             title3_all.begin() + batch_end);
+      data.l1_all.assign(l1_all.begin() + batch_iter,
+                         l1_all.begin() + batch_end);
+      // Prepare LoDs
+      data.title1_lod.push_back(0);
+      data.title2_lod.push_back(0);
+      data.title3_lod.push_back(0);
+      data.l1_lod.push_back(0);
+      CHECK(!data.title1_all.empty());
+      CHECK(!data.title2_all.empty());
+      CHECK(!data.title3_all.empty());
+      CHECK(!data.l1_all.empty());
+      CHECK_EQ(data.title1_all.size(), data.title2_all.size());
+      CHECK_EQ(data.title1_all.size(), data.title3_all.size());
+      CHECK_EQ(data.title1_all.size(), data.l1_all.size());
+      for (size_t j = 0; j < data.title1_all.size(); j++) {
+        data.title1.push_back(data.title1_all[j]);
+        data.title2.push_back(data.title2_all[j]);
+        data.title3.push_back(data.title3_all[j]);
+        data.l1.push_back(data.l1_all[j]);
+        // calculate lod
+        data.title1_lod.push_back(data.title1_lod.back() +
+                                  data.title1_all[j].size());
+        data.title2_lod.push_back(data.title2_lod.back() +
+                                  data.title2_all[j].size());
+        data.title3_lod.push_back(data.title3_lod.back() +
+                                  data.title3_all[j].size());
+        data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size());
+      }
+    }
+    batch_iter += batch_size;
+    return data;
+  }
+  void Load(const std::string &path) {
+    std::ifstream file(path);
+    std::string line;
+    int num_lines = 0;
+    while (std::getline(file, line)) {
+      num_lines++;
+      std::vector<std::string> data;
+      split(line, '\t', &data);
+      // load title1 data
+      std::vector<int64_t> title1_data;
+      split_to_int64(data[0], ' ', &title1_data);
+      // load title2 data
+      std::vector<int64_t> title2_data;
+      split_to_int64(data[1], ' ', &title2_data);
+      // load title3 data
+      std::vector<int64_t> title3_data;
+      split_to_int64(data[2], ' ', &title3_data);
+      // load l1 data
+      std::vector<int64_t> l1_data;
+      split_to_int64(data[3], ' ', &l1_data);
+      title1_all.push_back(std::move(title1_data));
+      title2_all.push_back(std::move(title2_data));
+      title3_all.push_back(std::move(title3_data));
+      l1_all.push_back(std::move(l1_data));
+    }
+    num_samples = num_lines;
+  }
+};
+
+void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
+                   int batch_size) {
+  PaddleTensor title1_tensor, title2_tensor, title3_tensor, l1_tensor;
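+  // Each slot below is a LoDTensor of shape {total_token_count, 1}; the lod
+  // vectors computed in DataRecord::NextBatch mark the sequence boundaries.
+ 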
title1_tensor.name = "title1"; + title2_tensor.name = "title2"; + title3_tensor.name = "title3"; + l1_tensor.name = "l1"; + auto one_batch = data->NextBatch(); + int title1_size = one_batch.title1_lod[one_batch.title1_lod.size() - 1]; + title1_tensor.shape.assign({title1_size, 1}); + title1_tensor.lod.assign({one_batch.title1_lod}); + int title2_size = one_batch.title2_lod[one_batch.title2_lod.size() - 1]; + title2_tensor.shape.assign({title2_size, 1}); + title2_tensor.lod.assign({one_batch.title2_lod}); + int title3_size = one_batch.title3_lod[one_batch.title3_lod.size() - 1]; + title3_tensor.shape.assign({title3_size, 1}); + title3_tensor.lod.assign({one_batch.title3_lod}); + int l1_size = one_batch.l1_lod[one_batch.l1_lod.size() - 1]; + l1_tensor.shape.assign({l1_size, 1}); + l1_tensor.lod.assign({one_batch.l1_lod}); + + // assign data + TensorAssignData(&title1_tensor, one_batch.title1); + TensorAssignData(&title2_tensor, one_batch.title2); + TensorAssignData(&title3_tensor, one_batch.title3); + TensorAssignData(&l1_tensor, one_batch.l1); + // Set inputs. + input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor}); + for (auto &tensor : *input_slots) { + tensor.dtype = PaddleDType::INT64; + } +} + +void SetConfig(AnalysisConfig *cfg) { + cfg->model_dir = FLAGS_infer_model; + cfg->use_gpu = false; + cfg->device = 0; + cfg->specify_input_name = true; + cfg->enable_ir_optim = true; +} + +void SetInput(std::vector> *inputs) { + DataRecord data(FLAGS_infer_data, FLAGS_batch_size); + std::vector input_slots; + int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1; + LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size; + for (int bid = 0; bid < epoch; ++bid) { + PrepareInputs(&input_slots, &data, FLAGS_batch_size); + (*inputs).emplace_back(input_slots); + } +} + +// Easy for profiling independently. +TEST(Analyzer_seq_conv1, profile) { + AnalysisConfig cfg; + SetConfig(&cfg); + std::vector outputs; + + std::vector> input_slots_all; + SetInput(&input_slots_all); + TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads); + + if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) { + // the first inference result + PADDLE_ENFORCE_EQ(outputs.size(), 1UL); + size_t size = GetSize(outputs[0]); + PADDLE_ENFORCE_GT(size, 0); + float *result = static_cast(outputs[0].data.data()); + // output is probability, which is in (0, 1). 
+ for (size_t i = 0; i < size; i++) { + EXPECT_GT(result[i], 0); + EXPECT_LT(result[i], 1); + } + } +} + +// Check the fuse status +TEST(Analyzer_seq_conv1, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); + int num_ops; + auto fuse_statis = GetFuseStatis(cfg, &num_ops); +} + +// Compare result of NativeConfig and AnalysisConfig +TEST(Analyzer_seq_conv1, compare) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> input_slots_all; + SetInput(&input_slots_all); + CompareNativeAndAnalysis(cfg, input_slots_all); +} + +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index 05cd343433beb6d8fd80915f65b917bb13d345f6..9fcb5129d268a7730c11e5910077ad233050484e 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -38,6 +38,8 @@ DEFINE_bool(use_analysis, true, namespace paddle { namespace inference { +using contrib::AnalysisConfig; + void CompareResult(const std::vector &outputs, const std::vector &ref_outputs) { EXPECT_GT(outputs.size(), 0UL); @@ -45,11 +47,8 @@ void CompareResult(const std::vector &outputs, for (size_t i = 0; i < outputs.size(); i++) { auto &out = outputs[i]; auto &ref_out = ref_outputs[i]; - size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1, - [](int a, int b) { return a * b; }); - size_t ref_size = - std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1, - [](int a, int b) { return a * b; }); + size_t size = VecReduceToInt(out.shape); + size_t ref_size = VecReduceToInt(ref_out.shape); EXPECT_GT(size, 0); EXPECT_EQ(size, ref_size); EXPECT_EQ(out.dtype, ref_out.dtype); @@ -77,18 +76,15 @@ void CompareResult(const std::vector &outputs, std::unique_ptr CreateTestPredictor( const AnalysisConfig &config, bool use_analysis = true) { if (use_analysis) { - return CreatePaddlePredictor( - config); + return CreatePaddlePredictor(config); } else { return CreatePaddlePredictor( config); } } -size_t GetSize(const PaddleTensor &out) { - return std::accumulate(out.shape.begin(), out.shape.end(), 1, - [](int a, int b) { return a * b; }); -} +size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); } std::unordered_map GetFuseStatis(AnalysisConfig config, int *num_ops) { diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 286b03d7b7d11a50f33f0190c1a5b9097ed0f4a2..c091476d6d132db17a656d5c8dee65e3a88d9ac2 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include #include "paddle/fluid/operators/mkldnn_activation_op.h" +#include "paddle/fluid/platform/port.h" namespace paddle { namespace operators { @@ -105,105 +106,105 @@ class ActivationOpGrad : public framework::OperatorWithKernel { } }; -__attribute__((unused)) constexpr char SigmoidDoc[] = R"DOC( +UNUSED constexpr char SigmoidDoc[] = R"DOC( Sigmoid Activation Operator $$out = \frac{1}{1 + e^{-x}}$$ )DOC"; -__attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC( +UNUSED constexpr char LogSigmoidDoc[] = R"DOC( Logsigmoid Activation Operator $$out = \\log \\frac{1}{1 + e^{-x}}$$ )DOC"; -__attribute__((unused)) constexpr char ExpDoc[] = R"DOC( +UNUSED constexpr char ExpDoc[] = R"DOC( Exp Activation Operator. 
$out = e^x$ )DOC"; -__attribute__((unused)) constexpr char ReluDoc[] = R"DOC( +UNUSED constexpr char ReluDoc[] = R"DOC( Relu Activation Operator. $out = \max(x, 0)$ )DOC"; -__attribute__((unused)) constexpr char TanhDoc[] = R"DOC( +UNUSED constexpr char TanhDoc[] = R"DOC( Tanh Activation Operator. $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"; -__attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC( +UNUSED constexpr char TanhShrinkDoc[] = R"DOC( TanhShrink Activation Operator. $$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$ )DOC"; -__attribute__((unused)) constexpr char SqrtDoc[] = R"DOC( +UNUSED constexpr char SqrtDoc[] = R"DOC( Sqrt Activation Operator. $out = \sqrt{x}$ )DOC"; -__attribute__((unused)) constexpr char AbsDoc[] = R"DOC( +UNUSED constexpr char AbsDoc[] = R"DOC( Abs Activation Operator. $out = |x|$ )DOC"; -__attribute__((unused)) constexpr char CeilDoc[] = R"DOC( +UNUSED constexpr char CeilDoc[] = R"DOC( Ceil Activation Operator. $out = ceil(x)$ )DOC"; -__attribute__((unused)) constexpr char FloorDoc[] = R"DOC( +UNUSED constexpr char FloorDoc[] = R"DOC( Floor Activation Operator. $out = floor(x)$ )DOC"; -__attribute__((unused)) constexpr char CosDoc[] = R"DOC( +UNUSED constexpr char CosDoc[] = R"DOC( Cosine Activation Operator. $out = cos(x)$ )DOC"; -__attribute__((unused)) constexpr char SinDoc[] = R"DOC( +UNUSED constexpr char SinDoc[] = R"DOC( Sine Activation Operator. $out = sin(x)$ )DOC"; -__attribute__((unused)) constexpr char RoundDoc[] = R"DOC( +UNUSED constexpr char RoundDoc[] = R"DOC( Round Activation Operator. $out = [x]$ )DOC"; -__attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC( +UNUSED constexpr char ReciprocalDoc[] = R"DOC( Reciprocal Activation Operator. $$out = \\frac{1}{x}$$ )DOC"; -__attribute__((unused)) constexpr char LogDoc[] = R"DOC( +UNUSED constexpr char LogDoc[] = R"DOC( Log Activation Operator. $out = \ln(x)$ @@ -212,21 +213,21 @@ Natural logarithm of x. )DOC"; -__attribute__((unused)) constexpr char SquareDoc[] = R"DOC( +UNUSED constexpr char SquareDoc[] = R"DOC( Square Activation Operator. $out = x^2$ )DOC"; -__attribute__((unused)) constexpr char SoftplusDoc[] = R"DOC( +UNUSED constexpr char SoftplusDoc[] = R"DOC( Softplus Activation Operator. $out = \ln(1 + e^{x})$ )DOC"; -__attribute__((unused)) constexpr char SoftsignDoc[] = R"DOC( +UNUSED constexpr char SoftsignDoc[] = R"DOC( Softsign Activation Operator. 
$$out = \frac{x}{1 + |x|}$$ diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index b98190d40a2afa684cfd29cc52fc29fac851cca7..4cc980b41b34894f9d915d4b325887548091c0eb 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -23,8 +23,6 @@ namespace operators { using Tensor = framework::Tensor; using LoDTensor = framework::LoDTensor; -static constexpr int kROISize = 4; - template bool GT_E(T a, T b) { return (a > b) || fabs(a - b) < 1e-4; diff --git a/paddle/fluid/operators/pool_mkldnn_op.cc b/paddle/fluid/operators/pool_mkldnn_op.cc index 5341187d1ce9400ac34750ab691608e76158ae0d..56cef91e29cc7da27384c27a7ec63e90cfadfc3b 100644 --- a/paddle/fluid/operators/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/pool_mkldnn_op.cc @@ -46,6 +46,25 @@ static std::string gethash(const memory::dims& input_dims, dims2str(paddings) + pooling_type + suffix; } +static inline int ComputeCeiledOutput(int input_size, int kernel_size, + int padding, int stride) { + return (input_size - kernel_size + 2 * padding) / stride + 1; +} + +static inline void CorrectOutputSize( + const std::vector& src_tz, const std::vector& dst_tz, + const std::vector& kernel_size, const std::vector& paddings, + const std::vector& strides, + std::vector& right_bot_padding) { // NOLINT + for (size_t i = 0; i < right_bot_padding.size(); i++) { + int desired_size = ComputeCeiledOutput(src_tz[i + 2], kernel_size[i], + paddings[i], strides[i]); + if (desired_size != dst_tz[i + 2]) { + right_bot_padding[i] += strides[i]; + } + } +} + template class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { public: @@ -103,6 +122,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { auto pool_p = std::static_pointer_cast(dev_ctx.GetBlob(key_pool_p)); if (pool_p == nullptr) { + const std::vector& padding_left_top(paddings); + std::vector padding_right_bottom(paddings); + bool ceil_mode = ctx.Attr("ceil_mode"); + if (ceil_mode) { + CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides, + padding_right_bottom); + } auto src_md = platform::MKLDNNMemDesc( src_tz, platform::MKLDNNGetDataType(), input_format); @@ -114,8 +140,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { mkldnn::memory::format::any); std::shared_ptr pool_pd = - CreatePrimitiveDesc(src_md, dst_md, strides, paddings, ksize, - pooling_type, mkldnn_engine); + CreatePrimitiveDesc(src_md, dst_md, strides, padding_left_top, + padding_right_bottom, ksize, pooling_type, + mkldnn_engine, ceil_mode); // save pool_pd into global device context to be referred in backward path dev_ctx.SetBlob(key_pool_pd, pool_pd); @@ -171,14 +198,16 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel { private: std::unique_ptr CreatePrimitiveDesc( const mkldnn::memory::desc& src, const mkldnn::memory::desc& dst, - const std::vector& stride, const std::vector& padding, - const std::vector& kernel, const std::string& pooling_type, - const mkldnn::engine& engine) const { + const std::vector& stride, const std::vector& padding_left_top, + const std::vector& padding_right_bot, const std::vector& kernel, + const std::string& pooling_type, const mkldnn::engine& engine, + bool ceil_mode) const { auto pool_desc = mkldnn::pooling_forward::desc( mkldnn::prop_kind::forward, pooling_type == "max" ? 
mkldnn::algorithm::pooling_max : mkldnn::algorithm::pooling_avg, - src, dst, stride, kernel, padding, padding, mkldnn::padding_kind::zero); + src, dst, stride, kernel, padding_left_top, padding_right_bot, + mkldnn::padding_kind::zero); auto p_pool_pd = new mkldnn::pooling_forward::primitive_desc(pool_desc, engine); diff --git a/paddle/fluid/operators/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt_engine_op.h index d4ba0f9c33c91811647f9d19a332f139c16b0eb2..3c78c29c1a30d74947be84cd2b52ad308e732a2d 100644 --- a/paddle/fluid/operators/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt_engine_op.h @@ -34,7 +34,7 @@ namespace operators { using FluidDT = framework::proto::VarType_Type; using TRT_DT = nvinfer1::DataType; -namespace { +namespace { // NOLINT TRT_DT FluidDataType2TRT(FluidDT type) { switch (type) { diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index 4a8ac441cfaf642fde58ee30865a22e83c065498..92a0697e27ba0da66fa3b0f5380e7bd52575640d 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel { "Output(Indices) of TopkOp should not be null."); auto input_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(input_dims.size(), 2, + "Rank of TopK op's input must be 2."); const int k = static_cast(ctx->Attrs().Get("k")); PADDLE_ENFORCE_GE(k, 1, "k must >= 1"); diff --git a/paddle/fluid/platform/cudnn_helper_test.cc b/paddle/fluid/platform/cudnn_helper_test.cc index 517df6863499f20d7b66d15ef114a689700be5b2..28edfd2e50237c887dbeb7ac73e1f990ce239a9c 100644 --- a/paddle/fluid/platform/cudnn_helper_test.cc +++ b/paddle/fluid/platform/cudnn_helper_test.cc @@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#define GLOG_NO_ABBREVIATED_SEVERITIES +#define GOOGLE_GLOG_DLL_DECL + #include "paddle/fluid/platform/cudnn_helper.h" #include diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 61a653d9313daff96d39c08e80f17d7e33acceb1..f04395a8ac00f33501008aa12f22773ddda9b138 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -21,6 +21,7 @@ limitations under the License. */ #if defined(_WIN32) #define NOMINMAX // msvc max/min macro conflict with std::min/max #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#define GOOGLE_GLOG_DLL_DECL #endif #ifdef PADDLE_WITH_CUDA @@ -47,7 +48,7 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/dynload/curand.h" -#if !defined(__APPLE__) and !defined(_WIN32) +#if !defined(__APPLE__) && !defined(_WIN32) #include "paddle/fluid/platform/dynload/nccl.h" #endif // __APPLE__ #endif // PADDLE_WITH_CUDA @@ -216,7 +217,7 @@ inline typename std::enable_if::type throw_on_error( #endif } -#if !defined(__APPLE__) and !defined(_WIN32) +#if !defined(__APPLE__) && !defined(_WIN32) template inline typename std::enable_if::type throw_on_error( ncclResult_t stat, const Args&... args) { @@ -260,14 +261,8 @@ inline void throw_on_error(T e) { } \ } while (false) -#define PADDLE_THROW_EOF() \ - do { \ - throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \ - __LINE__); \ - } while (false) - #else -#define PADDLE_ENFORCE(...) 
::paddle::platform::throw_on_error(__VA_ARGS__) +#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__); #endif // REPLACE_ENFORCE_GLOG #else // !_WIN32 @@ -281,6 +276,12 @@ inline void throw_on_error(T e) { #define PADDLE_ENFORCE(x, ...) x #endif // !_WIN32 +#define PADDLE_THROW_EOF() \ + do { \ + throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \ + __LINE__); \ + } while (false) + /* * Some enforce helpers here, usage: * int a = 1; @@ -294,7 +295,7 @@ inline void throw_on_error(T e) { * extra messages is also supported, for example: * PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2) */ - +#if !defined(_WIN32) #define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__) #define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \ @@ -307,6 +308,7 @@ inline void throw_on_error(T e) { __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__) #define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \ __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__) + #define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \ do { \ if (UNLIKELY(nullptr == (__VAL))) { \ @@ -326,6 +328,27 @@ inline void throw_on_error(T e) { paddle::string::Sprintf("" __VA_ARGS__)); \ } \ } while (0) +#else +#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0) == (__VAL1)) +#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0) != (__VAL1)) +#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0) > (__VAL1)) +#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0) >= (__VAL1)) +#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0) < (__VAL1)) +#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0) <= (__VAL1)) + +#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \ + do { \ + if (!((__VAL0)__CMP(__VAL1))) { \ + PADDLE_THROW("Windows disable the enforce. Enforce failed."); \ + } \ + } while (0) +#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...) \ + do { \ + if (nullptr == (__VAL1)) { \ + PADDLE_THROW("Windows disable the enforce. Enforce failed"); \ + } \ + } while (0) +#endif // !_WIN32 } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/init.h b/paddle/fluid/platform/init.h index 0e30594672927253cc8083dcb88bb867d63ec729..992ca5e6f6a966a331616a698e3bebd2eee129d5 100644 --- a/paddle/fluid/platform/init.h +++ b/paddle/fluid/platform/init.h @@ -16,6 +16,9 @@ limitations under the License. 
*/ #include #include +#define GLOG_NO_ABBREVIATED_SEVERITIES +#define GOOGLE_GLOG_DLL_DECL + #include "gflags/gflags.h" #include "glog/logging.h" diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc index 1f61a0e289f32196ead04d71d07b513cbe4655b1..882e6332e8174b59eb6e19e788c8cced808d552c 100644 --- a/paddle/fluid/pybind/const_value.cc +++ b/paddle/fluid/pybind/const_value.cc @@ -48,6 +48,9 @@ void BindConstValue(pybind11::module* m) { op_proto_and_checker_maker.def( "kOpNameScopeAttrName", framework::OpProtoAndCheckerMaker::OpNamescopeAttrName); + op_proto_and_checker_maker.def( + "kOpCreationCallstackAttrName", + framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName); } } // namespace pybind diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index f50a68c54114d5cce15418ad22f38c83163ba866..e6a9524382be219e550017ed4f1a6070dca22fbf 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -147,6 +147,7 @@ function cmake_gen() { -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} -DPY_VERSION=${PY_VERSION:-2.7} + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} ======================================== EOF # Disable UNITTEST_USE_VIRTUALENV in docker because @@ -178,7 +179,8 @@ EOF -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \ -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \ -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \ - -DPY_VERSION=${PY_VERSION:-2.7} + -DPY_VERSION=${PY_VERSION:-2.7} \ + -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build} } @@ -361,7 +363,7 @@ EOF ctest --output-on-failure # make install should also be test when unittest make install -j `nproc` - pip install /usr/local/opt/paddle/share/wheels/*.whl + pip install ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then paddle version fi diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 9e4a5ae8baaf7f2975c8060856f9eecab55f241c..7bbdf7de89cc932e0023952e3c8e102f92b06855 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -19,17 +19,8 @@ from .framework import * # import all class inside executor into fluid module from . import executor from .executor import * - from . import trainer -from .trainer import Trainer -from .trainer import BeginEpochEvent -from .trainer import EndEpochEvent -from .trainer import BeginStepEvent -from .trainer import EndStepEvent -from .trainer import CheckpointConfig - from . import inferencer -from .inferencer import Inferencer from . import io from . import evaluator diff --git a/python/paddle/fluid/contrib/inferencer.py b/python/paddle/fluid/contrib/inferencer.py new file mode 100644 index 0000000000000000000000000000000000000000..b8d5f4ffeadca0a7b103682f175d50dc46fa258a --- /dev/null +++ b/python/paddle/fluid/contrib/inferencer.py @@ -0,0 +1,112 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import contextlib
+
+from .. import core
+
+from .. import executor
+from .. import framework
+from .. import io
+from .. import parallel_executor
+from .. import unique_name
+from .trainer import check_and_get_place
+
+__all__ = ['Inferencer', ]
+
+
+class Inferencer(object):
+    """
+    Inferencer High Level API.
+
+    Args:
+        infer_func (Python func): Infer function that returns the predict Variable
+        param_path (str): The path where the inference model is saved by fluid.io.save_params
+        place (Place): place to do the inference
+        parallel (bool): use parallel_executor to run the inference; it will use multiple CPUs/GPUs.
+
+    Examples:
+        .. code-block:: python
+
+            def inference_program():
+                x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+                y_predict = fluid.layers.fc(input=x, size=1, act=None)
+                return y_predict
+
+            place = fluid.CPUPlace()
+            inferencer = fluid.Inferencer(
+                infer_func=inference_program, param_path="/tmp/model", place=place)
+
+    """
+
+    def __init__(self, infer_func, param_path, place=None, parallel=False):
+        self.param_path = param_path
+        self.scope = core.Scope()
+        self.parallel = parallel
+        self.place = check_and_get_place(place)
+
+        self.inference_program = framework.Program()
+        with framework.program_guard(self.inference_program):
+            with unique_name.guard():
+                self.predict_var = infer_func()
+
+        with self._prog_and_scope_guard():
+            # load params from param_path into scope
+            io.load_params(executor.Executor(self.place), param_path)
+
+        if parallel:
+            with self._prog_and_scope_guard():
+                self.exe = parallel_executor.ParallelExecutor(
+                    use_cuda=isinstance(self.place, core.CUDAPlace),
+                    loss_name=self.predict_var.name)
+        else:
+            self.exe = executor.Executor(self.place)
+
+        self.inference_program = self.inference_program.clone(for_test=True)
+
+    def infer(self, inputs, return_numpy=True):
+        """
+        Do Inference for Inputs
+
+        Args:
+            inputs (map): a map of {"input_name": input_var} that will be fed into the inference program
+            return_numpy (bool): whether to transform the return value into numpy arrays
+
+        Returns:
+            Tensor or Numpy: the predict value of the inference model for the inputs
+
+        Examples:
+            .. code-block:: python
+
+                tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
+                results = inferencer.infer({'x': tensor_x})
+        """
+        if not isinstance(inputs, dict):
+            raise ValueError(
+                "inputs should be a map of {'input_name': input_var}")
+
+        with self._prog_and_scope_guard():
+            results = self.exe.run(feed=inputs,
+                                   fetch_list=[self.predict_var.name],
+                                   return_numpy=return_numpy)
+
+        return results
+
+    @contextlib.contextmanager
+    def _prog_and_scope_guard(self):
+        with framework.program_guard(main_program=self.inference_program):
+            with executor.scope_guard(self.scope):
+                yield
diff --git a/python/paddle/fluid/contrib/trainer.py b/python/paddle/fluid/contrib/trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..8569e486f91786b5562e84dcdccf6d91da0612cc
--- /dev/null
+++ b/python/paddle/fluid/contrib/trainer.py
@@ -0,0 +1,1258 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import contextlib
+import os
+import errno
+import shutil
+import six
+import time
+
+from .. import core
+from .. import data_feeder
+from .. import executor
+from .. import framework
+from .. import io
+# `optimizer` has the same name as the parameter of Trainer.__init__, so rename it to opt_module
+from .. import optimizer as opt_module
+from .. import parallel_executor
+from ..transpiler import distribute_transpiler
+
+__all__ = [
+    'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent',
+    'EndStepEvent', 'CheckpointConfig'
+]
+
+
+class BeginEpochEvent(object):
+    """
+    The beginning of a training epoch.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+    """
+
+    def __init__(self, epoch_id):
+        self.epoch = epoch_id
+
+
+class EndEpochEvent(object):
+    """
+    The end of a training epoch.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+    """
+
+    def __init__(self, epoch_id):
+        self.epoch = epoch_id
+
+
+class BeginStepEvent(object):
+    """
+    The beginning of a training step.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+        step_id(int): The current step ID.
+    """
+
+    def __init__(self, epoch_id, step_id):
+        self.epoch = epoch_id
+        self.step = step_id
+        self.fetch_metrics = True
+        """
+        If fetch_metrics is true, the metrics will be fetched at the
+        EndStepEvent. Default is True.
+        """
+
+
+class EndStepEvent(object):
+    """
+    The end of a training step.
+
+    Args:
+        epoch_id(int): The current epoch ID.
+        step_id(int): The current step ID.
+        metrics(list): A list of fetched tensor. The order of this list is same
+            as the :code:`train_func` returns.
+    """
+
+    def __init__(self, epoch_id, step_id, metrics):
+        self.epoch = epoch_id
+        self.step = step_id
+        self.metrics = metrics
+
+
+class CheckpointConfig(object):
+    """
+    Parameter object for :code:`save_checkpoint` and
+    :code:`fluid.Trainer`. Used to configure how to save checkpoints.
+
+    Args:
+        checkpoint_dir(str): Directory path to save checkpoints. Default is the
+            current directory.
+
+        max_num_checkpoints(int): The max number of local checkpoints to keep.
+        epoch_interval(int): Save a checkpoint every this number of epochs.
+        step_interval(int): Save a checkpoint every this number of steps.
+
+    Examples:
+        >>> config = fluid.CheckpointConfig("./checkpoints")
+        >>> trainer = fluid.Trainer(train_func=train_program,
+        >>>                         place=place,
+        >>>                         optimizer_func=optimizer_func,
+        >>>                         checkpoint_config=config)
+        >>> trainer.train(...)
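+        >>> # with the defaults below, a checkpoint is written every epoch
+        >>> # (epoch_interval=1) and every 10 steps (step_interval=10)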
+ """ + + def __init__(self, + checkpoint_dir=None, + max_num_checkpoints=3, + epoch_interval=1, + step_interval=10): + + assert epoch_interval >= 1 + assert step_interval >= 1 + + self.checkpoint_dir = checkpoint_dir \ + if checkpoint_dir is not None else os.getcwd() + self.max_num_checkpoints = max_num_checkpoints + self.epoch_interval = epoch_interval + self.step_interval = step_interval + self.epoch_id = 0 + self.step_id = 0 + self.load_serial = None + self.pserver_id = None + self.lookup_table_name = None + + +def check_and_get_place(place): + """ + Check the type of place or get the default place + Args: + place(None|core.CUDAPlace|core.CPUPlace): the place that trainer will be executed on. + + Raises: + TypeError if the type mismatched. + + Returns: + the original place if it is not None. + if fluid is compiled with CUDA, returns CUDAPlace(0) by default. + Otherwise returns CPUPlace by default. + """ + if place is None: + if core.is_compiled_with_cuda(): + return core.CUDAPlace(0) + else: + return core.CPUPlace() + else: + if not isinstance(place, core.CUDAPlace) and not isinstance( + place, core.CPUPlace): + raise TypeError("Place should be either CUDAPlace or CPUPlace") + return place + + +class Trainer(object): + """ + A trainer wraps MultiGPU/MultiNode training loops and can be used to train a + simple neural network easily. + + This API takes a :code:`train_func`. A :code:`train_func` is a function that + return loss as it first return value. The reset value can be fetched by + EndStepEvent.metrics + + This API also takes a :code:`optimizer_func` that will return an optimizer + instance. + + For example, to train a MLP for MNIST dataset, the sample program is + + >>> import paddle.fluid as fluid + >>> + >>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10): + >>> hidden = image + >>> for layer_size in layer_sizes: + >>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation) + >>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax") + >>> + >>> def train_mnist_mlp(): + >>> img = fluid.layers.data(name='image', shape=[784]) + >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') + >>> prediction = mlp(img) + >>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label)) + >>> + >>> def optimizer(): + >>> return fluid.optimizer.Adam() + >>> + >>> trainer = Trainer(train_func=train_mnist_mlp, + >>> optimizer_func=optimizer, + >>> place=fluid.CUDAPlace(0), + >>> parallel=True) + >>> + >>> def train_callback(event): + >>> if isinstance(event, fluid.EndStepEvent): + >>> print "Epoch ID", event.epoch, "Step ID",\ + >>> event.step, "AvgLoss", event.metrics[0] + >>> elif isinstance(event, fluid.EndEpochEvent): + >>> trainer.save_params("./model_{0}".format(event.epoch)) + >>> + >>> trainer.train(num_epochs=100, event_handler=train_callback) + + For more example, please see :ref:`api_guide_high_level_api`. + + + Args: + train_func(callable): A function which will return loss. The loss must be + a scalar tensor. + optimizer_func(callable): A function that returns an Optimizer object. + place(CUDAPlace|CPUPlace): The device place of this trainer. If + :code:`parallel=True,` all CUDA Places will be used if :code:`place` + is a :code:`CUDAPlace`. + parallel(bool): True if use multiple devices. + checkpoint_config(CheckpointConfig): Configuration about how to save + checkpoints. 
+ """ + + def __init__(self, + train_func, + optimizer_func, + param_path=None, + place=None, + parallel=False, + checkpoint_config=None): + self.__stop = False + self.parallel = parallel + + # config for checkpoint + # only chief worker will save variables + self.trainer_id = 0 + self.checkpoint_cfg = checkpoint_config + if self.checkpoint_cfg: + assert isinstance(self.checkpoint_cfg, CheckpointConfig) + serial = _get_latest_checkpoint_serial( + self.checkpoint_cfg.checkpoint_dir) + self.checkpoint_cfg.load_serial = serial if serial >= 0 else None + + self.scope = core.Scope() + + # 1. we need to generate a framework.Program by calling + # program_func. Reference: fluid.program_guard in + # test_word2vec.py + + self.startup_program = framework.Program() + self.train_program = framework.Program() + + with framework.program_guard(self.train_program, self.startup_program): + program_func_outs = train_func() + self.train_func_outputs = program_func_outs if isinstance( + program_func_outs, list) else [program_func_outs] + self.test_program = self.train_program.clone(for_test=True) + + # The first element of program_func_outs is loss. + loss = self.train_func_outputs[0] + + optimizer = optimizer_func() + if not isinstance(optimizer, opt_module.Optimizer): + raise TypeError( + "The optimizer should be an instance of Optimizer") + optimize_ops, params_grads = optimizer.minimize(loss) + + self.place = check_and_get_place(place) + + self._dist_transpile_if_necessary(optimize_ops, params_grads) + + # 2. move the default_main_program to self.program and run the + # default_startup program on an empty core.Scope() + # Run startup program + with self._prog_and_scope_guard(): + exe = executor.Executor(place) + exe.run(self.startup_program) + + if self.checkpoint_cfg and self.checkpoint_cfg.load_serial is not None: + self._load_checkpoint() + + if param_path and os.path.isdir(param_path): + with self._prog_and_scope_guard(): + # load params from param_path into scope + io.load_persistables( + executor=exe, + dirname=param_path, + main_program=self.startup_program) + + def _transpile_nccl2_dist(self): + # PADDLE_TRAINER_IPS + if "PADDLE_TRAINER_IPS" not in os.environ: + self.nccl_id_var = None + else: + self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) + port = os.getenv("PADDLE_PSERVER_PORT") + worker_ips = os.getenv("PADDLE_TRAINER_IPS") + worker_endpoints = [] + for ip in worker_ips.split(","): + worker_endpoints.append(':'.join([ip, port])) + self.num_trainers = len(worker_endpoints) + current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port + worker_endpoints.remove(current_endpoint) + # TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id + # in ParallelExecutor to start + # distributed training using NCCL2 + self.nccl_id_var = self.startup_program.global_block().create_var( + name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW) + self.startup_program.global_block().append_op( + type="gen_nccl_id", + inputs={}, + outputs={"NCCLID": self.nccl_id_var}, + attrs={ + "endpoint": current_endpoint, + "endpoint_list": worker_endpoints, + "trainer_id": self.trainer_id + }) + + def _dist_transpile_if_necessary(self, optimize_ops, params_grads): + self._transpile_nccl2_dist() + if self.nccl_id_var != None: + return + + if "PADDLE_TRAINING_ROLE" not in os.environ: + return + + # the port of all pservers, needed by both trainer and pserver + port = os.getenv("PADDLE_PSERVER_PORT", "6174") + # comma separated ips of all pservers, needed by trainer and + # pserver + 
pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
+        eplist = []
+        for ip in pserver_ips.split(","):
+            eplist.append(':'.join([ip, port]))
+        pserver_endpoints = ",".join(eplist)
+        # total number of workers/trainers in the job, needed by
+        # trainer and pserver
+        trainers = int(os.getenv("PADDLE_TRAINERS"))
+        # the IP of the local machine, needed by pserver only
+        current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
+        # the unique trainer id, starting from 0, needed by trainer
+        # only
+        self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
+
+        # the role, should be either PSERVER or TRAINER
+        training_role = os.getenv("PADDLE_TRAINING_ROLE")
+        with self._prog_and_scope_guard():
+            t = distribute_transpiler.DistributeTranspiler()
+            t.transpile(
+                self.trainer_id, pservers=pserver_endpoints, trainers=trainers)
+            if training_role == "PSERVER":
+                if self.checkpoint_cfg:
+                    pserver_id = eplist.index(current_endpoint)
+                    self.checkpoint_cfg.pserver_id = pserver_id
+                    if t.has_distributed_lookup_table:
+                        self.checkpoint_cfg.lookup_table_name = t.table_name
+
+                self.train_program = t.get_pserver_program(current_endpoint)
+                self.startup_program = t.get_startup_program(current_endpoint,
+                                                             self.train_program)
+            elif training_role == "TRAINER":
+                self.train_program = t.get_trainer_program()
+            else:
+                raise ValueError(
+                    'PADDLE_TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
+                )
+
+    def stop(self):
+        """
+        Stop training.
+        """
+        self.__stop = True
+
+    def train(self, num_epochs, event_handler, reader=None, feed_order=None):
+        """
+        Start the train loop to train the model.
+
+        Args:
+            num_epochs(int): The number of epochs. An epoch will process all data in the reader
+            event_handler(callable): The event handler. A function with type (ev:Event)->void
+            reader(callable): A reader creator object. See also
+                :ref:`api_guide_python_reader` .
+            feed_order(list): Feeding order of reader. None will follow the
+                defining order in the program
+
+        Returns:
+            None
+        """
+        training_role = os.getenv("PADDLE_TRAINING_ROLE", "")
+        if training_role == "PSERVER":
+            with self._prog_and_scope_guard():
+                exe = executor.Executor(self.place)
+                exe.run()
+                return
+        if self.parallel:
+            self._train_by_parallel_executor(num_epochs, event_handler, reader,
+                                             feed_order)
+        else:
+            self._train_by_executor(num_epochs, event_handler, reader,
+                                    feed_order)
+
+    def test(self, reader, feed_order):
+        """
+        Test the model on given test data
+
+        Args:
+            reader(callable): The reader that yields test data.
+            feed_order(list): Feeding order of reader. None will follow the
+                defining order in the program
+        """
+
+        return self._test_by_executor(reader, feed_order,
+                                      self.train_func_outputs)
+
+    def save_params(self, param_path):
+        """
+        Save all parameters into :code:`param_path`.
+
+        Args:
+            param_path(str): The path to save parameters.
+
+        Returns:
+            None
+        """
+        with self._prog_and_scope_guard():
+            exe = executor.Executor(self.place)
+            io.save_persistables(exe, dirname=param_path)
+
+    def save_inference_model(self, param_path, feeded_var_names,
+                             target_var_indexes):
+        """
+        Save model for cpp inference into :code:`param_path`.
+
+        Args:
+            param_path(str): The path to save parameters.
+            feeded_var_names(list(str)): The names of the vars that you
+                need to feed in before running the program.
+            target_var_indexes(list(int)): the indexes of the target vars
+                that you return in trainer.train_func.
+ Returns: + None + """ + with self._prog_and_scope_guard(): + exe = executor.Executor(self.place) + target_vars = [ + self.train_func_outputs[index] for index in target_var_indexes + ] + io.save_inference_model(param_path, feeded_var_names, target_vars, + exe) + + @contextlib.contextmanager + def _prog_and_scope_guard(self): + with framework.program_guard( + main_program=self.train_program, + startup_program=self.startup_program): + with executor.scope_guard(self.scope): + yield + + def _train_by_executor(self, num_epochs, event_handler, reader, feed_order): + """ + Train by Executor and single device. + + Args: + num_epochs: + event_handler: + reader: + feed_order: + + Returns: + + """ + with self._prog_and_scope_guard(): + feed_var_list = build_feed_var_list(self.train_program, feed_order) + feeder = data_feeder.DataFeeder( + feed_list=feed_var_list, place=self.place) + exe = executor.Executor(self.place) + reader = feeder.decorate_reader(reader, multi_devices=False) + self._train_by_any_executor(event_handler, exe, num_epochs, reader) + + def _train_by_any_executor(self, event_handler, exe, num_epochs, reader): + if self.checkpoint_cfg: + epochs = [ + epoch_id for epoch_id in range(num_epochs) + if epoch_id >= self.checkpoint_cfg.epoch_id + ] + else: + epochs = [epoch_id for epoch_id in range(num_epochs)] + + for epoch_id in epochs: + event_handler(BeginEpochEvent(epoch_id)) + for step_id, data in enumerate(reader()): + if self.__stop: + if self.checkpoint_cfg: + self._clean_checkpoint() + return + + if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \ + and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id: + continue + + begin_event = BeginStepEvent(epoch_id, step_id) + event_handler(begin_event) + if begin_event.fetch_metrics: + metrics = exe.run(feed=data, + fetch_list=[ + var.name + for var in self.train_func_outputs + ]) + else: + metrics = exe.run(feed=data, fetch_list=[]) + + if self.checkpoint_cfg: + self._save_checkpoint(epoch_id, step_id) + event_handler(EndStepEvent(epoch_id, step_id, metrics)) + event_handler(EndEpochEvent(epoch_id)) + if self.checkpoint_cfg: + self._clean_checkpoint() + + def _test_by_executor(self, reader, feed_order, fetch_list): + with executor.scope_guard(self.scope): + feed_var_list = build_feed_var_list(self.test_program, feed_order) + feeder = data_feeder.DataFeeder( + feed_list=feed_var_list, place=self.place) + exe = executor.Executor(self.place) + accumulated = len(fetch_list) * [0] + count = 0 + for data in reader(): + outs = exe.run(program=self.test_program, + feed=feeder.feed(data), + fetch_list=fetch_list) + accumulated = [x[0] + x[1][0] for x in zip(accumulated, outs)] + count += 1 + + return [x / count for x in accumulated] + + def _train_by_parallel_executor(self, num_epochs, event_handler, reader, + feed_order): + with self._prog_and_scope_guard(): + pe = self._get_or_create_parallel_executor() + feed_var_list = build_feed_var_list(self.train_program, feed_order) + feeder = data_feeder.DataFeeder( + feed_list=feed_var_list, place=self.place) + reader = feeder.decorate_reader(reader, multi_devices=True) + self._train_by_any_executor(event_handler, pe, num_epochs, reader) + + def _get_parallel_executor(self): + return getattr(self, 'parallel_executor', None) + + def _get_or_create_parallel_executor(self): + if self._get_parallel_executor() is None: + self.parallel_executor = parallel_executor.ParallelExecutor( + use_cuda=isinstance(self.place, core.CUDAPlace), + 
loss_name=self.train_func_outputs[0].name)
+        return self._get_parallel_executor()
+
+    def _clean_checkpoint(self):
+        assert self.checkpoint_cfg
+        clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir)
+
+    def _get_checkpoint_load_args(self):
+        """
+        epoch_id and step_id are runtime arguments; they are not variables,
+        so load them independently.
+        """
+        return ["epoch_id", "step_id"]
+
+    def _get_checkpoint_save_args(self, epoch_id, step_id):
+        """
+        epoch_id and step_id are runtime arguments; they are not variables,
+        so save them independently.
+        """
+        trainer_args = {}
+        trainer_args["epoch_id"] = epoch_id
+        trainer_args["step_id"] = step_id
+        return trainer_args
+
+    def _save_checkpoint(self, epoch_id, step_id):
+        assert self.checkpoint_cfg
+
+        if epoch_id % self.checkpoint_cfg.epoch_interval == 0 \
+                and step_id % self.checkpoint_cfg.step_interval == 0:
+            exe = executor.Executor(self.place)
+            save_checkpoint(
+                executor=exe,
+                checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
+                trainer_id=self.trainer_id,
+                trainer_args=self._get_checkpoint_save_args(epoch_id, step_id),
+                main_program=self.train_program,
+                max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints)
+
+    def _load_checkpoint(self):
+        with self._prog_and_scope_guard():
+            exe = executor.Executor(self.place)
+            load_checkpoint(
+                executor=exe,
+                checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
+                main_program=self.startup_program)
+
+            if not self.checkpoint_cfg.pserver_id:
+                load_trainer_args = self._get_checkpoint_load_args()
+                trainer_args = load_checkpoint(
+                    executor=exe,
+                    checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
+                    main_program=self.startup_program,
+                    role_id=self.trainer_id,
+                    is_trainer=True,
+                    load_trainer_args=load_trainer_args)
+
+                if len(trainer_args) != 2:
+                    raise ValueError(
+                        "the returned trainer_args length does not match _get_checkpoint_load_args"
+                    )
+                self.checkpoint_cfg.epoch_id = int(trainer_args[0])
+                self.checkpoint_cfg.step_id = int(trainer_args[1])
+            else:
+                if self.checkpoint_cfg.lookup_table_name:
+                    load_checkpoint(
+                        executor=exe,
+                        checkpoint_dir=self.checkpoint_cfg.checkpoint_dir,
+                        main_program=self.startup_program,
+                        role_id=self.checkpoint_cfg.pserver_id,
+                        is_trainer=False,
+                        load_trainer_args=None,
+                        load_lookup_table=self.checkpoint_cfg.lookup_table_name)
+
+
+def build_feed_var_list(program, feed_order):
+    if not isinstance(program, framework.Program):
+        raise TypeError("The 'program' should be an object of Program")
+
+    if isinstance(feed_order, list):
+        feed_var_list = [
+            program.global_block().var(var_name) for var_name in feed_order
+        ]
+    else:
+        if not isinstance(feed_order, dict):
+            raise TypeError(
+                "The 'feed_order' should be either None, list or dict.")
+        if not sorted(feed_order.values()) == list(range(len(feed_order))):
+            raise ValueError(
+                "The values of 'feed_order' should be a permutation of [0, len(feed_order))"
+            )
+        sorted_pair_list = sorted(
+            six.iteritems(feed_order), key=lambda item: item[1])
+        feed_var_list = [
+            program.global_block().var(pair[0]) for pair in sorted_pair_list
+        ]
+    return feed_var_list
+
+
+# Checkpoint APIs were moved here from io.py; all of them are private now.
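+# With the constants below, a checkpoint directory is laid out roughly as
+# <checkpoint_dir>/checkpoint_<serial>/{__model__, __lookup_table__, trainer_<id>}
+# plus a _SUCCESS mark file (an illustrative sketch of the on-disk layout).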
+SUCCESS_MARK_FILENAME = "_SUCCESS"
+CHECKPOINT_PREFIX = "checkpoint"
+MODEL_DIR = "__model__"
+LOOKUP_TABLE_DIR = "__lookup_table__"
+TRAINER_PREFIX = "trainer"
+CHECKPOINT_SEPARATOR = "_"
+
+
+def save_checkpoint(executor,
+                    checkpoint_dir,
+                    trainer_id,
+                    main_program,
+                    trainer_args=None,
+                    max_num_checkpoints=3,
+                    lookup_table=None,
+                    pserver_endpoints=None):
+    """
+    This function selects all checkpoint variables from the given
+    main_program and then saves these variables to the `checkpoint_dir`
+    directory.
+
+    In the training process, we generally save a checkpoint in each
+    iteration. So there might be a lot of checkpoints in the
+    `checkpoint_dir`. To avoid them taking too much disk space, the
+    `max_num_checkpoints` argument is introduced to limit the total number
+    of checkpoints. If the number of existing checkpoints is greater than
+    `max_num_checkpoints`, the oldest ones are deleted.
+
+    A variable is a checkpoint variable and will be saved if it meets
+    all of the following conditions:
+    1. It is persistable.
+    2. Its type is not FEED_MINIBATCH, FETCH_LIST or RAW.
+    3. Its name contains no "@GRAD", ".trainer_" or ".block".
+
+    Args:
+        executor(Executor): The executor to run for saving the checkpoint.
+        checkpoint_dir(str): The folder to save checkpoints into.
+        trainer_id(int): The current trainer id; the trainer with id 0 is
+            the chief.
+        main_program(Program): The program whose checkpoint variables will
+            be saved.
+        trainer_args(dict|None): Current training arguments, such as
+            'epoch_id' and 'step_id'.
+            Default: None
+        max_num_checkpoints(int): The maximum number of checkpoints to keep.
+            Default: 3
+        lookup_table(string|None): The lookup table name. When using a
+            distributed lookup table, the name can be obtained from
+            DistributeTranspiler.table_name.
+        pserver_endpoints(list|None): The parameter server ip:port list.
+            When using a distributed lookup table, it can be obtained from
+            the distribute arguments.
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: If `checkpoint_dir` is None.
+        AssertionError: If `trainer_args` is not a dict.
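+
+    Note:
+        A sketch of the retention policy (the serial numbers below are only
+        an example): with `max_num_checkpoints=3` and existing directories
+        checkpoint_0 .. checkpoint_3, a new save creates checkpoint_4 and
+        then deletes checkpoint_0 and checkpoint_1, keeping the three
+        newest serials (see `_scroll_delete` below).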
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            path = "./checkpoints"
+            prog = fluid.default_main_program()
+            trainer_args = {"epoch_id": 200,
+                            "step_id": 20} # just an example
+            table_name = "share_w"
+            ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
+
+            save_checkpoint(executor=exe,
+                            checkpoint_dir=path,
+                            trainer_id=0,
+                            trainer_args=trainer_args,
+                            main_program=prog,
+                            max_num_checkpoints=3,
+                            lookup_table=table_name,
+                            pserver_endpoints=ps_endpoints)
+    """
+    if checkpoint_dir is None:
+        raise ValueError("'checkpoint_dir' should not be None")
+
+    if main_program is None:
+        raise ValueError('main_program should not be None.')
+
+    if trainer_args:
+        assert isinstance(trainer_args, dict)
+
+    is_chief = trainer_id == 0
+
+    _make_chekcpoint_dirs(checkpoint_dir)
+    serial = _get_latest_checkpoint_serial(checkpoint_dir) + 1
+    cur_dir = _get_serial_dir(checkpoint_dir, serial)
+
+    _save_trainer_args(cur_dir, trainer_id, trainer_args)
+
+    if is_chief:
+        _save_persist_vars_without_grad(executor, cur_dir, main_program)
+
+    if is_chief and lookup_table and pserver_endpoints:
+        _save_pserver_vars_by_notify(executor, cur_dir, lookup_table,
+                                     pserver_endpoints)
+
+    _scroll_delete(checkpoint_dir, max_num_checkpoints)
+
+
+def load_checkpoint(executor,
+                    checkpoint_dir,
+                    main_program,
+                    role_id=0,
+                    is_trainer=True,
+                    load_trainer_args=None,
+                    load_lookup_table=None):
+    """
+    This function selects all checkpoint variables from the given
+    main_program and then tries to load these variables from the
+    `checkpoint_dir` directory.
+
+    In the training process, we generally save a checkpoint in each
+    iteration. So there may be more than one checkpoint in
+    `checkpoint_dir` (each checkpoint has its own sub folder); the
+    checkpoint with the latest serial number is loaded.
+
+    A variable is a checkpoint variable and will be loaded if it meets
+    all of the following conditions:
+    1. It is persistable.
+    2. Its type is not FEED_MINIBATCH, FETCH_LIST or RAW.
+    3. Its name contains no "@GRAD", ".trainer_" or ".block".
+
+    Args:
+        executor(Executor): The executor to run for loading the checkpoint.
+        checkpoint_dir(str): The folder where all checkpoints are.
+        main_program(Program): The program whose checkpoint variables will
+            be loaded.
+        role_id(int): The trainer id or the parameter server id.
+        is_trainer(bool): True for a trainer, False for a parameter server.
+        load_trainer_args(list|None): The names of the trainer arguments to
+            load, e.g. ["epoch_id", "step_id"].
+        load_lookup_table(str|None): The lookup table name.
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: If `checkpoint_dir` is None.
+        ValueError: If `main_program` is None.
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            path = "./checkpoints"
+            prog = fluid.default_main_program()
+            load_checkpoint(executor=exe, checkpoint_dir=path,
+                            main_program=prog)
+
+            # In this example, `load_checkpoint` first selects all
+            # checkpoint variables in the default main program, and then
+            # tries to load these variables from the latest checkpoint
+            # folder, e.g. "./checkpoints/checkpoint_9/__model__".
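+            # For a parameter server restoring a distributed lookup table,
+            # the call would instead look like this (a sketch; role_id and
+            # the table name are illustrative):
+            # load_checkpoint(executor=exe, checkpoint_dir=path,
+            #                 main_program=prog, role_id=1,
+            #                 is_trainer=False, load_trainer_args=None,
+            #                 load_lookup_table="share_w")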
+ """ + + if checkpoint_dir is None: + raise ValueError("'checkpoint_dir' should not be None") + + serial = _get_latest_checkpoint_serial(checkpoint_dir) + + # there are nothing need to be loaded + if serial is None or serial < 0: + return + + if main_program is None: + raise ValueError('main_program should not be None.') + + if is_trainer and load_trainer_args is None: + cur_dir = _get_serial_dir(checkpoint_dir, serial) + _load_persist_vars_without_grad(executor, cur_dir, main_program, True) + return + + if is_trainer and load_trainer_args: + return _load_trainer_args(checkpoint_dir, serial, role_id, + load_trainer_args) + + if not is_trainer and load_lookup_table: + _load_lookup_table_vars(executor, checkpoint_dir, main_program, role_id, + load_lookup_table) + + +def clean_checkpoint(checkpoint_dir, delete_dir=False): + """ + clean the checkpoint dir, when the train exits normally, + the trainer will call clean_checkpoint to delete checkpoint directory saved before. + delete_dir only works when the directory is empty, otherwise, OSError is raised. + + : param checkpoint_dir + : param delete_dir + """ + + if checkpoint_dir is None: + raise ValueError("'checkpoint_dir' should not be None") + _scroll_delete(checkpoint_dir, max_num_checkpoints=0) + + if delete_dir and not os.listdir(checkpoint_dir): + os.rmdir(checkpoint_dir) + + +def _load_persist_vars_without_grad(executor, + dirname, + program, + has_model_dir=False): + """ + This function filters out all checkpoint variables from the give + program and then trys to load these variables from the given directory. + + A variable is a checkpoint variable if it meets all following + conditions: + 1. It's persistable. + 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. + 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". + + Args: + executor(Executor): The executor to run for loading variables. + dirname(str): The directory path. + program(Program): The program whose checkpoint variables will + be loaded. + has_model_dir(bool): if True, the function loads variables + from a sub directory named '__model__'. + Default: False + + Returns: + None + + Examples: + .. code-block:: python + + exe = fluid.Executor(fluid.CPUPlace()) + param_path = "./my_paddle_model" + prog = fluid.default_main_program() + _load_persist_vars_without_grad(executor=exe, + dirname=param_path, program=prog, has_model_dir=True) + + # In this example, `_load_persist_vars_without_grad` function + # will first filters out all checkpoint variables in the default + # main program, and then trys to load these variables form the + # folder "./my_paddle_model/__model__". + """ + + if has_model_dir: + dirname = _get_model_dir(dirname) + + io.load_vars( + executor, + dirname=dirname, + main_program=program, + predicate=_is_checkpoint_var, + filename=None) + + +def _load_lookup_table_vars(executor, dirname, program, pserver_id, table_name): + """ + The parameter server will load lookup table's local file in + selectedrows variable. + + Args: + executor(Executor): The executor to run for loading persistable variables + dirname(str): The directory path + main_program(Program): Find the variable named table_name in main_program + pserver_id(int): the serial number in pserver_endpoints list + table_name(str): lookup table name + + Returns: + None + + Examples: + .. 
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            dirname = "./checkpoints/checkpoint_9/"
+            prog = fluid.default_main_program()
+            pserver_id = 1
+            table_name = "share_w"
+            _load_lookup_table_vars(executor=exe,
+                    dirname=dirname, program=prog, pserver_id=pserver_id,
+                    table_name=table_name)
+    """
+
+    for var in program.list_vars():
+        if var.name == table_name:
+            lookup_table_var = var
+            break
+
+    assert lookup_table_var is not None
+
+    lookup_table_dir = os.path.join(dirname, LOOKUP_TABLE_DIR)
+    table_file = table_name + CHECKPOINT_SEPARATOR + str(pserver_id)
+
+    load_prog = framework.Program()
+    load_block = load_prog.global_block()
+
+    load_block.append_op(
+        type='load',
+        inputs={},
+        outputs={'Out': [lookup_table_var]},
+        attrs={'file_path': os.path.join(lookup_table_dir, table_file)})
+
+    executor.run(load_prog)
+
+
+def _save_persist_vars_without_grad(executor, dirname, program):
+    """
+    This function selects all checkpoint variables from the given
+    program and then saves these variables to a sub-folder '__model__' of
+    the given directory.
+
+    A variable is a checkpoint variable if it meets all of the following
+    conditions:
+    1. It is persistable.
+    2. Its type is not FEED_MINIBATCH, FETCH_LIST or RAW.
+    3. Its name contains no "@GRAD", ".trainer_" or ".block".
+
+    Args:
+        executor(Executor): The executor to run for saving variables.
+        dirname(str): The directory path.
+        program(Program): The program whose checkpoint variables will
+            be saved.
+
+    Returns:
+        None
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            _save_persist_vars_without_grad(executor=exe,
+                    dirname=param_path, program=prog)
+
+            # In this example, `_save_persist_vars_without_grad` first
+            # selects all checkpoint variables in the default main program,
+            # and then saves these variables to the folder
+            # "./my_paddle_model/__model__".
+    """
+    cur_dir = _get_model_dir(dirname)
+    io.save_vars(
+        executor,
+        dirname=cur_dir,
+        main_program=program,
+        vars=None,
+        predicate=_is_checkpoint_var,
+        filename=None)
+    _write_success(cur_dir)
+
+
+def _save_pserver_vars_by_notify(executor, dirname, lookup_table,
+                                 ps_endpoint_list):
+    """
+    This function sends a checkpoint notify message from trainer 0 to all
+    the pservers. The notify message contains the lookup table name and
+    the absolute path on the pserver where the lookup_table should be
+    saved.
+
+    Args:
+        executor(Executor): The executor to run for sending the checkpoint
+            notify.
+        dirname(str): The folder where to save checkpoints.
+        lookup_table(string): The lookup table name. When using a
+            distributed lookup table, the name can be obtained from
+            DistributeTranspiler.table_name.
+        ps_endpoint_list(list): The parameter server ip:port list. When
+            using a distributed lookup table, it can be obtained from the
+            distribute arguments.
+
+    Returns:
+        None
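+
+    Note:
+        Internally this builds a one-op program containing a single
+        `checkpoint_notify` op whose `epmap` attribute lists every pserver
+        endpoint, and runs it once (see the body below).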
+
+    Examples:
+        .. code-block:: python
+
+            exe = fluid.Executor(fluid.CPUPlace())
+            param_path = "./my_paddle_model"
+            prog = fluid.default_main_program()
+            table_name = "share_w"
+            ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"]
+
+            _save_pserver_vars_by_notify(executor=exe,
+                    dirname=param_path, lookup_table=table_name,
+                    ps_endpoint_list=ps_endpoints)
+    """
+    cur_dir = _get_lookuptable_dir(dirname)
+
+    checkpoint_notify_program = framework.Program()
+    checkpoint_notify_block = checkpoint_notify_program.global_block()
+
+    attrs = {}
+    attrs['epmap'] = ps_endpoint_list
+    attrs['dir'] = cur_dir
+    attrs['lookup_table'] = lookup_table
+
+    checkpoint_notify_block.append_op(
+        type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs)
+    executor.run(checkpoint_notify_program)
+
+
+def _save_trainer_args(dirname, trainer_id, trainer_args):
+    assert isinstance(trainer_args, dict)
+
+    cur_dir = _get_trainer_dir(dirname, trainer_id)
+
+    for name, value in six.iteritems(trainer_args):
+        args_file = os.path.join(cur_dir, name)
+        with open(args_file, 'w') as f:
+            f.write(str(value))
+    _write_success(cur_dir)
+
+
+def _load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args):
+    """
+    The trainer loads some runtime arguments, such as epoch_id and step_id,
+    from its own trainer directory.
+
+    Args:
+        checkpoint_dir(str): The folder where all checkpoints are.
+        serial(int): The serial of the checkpoint to load from.
+        trainer_id(int): The current trainer id.
+        trainer_args(list): The names of the trainer arguments to load.
+
+    Returns:
+        list: the loaded argument values, in the same order as
+        trainer_args.
+
+    Examples:
+        .. code-block:: python
+
+            param_path = "./checkpoint/"
+            serial = 7
+            trainer_id = 2
+            trainer_args = ["epoch_id", "step_id"]
+
+            _load_trainer_args(checkpoint_dir=param_path, serial=serial,
+                    trainer_id=trainer_id, trainer_args=trainer_args)
+    """
+    assert isinstance(trainer_args, list)
+
+    cur_dir = _get_serial_dir(checkpoint_dir, serial)
+    cur_dir = _get_trainer_dir(cur_dir, trainer_id)
+
+    ret_values = []
+
+    for arg in trainer_args:
+        cur_file = os.path.join(cur_dir, arg)
+        with open(cur_file, 'r') as f:
+            contents = f.read()
+            ret_values.append(contents.strip())
+    return ret_values
+
+
+def _is_checkpoint_var(var):
+    """
+    Checkpoints do not save or load every variable: a variable whose type
+    is FEED_MINIBATCH, FETCH_LIST or RAW, or whose name contains "@GRAD",
+    ".trainer_" or ".block", is discarded.
+
+    :param var: the Variable to test
+    """
+    if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \
+            var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \
+            var.desc.type() == core.VarDesc.VarType.RAW:
+        return False
+    # Names containing "@GRAD" belong to gradient variables; checkpoints do
+    # not save them.
+    if "@GRAD" in var.name:
+        return False
+    # Names containing ".trainer_" belong to distributed training
+    # variables; checkpoints do not save them.
+    if ".trainer_" in var.name:
+        return False
+
+    # Names containing ".block" belong to distributed training variables;
+    # checkpoints do not save them.
+    if ".block" in var.name:
+        return False
+
+    return var.persistable
+
+
+def _make_chekcpoint_dirs(dirs):
+    """
+    _make_chekcpoint_dirs creates the given local directory (including any
+    missing parents); if the directory already exists, it is ignored.
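+
+    For example (an illustrative path):
+
+    .. code-block:: python
+
+        _make_chekcpoint_dirs("./checkpoints/checkpoint_0/__model__")
+        # creates the nested path if it is absent;
+        # calling it again is a no-op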
+ """ + assert dirs is not None + + if os.path.isfile(dirs): + raise OSError(errno.ENOTDIR, "dirs path shoule be a Directory.", dirs) + + if not os.path.isdir(dirs): + try: + os.makedirs(dirs) + except OSError as err: + if err.errno != errno.EEXIST: + raise err + + +def _get_dir_serial(dirname): + _, serial = dirname.split(CHECKPOINT_SEPARATOR) + + try: + serial_num = int(serial) + except ValueError: + serial_num = -1 + return serial_num + + +def _get_serial_dir(dirname, serial): + serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial) + serial_dir = os.path.join(dirname, serial_folder) + _make_chekcpoint_dirs(serial_dir) + + return serial_dir + + +def _get_model_dir(dirname): + model_dir = os.path.join(dirname, MODEL_DIR) + _make_chekcpoint_dirs(model_dir) + return model_dir + + +def _get_lookuptable_dir(dirname): + lookuptable_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) + _make_chekcpoint_dirs(lookuptable_dir) + return lookuptable_dir + + +def _get_trainer_dir(dirname, trainer_id): + trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id) + trainer_dir = os.path.join(dirname, trainer_folder) + _make_chekcpoint_dirs(trainer_dir) + return trainer_dir + + +def _scroll_delete(dirname, max_num_checkpoints=3): + dirs = os.listdir(dirname) + serial_map = {} + for serial in dirs: + serial_num = _get_dir_serial(serial) + serial_map[serial_num] = serial + + if len(list(serial_map.keys())) <= max_num_checkpoints: + return + + serials = list(serial_map.keys()) + serials.sort(reverse=True) + serials = serials[max_num_checkpoints:] + for serial in serials: + cur_dir = _get_serial_dir(dirname, serial) + try: + shutil.rmtree(cur_dir) + except OSError as err: + if err.errno != errno.ENOENT: + raise err + + +def _write_success(dirname): + """ + write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct. + + : param dirname + """ + success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME) + with open(success_file, 'a') as f: + now = time.ctime() + f.write(now) + + +def _get_latest_checkpoint_serial(checkpoint_dir): + """ + get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory + + : param checkpoint_dir + """ + if not checkpoint_dir: + return -1 + + def has_success(checkpoint_dir, cur_dir): + """ + is _SUCCESS in this dir + """ + + serial = _get_dir_serial(cur_dir) + if serial == -1 or not os.path.isdir( + os.path.join(checkpoint_dir, cur_dir)): + return -1 + + success_path = os.path.join( + _get_serial_dir(checkpoint_dir, serial), MODEL_DIR, + SUCCESS_MARK_FILENAME) + if os.path.isfile(success_path): + return serial + + if not os.path.isdir(checkpoint_dir): + return -1 + + current_dir = -1 + dirs = os.listdir(checkpoint_dir) + for cur_dir in dirs: + success_num = has_success(checkpoint_dir, cur_dir) + if success_num > current_dir: + current_dir = success_num + return current_dir diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 1d3c94229048ef568dfa651cc20731190beee3b8..bced5fd1d9c617ab614212c811e86422d65a2e56 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -18,6 +18,7 @@ import collections import contextlib import re import six +import traceback import numpy as np @@ -34,11 +35,12 @@ except ImportError as e: except Exception as e: raise e from . 
import unique_name +import os +PADDLE_ON_MODEL_CE = os.environ.get('PADDLE_ON_MODEL_CE', None) is not None __all__ = [ 'Program', 'Operator', - 'Parameter', 'default_startup_program', 'default_main_program', 'program_guard', @@ -490,7 +492,8 @@ class OpProtoHolder(object): return { core.op_proto_and_checker_maker.kOpRoleAttrName(), core.op_proto_and_checker_maker.kOpRoleVarAttrName(), - core.op_proto_and_checker_maker.kOpNameScopeAttrName() + core.op_proto_and_checker_maker.kOpNameScopeAttrName(), + core.op_proto_and_checker_maker.kOpCreationCallstackAttrName() } @@ -573,6 +576,11 @@ class Operator(object): if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0: del op_attrs[role_var_name] + if not PADDLE_ON_MODEL_CE: + callstack_var_name = op_maker.kOpCreationCallstackAttrName() + op_attrs[callstack_var_name] = list( + reversed(traceback.format_stack()))[1:] + if len(self.desc.type()) != 0: return if type is None: diff --git a/python/paddle/fluid/inferencer.py b/python/paddle/fluid/inferencer.py index a9b94a20720615dbfca97749463f27dbc88ac64f..7bdd430f985bd0b3818f6ef305ce2d7d8976106b 100644 --- a/python/paddle/fluid/inferencer.py +++ b/python/paddle/fluid/inferencer.py @@ -12,101 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function - -import contextlib - -from . import core - -from . import executor -from . import framework -from . import io -from . import parallel_executor -from . import unique_name -from .trainer import check_and_get_place - -__all__ = ['Inferencer', ] - - -class Inferencer(object): - """ - Inferencer High Level API. - - Args: - infer_func (Python func): Infer function that will return predict Variable - param_path (str): The path where the inference model is saved by fluid.io.save_params - place (Place): place to do the inference - parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU. - - Examples: - .. code-block:: python - - def inference_program(): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) - return y_predict - - place = fluid.CPUPlace() - inferencer = fluid.Inferencer( - infer_func=inference_program, param_path="/tmp/model", place=place) - - """ - - def __init__(self, infer_func, param_path, place=None, parallel=False): - self.param_path = param_path - self.scope = core.Scope() - self.parallel = parallel - self.place = check_and_get_place(place) - - self.inference_program = framework.Program() - with framework.program_guard(self.inference_program): - with unique_name.guard(): - self.predict_var = infer_func() - - with self._prog_and_scope_guard(): - # load params from param_path into scope - io.load_params(executor.Executor(self.place), param_path) - - if parallel: - with self._prog_and_scope_guard(): - self.exe = parallel_executor.ParallelExecutor( - use_cuda=isinstance(self.place, core.CUDAPlace), - loss_name=self.predict_var.name) - else: - self.exe = executor.Executor(self.place) - - self.inference_program = self.inference_program.clone(for_test=True) - - def infer(self, inputs, return_numpy=True): - """ - Do Inference for Inputs - - Args: - inputs (map): a map of {"input_name": input_var} that will be feed into the inference program - return_numpy (bool): transform return value into numpy or not - - Returns: - Tensor or Numpy: the predict value of the inference model for the inputs - - Examples: - .. 
code-block:: python - - tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32") - results = inferencer.infer({'x': tensor_x}) - """ - if not isinstance(inputs, dict): - raise ValueError( - "inputs should be a map of {'input_name': input_var}") - - with self._prog_and_scope_guard(): - results = self.exe.run(feed=inputs, - fetch_list=[self.predict_var.name], - return_numpy=return_numpy) - - return results - - @contextlib.contextmanager - def _prog_and_scope_guard(self): - with framework.program_guard(main_program=self.inference_program): - with executor.scope_guard(self.scope): - yield +# NOTE: inferencer is moved into fluid.contrib.inferencer. +__all__ = [] diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 6e0f3de4141aff3efb98b6d1162823a13f83a7bb..2cb61a9cd25c744710ab7ac9ea591902740f78da 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -6471,12 +6471,14 @@ def _elementwise_op(helper): assert y is not None, 'y cannot be None in {}'.format(op_type) axis = helper.kwargs.get('axis', -1) use_mkldnn = helper.kwargs.get('use_mkldnn', False) - name = helper.kwargs.get('name', None) - if name is None: - out = helper.create_tmp_variable(dtype=x.dtype) - else: - out = helper.create_variable( - name=name, dtype=x.dtype, persistable=False) + out = helper.kwargs.get('out', None) + if out is None: + name = helper.kwargs.get('name', None) + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) helper.append_op( type=op_type, @@ -6489,7 +6491,13 @@ def _elementwise_op(helper): @templatedoc() -def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): +def scale(x, + scale=1.0, + bias=0.0, + bias_after_scale=True, + out=None, + act=None, + name=None): """ ${comment} @@ -6498,6 +6506,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): scale(${scale_type}): ${scale_comment} bias(${bias_type}): ${bias_comment} bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment} + out(Tensor): Output tensor. act(basestring|None): Activation applied to the output. name(basestring|None): Name of the output. 
@@ -6506,11 +6515,12 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): """ helper = LayerHelper('scale', **locals()) - if name is None: - out = helper.create_tmp_variable(dtype=x.dtype) - else: - out = helper.create_variable( - name=name, dtype=x.dtype, persistable=False) + if out is None: + if name is None: + out = helper.create_tmp_variable(dtype=x.dtype) + else: + out = helper.create_variable( + name=name, dtype=x.dtype, persistable=False) helper.append_op( type='scale', @@ -6524,31 +6534,73 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None): return helper.append_activation(out) -def elementwise_add(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_add(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_add', **locals())) -def elementwise_div(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_div(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_div', **locals())) -def elementwise_sub(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_sub(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_sub', **locals())) -def elementwise_mul(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_mul(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_mul', **locals())) -def elementwise_max(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_max(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_max', **locals())) -def elementwise_min(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_min(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_min', **locals())) -def elementwise_pow(x, y, axis=-1, use_mkldnn=False, act=None, name=None): +def elementwise_pow(x, + y, + out=None, + axis=-1, + use_mkldnn=False, + act=None, + name=None): return _elementwise_op(LayerHelper('elementwise_pow', **locals())) @@ -6560,6 +6612,7 @@ for func in [ func.__doc__ = _generate_doc_string_( op_proto, additional_args_lines=[ + "out (Tensor): The output tensor of elementwise op.", "act (basestring|None): Activation applied to the output.", "name (basestring|None): Name of the output." 
]) diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 48d92c342d630582aebfae60b59d868f5893c422..7867bfe00e25711643eab1ab8d0141dbbad3da52 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -21,6 +21,7 @@ __activations_noattr__ = [ 'exp', 'tanh', 'tanh_shrink', + 'softshrink', 'sqrt', 'abs', 'ceil', @@ -52,7 +53,6 @@ __all__ = [ 'slice', 'shape', 'maxout', - 'softshrink', ] for _OP in set(__all__): diff --git a/python/paddle/fluid/nets.py b/python/paddle/fluid/nets.py index 051fe84364639ca6028326c0cb02b204a02531af..06513801dd8b34d366f9632f6943c8046872c31b 100644 --- a/python/paddle/fluid/nets.py +++ b/python/paddle/fluid/nets.py @@ -21,6 +21,7 @@ __all__ = [ "sequence_conv_pool", "glu", "scaled_dot_product_attention", + "img_conv_group", ] diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index 44af29d3390e35129d0ee65b31eacad6b28a9d60..57d272cbfb948840679e80e8db40379c57603113 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -74,28 +74,7 @@ class ParallelExecutor(object): build_strategy=None, num_trainers=1, trainer_id=0, - scope=None, - **kwargs): - if len(kwargs) != 0: - err_msg = "" - for key in kwargs: - if key in dir(ExecutionStrategy): - err_msg += \ - "Setting {0} by constructor is deprecated. Use " \ - "strategy=ExecutionStrategy(); strategy.{0}=xxx; " \ - "pe=ParallelExecutor(exec_strategy=strategy) " \ - "instead.\n ".format(key) - elif key in dir(BuildStrategy): - err_msg += \ - "Setting {0} by constructor is deprecated. Use " \ - "strategy=BuildStrategy(); See help(" \ - "paddle.fluid.ParallelExecutor.BuildStrategy) \n".format( - key) - else: - err_msg += "Setting {0} by constructor is deprecated. Use strategy.\n".format( - key) - raise ValueError(err_msg) - + scope=None): self._places = [] self._act_places = [] if use_cuda: diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index f0be794327f51cbbc4202b8b7b401b712b6d66a3..a51607bfdb1dde3d25f490770cc2ba368ceb27ff 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -185,7 +185,17 @@ class WeightNormParamAttr(ParamAttr): Args: dim(list): The parameter's name. Default None. - kwargs: Any field in ParamAttr. Default None. + name(str): The parameter's name. Default None. + initializer(Initializer): The method to initial this parameter. Default None. + learning_rate(float): The parameter's learning rate. The learning rate when + optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`. + Default 1.0. + regularizer(WeightDecayRegularizer): Regularization factor. Default None. + trainable(bool): Whether this parameter is trainable. Default True. + gradient_clip(BaseGradientClipAttr): The method to clip this parameter's + gradient. Default None. + do_model_average(bool): Whether this parameter should do model average. + Default False. Examples: .. code-block:: python @@ -204,6 +214,21 @@ class WeightNormParamAttr(ParamAttr): # these paramters for inference. 
params_with_weight_norm = [] - def __init__(self, dim=None, **kwargs): - super(WeightNormParamAttr, self).__init__(**kwargs) + def __init__(self, + dim=None, + name=None, + initializer=None, + learning_rate=1.0, + regularizer=None, + trainable=True, + gradient_clip=None, + do_model_average=False): + super(WeightNormParamAttr, self).__init__( + name=name, + initializer=initializer, + learning_rate=learning_rate, + regularizer=regularizer, + trainable=trainable, + gradient_clip=gradient_clip, + do_model_average=do_model_average) self.dim = dim diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py index e1368a3392a9cab3e82eff0a73eb225a52aa03bf..87f3b7502e26d3e6a437985f99d7897b060e101e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import contextlib import numpy import unittest @@ -57,11 +67,11 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname, inference_model_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func) def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): if event.step == 10: test_metrics = trainer.test( reader=test_reader, feed_order=['x', 'y']) @@ -91,7 +101,7 @@ def infer(use_cuda, inference_program, params_dirname=None): return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place) batch_size = 10 diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py index de276755bb1eb2746cc780575a40357255223809..d744a00242422defb360590b193e07c6f811dcb9 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py @@ -14,11 +14,22 @@ from __future__ import print_function +import sys + import paddle import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.core as core import numpy -import six import os import cifar10_small_test_set @@ -106,7 +117,7 @@ def train(use_cuda, train_program, parallel, params_dirname): paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False) def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, 
EndStepEvent): avg_cost, accuracy = trainer.test( reader=test_reader, feed_order=['pixel', 'label']) @@ -118,7 +129,7 @@ def train(use_cuda, train_program, parallel, params_dirname): return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, optimizer_func=optimizer_func, place=place, @@ -133,7 +144,7 @@ def train(use_cuda, train_program, parallel, params_dirname): def infer(use_cuda, inference_program, parallel, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place, diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py index dd547f3448ae55c07b6c09f9de4ac08d8ec5ee88..82294d4b26fe64e6cddc81f9ba3480caf5b51620 100644 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py +++ b/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py @@ -14,11 +14,22 @@ from __future__ import print_function +import sys + import paddle import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.core as core import numpy -import six import os import cifar10_small_test_set @@ -83,7 +94,7 @@ def train(use_cuda, train_program, parallel, params_dirname): paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False) def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): avg_cost, accuracy = trainer.test( reader=test_reader, feed_order=['pixel', 'label']) @@ -95,7 +106,7 @@ def train(use_cuda, train_program, parallel, params_dirname): return place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func, @@ -110,7 +121,7 @@ def train(use_cuda, train_program, parallel, params_dirname): def infer(use_cuda, inference_program, parallel, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place, diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py index ec4e1c768c7f2a2421ac409a2eecc0100c086a6a..9e155a59145db88dab27576a4a67a5d450bcfc9d 100755 --- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to 
paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import numpy as np WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict() @@ -149,7 +159,7 @@ def optimize_func(): def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimize_func) feed_order = [ @@ -164,7 +174,7 @@ def train(use_cuda, train_program, params_dirname): # place) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.conll05.test(), batch_size=BATCH_SIZE) avg_cost_set = trainer.test( @@ -184,7 +194,7 @@ def train(use_cuda, train_program, params_dirname): if math.isnan(float(avg_cost)): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 1: # Run 2 iterations to speed CI @@ -204,7 +214,7 @@ def train(use_cuda, train_program, params_dirname): def infer(use_cuda, inference_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( inference_program, param_path=params_dirname, place=place) # Setup input by creating LoDTensor to represent sequence of words. diff --git a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py index 560f1189581f631dc6a3470cf8f22f902ca26f26..b597dcf801dc5ad4b5957875634018cfdcd0b83b 100644 --- a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py +++ b/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py @@ -13,17 +13,28 @@ # limitations under the License. 
from __future__ import print_function + import contextlib +import sys import numpy as np import paddle import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.framework as framework import paddle.fluid.layers as pd from paddle.fluid.executor import Executor from functools import partial import unittest -import os dict_size = 30000 source_dict_dim = target_dict_dim = dict_size @@ -198,12 +209,12 @@ def train(use_cuda, is_sparse, is_local=True): ] def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): print('pass_id=' + str(event.epoch) + ' batch=' + str(event.step)) if event.step == 10: trainer.stop() - trainer = fluid.Trainer( + trainer = Trainer( train_func=partial(train_program, is_sparse), place=place, optimizer_func=optimizer_func) diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py index 973308498bec3cddde2ef651751ad5d0c9f84503..ce183883e3bddd8633dd9c393ee358ba6210ea61 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py @@ -14,14 +14,22 @@ from __future__ import print_function -import argparse +import sys + import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.core as core import paddle -import six -import sys import numpy -import unittest import math import sys import os @@ -68,14 +76,14 @@ def optimizer_func(): def train(use_cuda, train_program, parallel, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func, parallel=parallel) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) avg_cost, acc = trainer.test( @@ -91,7 +99,7 @@ def train(use_cuda, train_program, parallel, params_dirname): event.epoch + 1, avg_cost, acc)) if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print( ("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, @@ -112,7 +120,7 @@ def train(use_cuda, train_program, parallel, params_dirname): def infer(use_cuda, inference_program, parallel, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place, diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py 
b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py index cb4aeb430e1a9662a183084c0cdacc41c5a8ec11..45a5ff34af00f2dbe69bd4f08a50626d6ca814f8 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py +++ b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py @@ -14,14 +14,22 @@ from __future__ import print_function -import argparse +import sys + import paddle.fluid as fluid + +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.core as core import paddle -import six -import sys import numpy -import unittest import math import sys import os @@ -55,14 +63,14 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname, parallel): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func, parallel=parallel) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) avg_cost, acc = trainer.test( @@ -94,7 +102,7 @@ def train(use_cuda, train_program, params_dirname, parallel): def infer(use_cuda, inference_program, parallel, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place, diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py index 9e2767783bb6748cfc8f95567627068d7532a8c8..82193737967b2bebdd17cef8752eeb9cec2e85ce 100644 --- a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py +++ b/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py @@ -19,6 +19,16 @@ import sys import numpy as np import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import paddle.fluid.layers as layers import paddle.fluid.nets as nets @@ -164,7 +174,7 @@ def optimizer_func(): def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, place=place, optimizer_func=optimizer_func) feed_order = [ @@ -173,7 +183,7 @@ def train(use_cuda, train_program, params_dirname): ] def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): test_reader = paddle.batch( paddle.dataset.movielens.test(), batch_size=BATCH_SIZE) avg_cost_set = trainer.test( @@ -208,7 +218,7 @@ def train(use_cuda, train_program, params_dirname): def infer(use_cuda, inference_program, params_dirname): place = 
fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( inference_program, param_path=params_dirname, place=place) # Use the first data from paddle.dataset.movielens.test() as input. diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py index 097c2a468fca558106aba2f24c332256189d9076..14719774b9d90c2e96d8f6134469502241a5f1f2 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * from functools import partial import numpy as np @@ -72,13 +82,13 @@ def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - trainer = fluid.Trainer( + trainer = Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) avg_cost, acc = trainer.test( @@ -96,7 +106,7 @@ def train(use_cuda, train_program, params_dirname): event.epoch + 1, avg_cost, acc)) if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 1: # Run 2 iterations to speed CI @@ -119,7 +129,7 @@ def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py index 5f74cd142590abb93f8846bc831a9f5e3dd2f311..62fbba6fe1a62da6a93d50abc074bf5d794cf458 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * from functools import partial import 
numpy as np @@ -87,13 +97,13 @@ def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - trainer = fluid.Trainer( + trainer = Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE) avg_cost, acc = trainer.test( @@ -111,7 +121,7 @@ def train(use_cuda, train_program, params_dirname): event.epoch + 1, avg_cost, acc)) if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 1: # Run 2 iterations to speed CI @@ -134,7 +144,7 @@ def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place) diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py index 284a6ca168636377699c287236c491352566909b..7523ad3fef17f61b1bde1fc687761cc6b86c3d9e 100644 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py +++ b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * from functools import partial import numpy as np @@ -79,13 +89,13 @@ def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - trainer = fluid.Trainer( + trainer = Trainer( train_func=partial(train_program, word_dict), place=place, optimizer_func=optimizer_func) def event_handler(event): - if isinstance(event, fluid.EndEpochEvent): + if isinstance(event, EndEpochEvent): test_reader = paddle.batch( paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE, @@ -105,7 +115,7 @@ def train(use_cuda, train_program, params_dirname): event.epoch + 1, avg_cost, acc)) if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - elif isinstance(event, fluid.EndStepEvent): + elif isinstance(event, EndStepEvent): print("Step {0}, Epoch {1} Metrics {2}".format( event.step, event.epoch, list(map(np.array, event.metrics)))) if event.step == 1: # Run 2 iterations to speed CI @@ -129,7 +139,7 @@ def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() word_dict = paddle.dataset.imdb.word_dict() - inferencer = fluid.Inferencer( + inferencer = Inferencer( 
infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place) diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py index 1c7cf3199a07c3f65d967eda70a481b1bd1b1638..e4c0cc5429d3fe891034161d90fadfa9dd078b0b 100644 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py @@ -16,6 +16,16 @@ from __future__ import print_function import paddle import paddle.fluid as fluid +import sys +try: + from paddle.fluid.contrib.trainer import * + from paddle.fluid.contrib.inferencer import * +except ImportError: + print( + "In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib", + file=sys.stderr) + from paddle.fluid.trainer import * + from paddle.fluid.inferencer import * import numpy as np import math import sys @@ -95,7 +105,7 @@ def train(use_cuda, train_program, params_dirname): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() def event_handler(event): - if isinstance(event, fluid.EndStepEvent): + if isinstance(event, EndStepEvent): outs = trainer.test( reader=test_reader, feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw']) @@ -109,7 +119,7 @@ def train(use_cuda, train_program, params_dirname): if math.isnan(avg_cost): sys.exit("got NaN loss, training failed.") - trainer = fluid.Trainer( + trainer = Trainer( train_func=train_program, optimizer_func=optimizer_func, place=place) trainer.train( @@ -121,7 +131,7 @@ def train(use_cuda, train_program, params_dirname): def infer(use_cuda, inference_program, params_dirname=None): place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - inferencer = fluid.Inferencer( + inferencer = Inferencer( infer_func=inference_program, param_path=params_dirname, place=place) # Setup inputs by creating 4 LoDTensors representing 4 words. 
Here each word diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index a198b25520f97ce23b9c1ebb9cd82fc458222d73..ecde407e6d85ea1bfc0181b4b60e095ea496fb1a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -659,5 +659,28 @@ class TestLoadSliceVar(TranspilerTest): pserver2._slice_vars_and_attrs[idx][2].shape)) +class TestNCCL2Transpile(TranspilerTest): + def test_nccl2_transpile(self): + if fluid.core.is_compiled_with_cuda(): #test nccl2 only with cuda + main = fluid.Program() + startup = fluid.Program() + with fluid.program_guard(main, startup): + self.net_conf() + + config = fluid.DistributeTranspilerConfig() + config.mode = "nccl2" + t = fluid.DistributeTranspiler(config=config) + t.transpile( + 0, + trainers="127.0.0.1:6174,127.0.0.1:6175", + current_endpoint="127.0.0.1:6174", + startup_program=startup) + print([op.type for op in startup.global_block().ops]) + self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id") + self.assertIsNotNone(startup.global_block().vars.get("NCCLID")) + else: + pass + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 1fe7016924696b6e47d9cc35c137004f15a9b507..f474cdae2054531d44724e0e3e0e58a35fb8ddcd 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -758,6 +758,14 @@ class TestBook(unittest.TestCase): out = layers.expand(x, [1, 2]) print(str(program)) + def test_softshrink(self): + program = Program() + with program_guard(program): + input = layers.data(name="input", shape=[16], dtype="float32") + out = layers.softshrink(input, name='softshrink') + self.assertIsNotNone(out) + print(str(program)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py index 4153394c1da776d0a41e1415a09fa7d6f4b14d6d..37b9a9188ab44df81029ae6d9925ae21c1929cff 100644 --- a/python/paddle/fluid/tests/unittests/test_operator_desc.py +++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py @@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase): set(mul_op.attr_names), set([ "x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var", - "op_namescope" + "op_namescope", "op_callstack" ])) self.assertEqual(mul_op.has_attr("x_num_col_dims"), True) self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT) diff --git a/python/paddle/fluid/trainer.py b/python/paddle/fluid/trainer.py index 30cdfe4ad2c9892184862b70ff49417ce5a08516..b495b6699b5d02ca8c466c984820be5c497d626e 100644 --- a/python/paddle/fluid/trainer.py +++ b/python/paddle/fluid/trainer.py @@ -12,1247 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. -from __future__ import print_function - -import contextlib -import os -import errno -import shutil -import six -import time - -from . import core -from . import data_feeder -from . import executor -from . import framework -from . import io -# optimizer is same as the parameter of Trainer.__init__. Rename it to opt_module -from . import optimizer as opt_module -from . 
import parallel_executor -from .transpiler import distribute_transpiler - -__all__ = [ - 'Trainer', 'BeginEpochEvent', 'EndEpochEvent', 'BeginStepEvent', - 'EndStepEvent', 'CheckpointConfig' -] - - -class BeginEpochEvent(object): - """ - The begin of a training epoch. - - Args: - epoch_id(int): The current epoch ID. - """ - - def __init__(self, epoch_id): - self.epoch = epoch_id - - -class EndEpochEvent(object): - """ - The end of a training epoch. - - Args: - epoch_id(int): The current epoch ID. - """ - - def __init__(self, epoch_id): - self.epoch = epoch_id - - -class BeginStepEvent(object): - """ - The begin of a training epoch. - - Args: - epoch_id(int): The current epoch ID. - step_id(int): The current step ID. - """ - - def __init__(self, epoch_id, step_id): - self.epoch = epoch_id - self.step = step_id - self.fetch_metrics = True - """ - If fetch_metrics is true, the metrics will be fetched at the - EndStepEvent. Default is True. - """ - - -class EndStepEvent(object): - """ - The end of a training step. - - Args: - epoch_id(int): The current epoch ID. - step_id(int): The current step ID. - metrics(list): A list of fetched tensor. The order of this list is same - as the :code:`train_func` returns. - """ - - def __init__(self, epoch_id, step_id, metrics): - self.epoch = epoch_id - self.step = step_id - self.metrics = metrics - - -class CheckpointConfig(object): - """ - Parameter object for :code:`save_checkpoint` and - :code:`fluid.Trainer`. Used to configuration how to save checkpoint. - - Args: - checkpoint_dir(str): Directory path to save check point. Default is the - current directory. - - max_num_checkpoints(int): The max number of local check points. - epoch_interval(int): Every number of epoch to save check point. - step_interval(int): Every number of step to save check point. - - Examples: - >>> config = fluid.CheckpointConfig("./checkpoints") - >>> trainer = fluid.Trainer(train_func=train_program, - >>> place=place, - >>> optimizer_func=optimizer_func, - >>> checkpoint_config=config) - >>> trainer.train(...) - """ - - def __init__(self, - checkpoint_dir=None, - max_num_checkpoints=3, - epoch_interval=1, - step_interval=10): - - assert epoch_interval >= 1 - assert step_interval >= 1 - - self.checkpoint_dir = checkpoint_dir \ - if checkpoint_dir is not None else os.getcwd() - self.max_num_checkpoints = max_num_checkpoints - self.epoch_interval = epoch_interval - self.step_interval = step_interval - self.epoch_id = 0 - self.step_id = 0 - self.load_serial = None - self.pserver_id = None - self.lookup_table_name = None - - -def check_and_get_place(place): - """ - Check the type of place or get the default place - Args: - place(None|core.CUDAPlace|core.CPUPlace): the place that trainer will be executed on. - - Raises: - TypeError if the type mismatched. - - Returns: - the original place if it is not None. - if fluid is compiled with CUDA, returns CUDAPlace(0) by default. - Otherwise returns CPUPlace by default. - """ - if place is None: - if core.is_compiled_with_cuda(): - return core.CUDAPlace(0) - else: - return core.CPUPlace() - else: - if not isinstance(place, core.CUDAPlace) and not isinstance( - place, core.CPUPlace): - raise TypeError("Place should be either CUDAPlace or CPUPlace") - return place - - -class Trainer(object): - """ - A trainer wraps MultiGPU/MultiNode training loops and can be used to train a - simple neural network easily. - - This API takes a :code:`train_func`. A :code:`train_func` is a function that - return loss as it first return value. 
The reset value can be fetched by - EndStepEvent.metrics - - This API also takes a :code:`optimizer_func` that will return an optimizer - instance. - - For example, to train a MLP for MNIST dataset, the sample program is - - >>> import paddle.fluid as fluid - >>> - >>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10): - >>> hidden = image - >>> for layer_size in layer_sizes: - >>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation) - >>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax") - >>> - >>> def train_mnist_mlp(): - >>> img = fluid.layers.data(name='image', shape=[784]) - >>> label = fluid.layers.data(name='label', shape=[1], dtype='int64') - >>> prediction = mlp(img) - >>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label)) - >>> - >>> def optimizer(): - >>> return fluid.optimizer.Adam() - >>> - >>> trainer = Trainer(train_func=train_mnist_mlp, - >>> optimizer_func=optimizer, - >>> place=fluid.CUDAPlace(0), - >>> parallel=True) - >>> - >>> def train_callback(event): - >>> if isinstance(event, fluid.EndStepEvent): - >>> print "Epoch ID", event.epoch, "Step ID",\ - >>> event.step, "AvgLoss", event.metrics[0] - >>> elif isinstance(event, fluid.EndEpochEvent): - >>> trainer.save_params("./model_{0}".format(event.epoch)) - >>> - >>> trainer.train(num_epochs=100, event_handler=train_callback) - - For more example, please see :ref:`api_guide_high_level_api`. - - - Args: - train_func(callable): A function which will return loss. The loss must be - a scalar tensor. - optimizer_func(callable): A function that returns an Optimizer object. - place(CUDAPlace|CPUPlace): The device place of this trainer. If - :code:`parallel=True,` all CUDA Places will be used if :code:`place` - is a :code:`CUDAPlace`. - parallel(bool): True if use multiple devices. - checkpoint_config(CheckpointConfig): Configuration about how to save - checkpoints. - """ - - def __init__(self, - train_func, - optimizer_func, - param_path=None, - place=None, - parallel=False, - checkpoint_config=None): - self.__stop = False - self.parallel = parallel - - # config for checkpoint - # only chief worker will save variables - self.trainer_id = 0 - self.checkpoint_cfg = checkpoint_config - if self.checkpoint_cfg: - assert isinstance(self.checkpoint_cfg, CheckpointConfig) - serial = _get_latest_checkpoint_serial( - self.checkpoint_cfg.checkpoint_dir) - self.checkpoint_cfg.load_serial = serial if serial >= 0 else None - - self.scope = core.Scope() - - # 1. we need to generate a framework.Program by calling - # program_func. Reference: fluid.program_guard in - # test_word2vec.py - - self.startup_program = framework.Program() - self.train_program = framework.Program() - - with framework.program_guard(self.train_program, self.startup_program): - program_func_outs = train_func() - self.train_func_outputs = program_func_outs if isinstance( - program_func_outs, list) else [program_func_outs] - self.test_program = self.train_program.clone(for_test=True) - - # The first element of program_func_outs is loss. - loss = self.train_func_outputs[0] - - optimizer = optimizer_func() - if not isinstance(optimizer, opt_module.Optimizer): - raise TypeError( - "The optimizer should be an instance of Optimizer") - optimize_ops, params_grads = optimizer.minimize(loss) - - self.place = check_and_get_place(place) - - self._dist_transpile_if_necessary(optimize_ops, params_grads) - - # 2. 
move the default_main_program to self.program and run the - # default_startup program on an empty core.Scope() - # Run startup program - with self._prog_and_scope_guard(): - exe = executor.Executor(place) - exe.run(self.startup_program) - - if self.checkpoint_cfg and self.checkpoint_cfg.load_serial is not None: - self._load_checkpoint() - - if param_path and os.path.isdir(param_path): - with self._prog_and_scope_guard(): - # load params from param_path into scope - io.load_persistables( - executor=exe, - dirname=param_path, - main_program=self.startup_program) - - def _transpile_nccl2_dist(self): - # PADDLE_TRAINER_IPS - if "PADDLE_TRAINER_IPS" not in os.environ: - self.nccl_id_var = None - else: - self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID")) - port = os.getenv("PADDLE_PSERVER_PORT") - worker_ips = os.getenv("PADDLE_TRAINER_IPS") - worker_endpoints = [] - for ip in worker_ips.split(","): - worker_endpoints.append(':'.join([ip, port])) - self.num_trainers = len(worker_endpoints) - current_endpoint = os.getenv("PADDLE_CURRENT_IP") + ":" + port - worker_endpoints.remove(current_endpoint) - # TODO(wuyi): use self.nccl_id_var, self.num_trainers and self.trainer_id - # in ParallelExecutor to start - # distributed training using NCCL2 - self.nccl_id_var = self.startup_program.global_block().create_var( - name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW) - self.startup_program.global_block().append_op( - type="gen_nccl_id", - inputs={}, - outputs={"NCCLID": self.nccl_id_var}, - attrs={ - "endpoint": current_endpoint, - "endpoint_list": worker_endpoints, - "trainer_id": self.trainer_id - }) - - def _dist_transpile_if_necessary(self, optimize_ops, params_grads): - self._transpile_nccl2_dist() - if self.nccl_id_var != None: - return - - if "PADDLE_TRAINING_ROLE" not in os.environ: - return - - # the port of all pservers, needed by both trainer and pserver - port = os.getenv("PADDLE_PSERVER_PORT", "6174") - # comma separated ips of all pservers, needed by trainer and - # pserver - pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "") - eplist = [] - for ip in pserver_ips.split(","): - eplist.append(':'.join([ip, port])) - pserver_endpoints = ",".join(eplist) - # total number of workers/trainers in the job, needed by - # trainer and pserver - trainers = int(os.getenv("PADDLE_TRAINERS")) - # the IP of the local machine, needed by pserver only - current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port - # the unique trainer id, starting from 0, needed by trainer - # only - self.trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) - - # the role, should be either PSERVER or TRAINER - training_role = os.getenv("PADDLE_TRAINING_ROLE") - with self._prog_and_scope_guard(): - t = distribute_transpiler.DistributeTranspiler() - t.transpile( - self.trainer_id, pservers=pserver_endpoints, trainers=trainers) - if training_role == "PSERVER": - if self.checkpoint_cfg: - pserver_id = eplist.index(current_endpoint) - self.checkpoint_cfg.pserver_id = pserver_id - if t.has_distributed_lookup_table: - self.checkpoint_cfg.lookup_table_name = t.table_name - - self.train_program = t.get_pserver_program(current_endpoint) - self.startup_program = t.get_startup_program(current_endpoint, - self.train_program) - elif training_role == "TRAINER": - self.train_program = t.get_trainer_program() - else: - raise ValueError( - 'TRAINING_ROLE environment variable must be either TRAINER or PSERVER' - ) - - def stop(self): - """ - stop training - """ - self.__stop = True - - def train(self, num_epochs, 
event_handler, reader=None, feed_order=None): - """ - Start the train loop to train the model. - - Args: - num_epochs(int): The number of epoch. An epoch will process all data in reader - event_handler(callable): The event handler. A function with type (ev:Event)->void - reader(callable): A reader creator object. See also - :ref:`api_guide_python_reader` . - feed_order(list): Feeding order of reader. None will following the defining - order in program - - Returns: - None - """ - training_role = os.getenv("PADDLE_TRAINING_ROLE", "") - if training_role == "PSERVER": - with self._prog_and_scope_guard(): - exe = executor.Executor(self.place) - exe.run() - return - if self.parallel: - self._train_by_parallel_executor(num_epochs, event_handler, reader, - feed_order) - else: - self._train_by_executor(num_epochs, event_handler, reader, - feed_order) - - def test(self, reader, feed_order): - """ - Test the model on given test data - - Args: - reader(callable): The reader that yields test data. - feed_order(list): Feeding order of reader. None will following the - defining order in program - """ - - return self._test_by_executor(reader, feed_order, - self.train_func_outputs) - - def save_params(self, param_path): - """ - Save all parameters into :code:`param_path`. - - Args: - param_path(str): The path to save parameters. - - Returns: - None - """ - with self._prog_and_scope_guard(): - exe = executor.Executor(self.place) - io.save_persistables(exe, dirname=param_path) - - def save_inference_model(self, param_path, feeded_var_names, - target_var_indexes): - """ - Save model for cpp inference into :code:`param_path`. - - Args: - param_path(str): The path to save parameters. - feeded_var_names(list(str)): The name of the vars that you - need to feed in before run program. - target_var_indexes(list(int)): the index of target var that - you need to return in trainer.train_func. - Returns: - None - """ - with self._prog_and_scope_guard(): - exe = executor.Executor(self.place) - target_vars = [ - self.train_func_outputs[index] for index in target_var_indexes - ] - io.save_inference_model(param_path, feeded_var_names, target_vars, - exe) - - @contextlib.contextmanager - def _prog_and_scope_guard(self): - with framework.program_guard( - main_program=self.train_program, - startup_program=self.startup_program): - with executor.scope_guard(self.scope): - yield - - def _train_by_executor(self, num_epochs, event_handler, reader, feed_order): - """ - Train by Executor and single device. 
- - Args: - num_epochs: - event_handler: - reader: - feed_order: - - Returns: - - """ - with self._prog_and_scope_guard(): - feed_var_list = build_feed_var_list(self.train_program, feed_order) - feeder = data_feeder.DataFeeder( - feed_list=feed_var_list, place=self.place) - exe = executor.Executor(self.place) - reader = feeder.decorate_reader(reader, multi_devices=False) - self._train_by_any_executor(event_handler, exe, num_epochs, reader) - - def _train_by_any_executor(self, event_handler, exe, num_epochs, reader): - if self.checkpoint_cfg: - epochs = [ - epoch_id for epoch_id in range(num_epochs) - if epoch_id >= self.checkpoint_cfg.epoch_id - ] - else: - epochs = [epoch_id for epoch_id in range(num_epochs)] - - for epoch_id in epochs: - event_handler(BeginEpochEvent(epoch_id)) - for step_id, data in enumerate(reader()): - if self.__stop: - if self.checkpoint_cfg: - self._clean_checkpoint() - return - - if self.checkpoint_cfg and self.checkpoint_cfg.load_serial \ - and self.checkpoint_cfg.step_id >= step_id and self.checkpoint_cfg.epoch_id == epoch_id: - continue - - begin_event = BeginStepEvent(epoch_id, step_id) - event_handler(begin_event) - if begin_event.fetch_metrics: - metrics = exe.run(feed=data, - fetch_list=[ - var.name - for var in self.train_func_outputs - ]) - else: - metrics = exe.run(feed=data, fetch_list=[]) - - if self.checkpoint_cfg: - self._save_checkpoint(epoch_id, step_id) - event_handler(EndStepEvent(epoch_id, step_id, metrics)) - event_handler(EndEpochEvent(epoch_id)) - if self.checkpoint_cfg: - self._clean_checkpoint() - - def _test_by_executor(self, reader, feed_order, fetch_list): - with executor.scope_guard(self.scope): - feed_var_list = build_feed_var_list(self.test_program, feed_order) - feeder = data_feeder.DataFeeder( - feed_list=feed_var_list, place=self.place) - exe = executor.Executor(self.place) - accumulated = len(fetch_list) * [0] - count = 0 - for data in reader(): - outs = exe.run(program=self.test_program, - feed=feeder.feed(data), - fetch_list=fetch_list) - accumulated = [x[0] + x[1][0] for x in zip(accumulated, outs)] - count += 1 - - return [x / count for x in accumulated] - - def _train_by_parallel_executor(self, num_epochs, event_handler, reader, - feed_order): - with self._prog_and_scope_guard(): - pe = self._get_or_create_parallel_executor() - feed_var_list = build_feed_var_list(self.train_program, feed_order) - feeder = data_feeder.DataFeeder( - feed_list=feed_var_list, place=self.place) - reader = feeder.decorate_reader(reader, multi_devices=True) - self._train_by_any_executor(event_handler, pe, num_epochs, reader) - - def _get_parallel_executor(self): - return getattr(self, 'parallel_executor', None) - - def _get_or_create_parallel_executor(self): - if self._get_parallel_executor() is None: - self.parallel_executor = parallel_executor.ParallelExecutor( - use_cuda=isinstance(self.place, core.CUDAPlace), - loss_name=self.train_func_outputs[0].name) - return self._get_parallel_executor() - - def _clean_checkpoint(self): - assert self.checkpoint_cfg - clean_checkpoint(checkpoint_dir=self.checkpoint_cfg.checkpoint_dir) - - def _get_checkpoint_load_args(self): - """ - epoch_id and step_id are runtime arguments, they are not variables, will load them independently. - """ - return ["epoch_id", "step_id"] - - def _get_checkpoint_save_args(self, epoch_id, step_id): - """ - epoch_id and step_id are runtime arguments, they are not variables, will save them independently. 
- """ - trainer_args = {} - trainer_args["epoch_id"] = epoch_id - trainer_args["step_id"] = step_id - return trainer_args - - def _save_checkpoint(self, epoch_id, step_id): - assert self.checkpoint_cfg - - if epoch_id % self.checkpoint_cfg.epoch_interval == 0 \ - and step_id % self.checkpoint_cfg.step_interval == 0: - exe = executor.Executor(self.place) - save_checkpoint( - executor=exe, - checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, - trainer_id=self.trainer_id, - trainer_args=self._get_checkpoint_save_args(epoch_id, step_id), - main_program=self.train_program, - max_num_checkpoints=self.checkpoint_cfg.max_num_checkpoints) - - def _load_checkpoint(self): - with self._prog_and_scope_guard(): - exe = executor.Executor(self.place) - load_checkpoint( - executor=exe, - checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, - main_program=self.startup_program) - - if not self.checkpoint_cfg.pserver_id: - load_trainer_args = self._get_checkpoint_load_args() - trainer_args = load_checkpoint( - executor=exe, - checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, - main_program=self.startup_program, - role_id=self.trainer_id, - is_trainer=True, - load_trainer_args=load_trainer_args) - - if len(trainer_args) != 2: - raise ValueError( - "the return trainer_args length do not equal _get_checkpoint_load_args" - ) - self.checkpoint_cfg.epoch_id = int(trainer_args[0]) - self.checkpoint_cfg.step_id = int(trainer_args[1]) - else: - if self.checkpoint_cfg.lookup_table_name: - load_checkpoint( - executor=exe, - checkpoint_dir=self.checkpoint_cfg.checkpoint_dir, - main_program=self.startup_program, - role_id=self.checkpoint_cfg.pserver_id, - is_trainer=False, - load_trainer_args=None, - load_lookup_table=self.checkpoint_cfg.lookup_table_name) - - -def build_feed_var_list(program, feed_order): - if not isinstance(program, framework.Program): - raise TypeError("The 'program' should be an object of Program") - - if isinstance(feed_order, list): - feed_var_list = [ - program.global_block().var(var_name) for var_name in feed_order - ] - else: - if not isinstance(feed_order, dict): - raise TypeError( - "The 'feed_order' should be either None, list or dict.") - if not sorted(feed_order.values()) == list(range(len(feed_order))): - raise ValueError( - "The values of 'feed_order' should be a permutation of [0, len(feed_order))" - ) - sorted_pair_list = sorted( - six.iteritems(feed_order), key=lambda item: item[1]) - feed_var_list = [ - program.global_block().var(pair[0]) for pair in sorted_pair_list - ] - return feed_var_list - - -# move Checkpoint APIs from io.py to trainer.py, make all of them are private. -SUCCESS_MARK_FILENAME = "_SUCCESS" -CHECKPOINT_PREFIX = "checkpoint" -MODEL_DIR = "__model__" -LOOKUP_TABLE_DIR = "__lookup_table__" -TRAINER_PREFIX = "trainer" -CHECKPOINT_SEPARATOR = "_" - - -def save_checkpoint(executor, - checkpoint_dir, - trainer_id, - main_program, - trainer_args=None, - max_num_checkpoints=3, - lookup_table=None, - pserver_endpoints=None): - """ - This function filters out all checkpoint variables from the give - main_program and then saves these variables to the `checkpoint_dir` - directory. - - In the training precess, we generally save a checkpoint in each - iteration. So there might be a lot of checkpoints in the - `checkpoint_dir`. To avoid them taking too much disk space, the - `max_num_checkpoints` are introduced to limit the total number of - checkpoints. If the number of existing checkpints is greater than - the `max_num_checkpoints`, oldest ones will be scroll deleted. 
- - A variable is a checkpoint variable and will be saved if it meets - all following conditions: - 1. It's persistable. - 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. - 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - - Args: - executor(Executor): The executor to run for save checkpoint. - checkpoint_dir(str): The folder where to save checkpoints. - trainer_id(int): currect trainer id, if id is equal to 0, the trainer - is chief. - trainer_args(dict|None): Current training arguments. Such as 'epoch_id' - and 'step_id'. - Defaut: None - main_program(Program): The program whose checkpoint variables will - be saved. - max_num_checkpoints(int): The max number of total number of existing - checkpoints. - Default: 3 - lookup_table(string|None): the lookup table name, when use distribute - lookup table, we can get lookup table name by DistributeTranspiler. - table_name - pserver_endpoints(list|None): the parameter server ip:port list. - when use distribute lookup table, we can get pserver_endpoints by - distribute arguments. - - Returns: - None - - Raises: - ValueError: If `checkpoint_dir` is None. - AssertionError: If `trainer_args` is not a dict. - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - path = "./checkpoints" - prog = fluid.default_main_program() - trainer_args = {"epoch_id": 200, - "step_id": 20} # just an example - table_name = "share_w" - ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"] - - save_checkpoint(executor=exe, - checkpoint_dir=path, - trainer_id=0, - trainer_args=trainer_args, - main_program=prog, - max_num_checkpoints=3, - lookup_table=table_name, - pserver_endpoints = ps_endpoints) - """ - if checkpoint_dir is None: - raise ValueError("'checkpoint_dir' should not be None") - - if main_program is None: - raise ValueError('main_program should not be None.') - - if trainer_args: - assert isinstance(trainer_args, dict) - - is_chief = trainer_id == 0 - - _make_chekcpoint_dirs(checkpoint_dir) - serial = _get_latest_checkpoint_serial(checkpoint_dir) + 1 - cur_dir = _get_serial_dir(checkpoint_dir, serial) - - _save_trainer_args(cur_dir, trainer_id, trainer_args) - - if is_chief: - _save_persist_vars_without_grad(executor, cur_dir, main_program) - - if is_chief and lookup_table and pserver_endpoints: - _save_pserver_vars_by_notify(executor, cur_dir, lookup_table, - pserver_endpoints) - - _scroll_delete(checkpoint_dir, max_num_checkpoints) - - -def load_checkpoint(executor, - checkpoint_dir, - main_program, - role_id=0, - is_trainer=True, - load_trainer_args=None, - load_lookup_table=None): - """ - This function filters out all checkpoint variables from the give - main_program and then try to load these variables from the - `checkpoint_dir` directory. - - In the training precess, we generally save a checkpoint in each - iteration. So there are more than one checkpoint in the - `checkpoint_dir` (each checkpoint has its own sub folder), use - `serial` to specify which serial of checkpoint you would like to - load. - - A variable is a checkpoint variable and will be loaded if it meets - all following conditions: - 1. It's persistable. - 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. - 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - - Args: - executor(Executor): The executor to run for loading checkpoint. - checkpoint_dir(str): The folder where all checkpoints are. - serial(int): The serial of checkpoint you would like to load. 
- main_program(Program): The program whose checkpoint variables will - be loaded. - role_id(int): the trainer id or the parameter server id. - is_trainer(bool): trainer is True and parameter server is False. - load_trainer_args(list|None): list about load trainer args. - load_lookup_table(str|None): the lookup table name - - Returns: - None - - Raises: - ValueError: If `checkpoint_dir` is None. - ValueError: If `main_program` is None. - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - path = "./checkpoints" - prog = fluid.default_main_program() - load_checkpoint(executor=exe, checkpoint_dir=path, - serial=9, main_program=prog) - - # In this example, `load_checkpoint` function - # will first filters out all checkpoint variables in the default - # main program, and then try to load these variables form the - # folder "./checkpoints/checkpoint_9/__model__". - """ - - if checkpoint_dir is None: - raise ValueError("'checkpoint_dir' should not be None") - - serial = _get_latest_checkpoint_serial(checkpoint_dir) - - # there are nothing need to be loaded - if serial is None or serial < 0: - return - - if main_program is None: - raise ValueError('main_program should not be None.') - - if is_trainer and load_trainer_args is None: - cur_dir = _get_serial_dir(checkpoint_dir, serial) - _load_persist_vars_without_grad(executor, cur_dir, main_program, True) - return - - if is_trainer and load_trainer_args: - return _load_trainer_args(checkpoint_dir, serial, role_id, - load_trainer_args) - - if not is_trainer and load_lookup_table: - _load_lookup_table_vars(executor, checkpoint_dir, main_program, role_id, - load_lookup_table) - - -def clean_checkpoint(checkpoint_dir, delete_dir=False): - """ - clean the checkpoint dir, when the train exits normally, - the trainer will call clean_checkpoint to delete checkpoint directory saved before. - delete_dir only works when the directory is empty, otherwise, OSError is raised. - - : param checkpoint_dir - : param delete_dir - """ - - if checkpoint_dir is None: - raise ValueError("'checkpoint_dir' should not be None") - _scroll_delete(checkpoint_dir, max_num_checkpoints=0) - - if delete_dir and not os.listdir(checkpoint_dir): - os.rmdir(checkpoint_dir) - - -def _load_persist_vars_without_grad(executor, - dirname, - program, - has_model_dir=False): - """ - This function filters out all checkpoint variables from the give - program and then trys to load these variables from the given directory. - - A variable is a checkpoint variable if it meets all following - conditions: - 1. It's persistable. - 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. - 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - - Args: - executor(Executor): The executor to run for loading variables. - dirname(str): The directory path. - program(Program): The program whose checkpoint variables will - be loaded. - has_model_dir(bool): if True, the function loads variables - from a sub directory named '__model__'. - Default: False - - Returns: - None - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - param_path = "./my_paddle_model" - prog = fluid.default_main_program() - _load_persist_vars_without_grad(executor=exe, - dirname=param_path, program=prog, has_model_dir=True) - - # In this example, `_load_persist_vars_without_grad` function - # will first filters out all checkpoint variables in the default - # main program, and then trys to load these variables form the - # folder "./my_paddle_model/__model__". 
- """ - - if has_model_dir: - dirname = _get_model_dir(dirname) - - io.load_vars( - executor, - dirname=dirname, - main_program=program, - predicate=_is_checkpoint_var, - filename=None) - - -def _load_lookup_table_vars(executor, dirname, program, pserver_id, table_name): - """ - The parameter server will load lookup table's local file in - selectedrows variable. - - Args: - executor(Executor): The executor to run for loading persistable variables - dirname(str): The directory path - main_program(Program): Find the variable named table_name in main_program - pserver_id(int): the serial number in pserver_endpoints list - table_name(str): lookup table name - - Returns: - None - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - dirname = "./checkpoints/checkpoint_9/" - prog = fluid.default_main_program() - pserver_id = 1 - table_name = "share_w" - _load_lookup_table_vars(executor=exe, - dirname=dirname, program=prog, pserver_id=pserver_id, - table_name=table_name) - """ - - for var in program.list_vars(): - if var.name == table_name: - lookup_table_var = var - break - - assert lookup_table_var is not None - - lookup_table_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) - table_file = table_name + CHECKPOINT_SEPARATOR + str(pserver_id) - - load_prog = framework.Program() - load_block = load_prog.global_block() - - load_block.append_op( - type='load', - inputs={}, - outputs={'Out': [lookup_table_var]}, - attrs={'file_path': os.path.join(lookup_table_dir, table_file)}) - - executor.run(load_prog) - - -def _save_persist_vars_without_grad(executor, dirname, program): - """ - This function filters out all checkpoint variables from the give - program and then save these variables to a sub-folder '__model__' of - the given directory. - - A variable is a checkpoint variable if it meets all following - conditions: - 1. It's persistable. - 2. It's type is not FEED_MINIBATCH nor FETCH_LIST nor RAW. - 3. It's name contains no "@GRAD" nor ".trainer_" nor ".block". - - Args: - executor(Executor): The executor to run for saving variables. - dirname(str): The directory path. - program(Program): The program whose checkpoint variables will - be saved. - - Returns: - None - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - param_path = "./my_paddle_model" - prog = fluid.default_main_program() - _save_persist_vars_without_grad(executor=exe, - dirname=param_path, program=prog) - - # In this example, `_save_persist_vars_without_grad` function - # will first filters out all checkpoint variables in the default - # main program, and then saves these variables to the folder - # "./my_paddle_model/__model__". - """ - cur_dir = _get_model_dir(dirname) - io.save_vars( - executor, - dirname=cur_dir, - main_program=program, - vars=None, - predicate=_is_checkpoint_var, - filename=None) - _write_success(cur_dir) - - -def _save_pserver_vars_by_notify(executor, dirname, lookup_table, - ps_endpoint_list): - """ - This function will send checkpoint notify message from Trainer 0 - to all the pservers. - The checkpoint notify message contains lookup table name, - the absolute path on pserver to save lookup_table. - - Args: - executor(Executor): The executor to run for send checkpoint notify. - dirname(str): The folder where to save checkpoints. - lookup_table(string): the lookup table name, when use distribute - lookup table, we can get lookup table name by DistributeTranspiler. - table_name - ps_endpoint_list(list): the parameter server ip:port list. 
- when use distribute lookup table, we can get ps_endpoint_list by - distribute arguments. - Return: - None - - Examples: - .. code-block:: python - - exe = fluid.Executor(fluid.CPUPlace()) - param_path = "./my_paddle_model" - prog = fluid.default_main_program() - table_name = "share_w" - ps_endpoints = ["127.0.0.1:6000","127.0.0.1:6001"] - - _save_pserver_vars_by_notify(executor=exe, - dirname=param_path, lookup_table=table_name, - ps_endpoint_list=ps_endpoints) - """ - cur_dir = _get_lookuptable_dir(dirname) - - checkpoint_notify_program = framework.Program() - checkpoint_notify_block = checkpoint_notify_program.global_block() - - attrs = {} - attrs['epmap'] = ps_endpoint_list - attrs['dir'] = cur_dir - attrs['lookup_table'] = lookup_table - - checkpoint_notify_block.append_op( - type='checkpoint_notify', inputs={}, outputs={}, attrs=attrs) - executor.run(checkpoint_notify_program) - - -def _save_trainer_args(dirname, trainer_id, trainer_args): - assert isinstance(trainer_args, dict) - - cur_dir = _get_trainer_dir(dirname, trainer_id) - - for name, value in six.iteritems(trainer_args): - args_file = os.path.join(cur_dir, name) - with open(args_file, 'w') as f: - f.write(str(value)) - _write_success(cur_dir) - - -def _load_trainer_args(checkpoint_dir, serial, trainer_id, trainer_args): - """ - trainer will load some args from it's independent directory, - such as epoch_id and step_id. - - Args: - checkpoint_dir(str): The folder where all checkpoints are. - serial(int): The serial of checkpoint you would like to load. - trainer_id(int): current trainer id. - trainer_args(list): list about load trainer args - Return: - None - - Examples: - .. code-block:: python - - param_path = "./checkpoint/" - serial = 7 - trainer_id = 2 - trainer_args = ["epoch_id", "step_id"] - - _load_trainer_args(checkpoint_dir=param_path, serial=serial, - trainer_id=trainer_id, trainer_args=trainer_args) - """ - assert isinstance(trainer_args, list) - - cur_dir = _get_serial_dir(checkpoint_dir, serial) - cur_dir = _get_trainer_dir(cur_dir, trainer_id) - - ret_values = [] - - for arg in trainer_args: - cur_file = os.path.join(cur_dir, arg) - with open(cur_file, 'r') as f: - contents = f.read() - ret_values.append(contents.strip()) - return ret_values - - -def _is_checkpoint_var(var): - """ - the checkpoint will not save or load all the variables. - var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded. - - : param var(Variable) - """ - if var.desc.type() == core.VarDesc.VarType.FEED_MINIBATCH or \ - var.desc.type() == core.VarDesc.VarType.FETCH_LIST or \ - var.desc.type() == core.VarDesc.VarType.RAW: - return False - # @GRAD are named for gradient variables, checkpoint will not save it. - if "@GRAD" in var.name: - return False - # .trainer_ are named for distribute train variables, checkpoint will not save it. - if ".trainer_" in var.name: - return False - - # .block is named for distribute train variables, checkpoint will not save it. - if ".block" in var.name: - return False - - return var.persistable - - -def _make_chekcpoint_dirs(dirs): - """ - _make_chekcpoint_dirs will makdir local directory directly, when the directory is exist, it will igore it. 
- """ - assert dirs is not None - - if os.path.isfile(dirs): - raise OSError(errno.ENOTDIR, "dirs path shoule be a Directory.", dirs) - - if not os.path.isdir(dirs): - try: - os.makedirs(dirs) - except OSError as err: - if err.errno != errno.EEXIST: - raise err - - -def _get_dir_serial(dirname): - _, serial = dirname.split(CHECKPOINT_SEPARATOR) - - try: - serial_num = int(serial) - except ValueError: - serial_num = -1 - return serial_num - - -def _get_serial_dir(dirname, serial): - serial_folder = CHECKPOINT_PREFIX + CHECKPOINT_SEPARATOR + str(serial) - serial_dir = os.path.join(dirname, serial_folder) - _make_chekcpoint_dirs(serial_dir) - - return serial_dir - - -def _get_model_dir(dirname): - model_dir = os.path.join(dirname, MODEL_DIR) - _make_chekcpoint_dirs(model_dir) - return model_dir - - -def _get_lookuptable_dir(dirname): - lookuptable_dir = os.path.join(dirname, LOOKUP_TABLE_DIR) - _make_chekcpoint_dirs(lookuptable_dir) - return lookuptable_dir - - -def _get_trainer_dir(dirname, trainer_id): - trainer_folder = TRAINER_PREFIX + CHECKPOINT_SEPARATOR + str(trainer_id) - trainer_dir = os.path.join(dirname, trainer_folder) - _make_chekcpoint_dirs(trainer_dir) - return trainer_dir - - -def _scroll_delete(dirname, max_num_checkpoints=3): - dirs = os.listdir(dirname) - serial_map = {} - for serial in dirs: - serial_num = _get_dir_serial(serial) - serial_map[serial_num] = serial - - if len(list(serial_map.keys())) <= max_num_checkpoints: - return - - serials = list(serial_map.keys()) - serials.sort(reverse=True) - serials = serials[max_num_checkpoints:] - for serial in serials: - cur_dir = _get_serial_dir(dirname, serial) - try: - shutil.rmtree(cur_dir) - except OSError as err: - if err.errno != errno.ENOENT: - raise err - - -def _write_success(dirname): - """ - write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct. - - : param dirname - """ - success_file = os.path.join(dirname, SUCCESS_MARK_FILENAME) - with open(success_file, 'a') as f: - now = time.ctime() - f.write(now) - - -def _get_latest_checkpoint_serial(checkpoint_dir): - """ - get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory - - : param checkpoint_dir - """ - if not checkpoint_dir: - return -1 - - def has_success(checkpoint_dir, cur_dir): - """ - is _SUCCESS in this dir - """ - - serial = _get_dir_serial(cur_dir) - if serial == -1 or not os.path.isdir( - os.path.join(checkpoint_dir, cur_dir)): - return -1 - - success_path = os.path.join( - _get_serial_dir(checkpoint_dir, serial), MODEL_DIR, - SUCCESS_MARK_FILENAME) - if os.path.isfile(success_path): - return serial - - if not os.path.isdir(checkpoint_dir): - return -1 - - current_dir = -1 - dirs = os.listdir(checkpoint_dir) - for cur_dir in dirs: - success_num = has_success(checkpoint_dir, cur_dir) - if success_num > current_dir: - current_dir = success_num - return current_dir +# NOTE: Trainer is moved into fluid.contrib.trainer. 
+__all__ = []
diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py
index 3f8c7b844a9fdc8404560ba4c78f9d328af2852a..43071def7a906e585909e50e4c0c52c56d981cde 100644
--- a/python/paddle/fluid/transpiler/distribute_transpiler.py
+++ b/python/paddle/fluid/transpiler/distribute_transpiler.py
@@ -136,6 +136,8 @@ class DistributeTranspilerConfig(object):
     slice_var_up = True
     split_method = None
     min_block_size = 8192
+    # supported modes: pserver, nccl2
+    mode = "pserver"
     print_log = False


@@ -144,27 +146,30 @@ class DistributeTranspiler(object):
     **DistributeTranspiler**

     Convert the fluid program to distributed data-parallelism programs.
+    Supports two modes: pserver mode and nccl2 mode.

-    The main_program will be transformed to use a remote parameter server
-    to do parameter optimization. And the optimization graph will be put
-    into a parameter server program.
+    In pserver mode, the main_program will be transformed to use a remote
+    parameter server to do parameter optimization, and the optimization
+    graph will be put into a parameter server program.
+
+    In nccl2 mode, the transpiler will append an NCCL_ID broadcasting
+    op in startup_program to share the NCCL_ID across the job nodes.
+    After transpiling in nccl2 mode, you ***must*** pass the trainer_id
+    and num_trainers arguments to ParallelExecutor to enable NCCL2
+    distributed mode.

     Examples:
         .. code-block:: python

-            # Define your model before these codes.
-            port = os.getenv("PADDLE_PSERVER_PORT", "6174")
-            pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
-            eplist = []
-            for ip in pserver_ips.split(","):
-                eplist.append(':'.join([ip, port]))
-            pserver_endpoints = ",".join(eplist)
-            trainers = int(os.getenv("PADDLE_TRAINERS"))
-            current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
-            trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
+            # for pserver mode
+            pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+            trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+            current_endpoint = "192.168.0.1:6174"
+            trainer_id = 0
+            trainers = 4
             role = os.getenv("PADDLE_TRAINING_ROLE")

-            t = distribute_transpiler.DistributeTranspiler()
+            t = fluid.DistributeTranspiler()
             t.transpile(
                 trainer_id, pservers=pserver_endpoints, trainers=trainers)
             if role == "PSERVER":
@@ -173,6 +178,18 @@ class DistributeTranspiler(object):
                 pserver_program)
             elif role == "TRAINER":
                 trainer_program = t.get_trainer_program()
+
+            # for nccl2 mode
+            config = fluid.DistributeTranspilerConfig()
+            config.mode = "nccl2"
+            t = fluid.DistributeTranspiler(config=config)
+            t.transpile(trainer_id, trainers=trainer_endpoints, current_endpoint=current_endpoint)
+            exe = fluid.ParallelExecutor(
+                use_cuda,
+                loss_name=loss_var.name,
+                num_trainers=len(trainer_endpoints.split(",")),
+                trainer_id=trainer_id
+            )
     """

     def __init__(self, config=None):
@@ -190,13 +207,41 @@ class DistributeTranspiler(object):
         assert (self.config.min_block_size >= 8192)
         assert (self.config.split_method.__bases__[0] == PSDispatcher)

+    def _transpile_nccl2(self,
+                         trainer_id,
+                         trainers,
+                         current_endpoint,
+                         startup_program=None):
+        if not startup_program:
+            startup_program = default_startup_program()
+        if trainer_id >= 0:
+            worker_endpoints = trainers.split(",")
+            # send NCCL_ID to others or recv from trainer 0
+            worker_endpoints.remove(current_endpoint)
+
+            nccl_id_var = startup_program.global_block().create_var(
+                name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
+            startup_program.global_block().append_op(
+                type="gen_nccl_id",
+                inputs={},
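+                # On trainer 0 this op generates the NCCL ID and sends it to
+                # every endpoint in endpoint_list; on the other trainers it
+                # receives the ID from trainer 0 into the NCCLID variable.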
outputs={"NCCLID": nccl_id_var}, + attrs={ + "endpoint": current_endpoint, + "endpoint_list": worker_endpoints, + "trainer_id": trainer_id + }) + return nccl_id_var + else: + raise ValueError("must set trainer_id > 0") + def transpile(self, trainer_id, program=None, pservers="127.0.0.1:6174", trainers=1, sync_mode=True, - startup_program=None): + startup_program=None, + current_endpoint="127.0.0.1:6174"): """ Run the transpiler. @@ -207,10 +252,15 @@ class DistributeTranspiler(object): default is fluid.default_main_program(). pservers (str): comma separated ip:port string for the pserver list. - trainers (int): number of trainers in the distributed job. + trainers (int|str): in pserver mode this is the number of + trainers, in nccl2 mode this is a string of trainer + endpoints. sync_mode (bool): Do sync training or not, default is True. startup_program (Program|None): startup_program to transpile, default is fluid.default_main_program(). + current_endpoint (str): need pass current endpoint when + transpile as nccl2 distributed mode. In pserver mode + this argument is not used. """ if program is None: program = default_main_program() @@ -220,6 +270,15 @@ class DistributeTranspiler(object): self.startup_program = startup_program self.origin_startup_program = self.startup_program.clone() + if self.config.mode == "nccl2": + assert (isinstance(trainers, str)) + self._transpile_nccl2( + trainer_id, + trainers, + current_endpoint, + startup_program=startup_program) + return + self.trainer_num = trainers self.sync_mode = sync_mode self.trainer_id = trainer_id @@ -1082,7 +1141,7 @@ to transpile() call.") if self.sync_mode else [] }, attrs={ - "sync_mode": False, + "sync_mode": self.sync_mode, "epmap": pserver_endpoints, RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE, OP_ROLE_VAR_ATTR_NAME: [