Commit 5c57e150 authored by nhzlx

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_ut_for_trt

......@@ -62,8 +62,26 @@ if(NOT CMAKE_CROSSCOMPILING)
endif()
if(WIN32)
# windows stupid compile option for all targets.
# windows header option for all targets.
add_definitions(-D_XKEYCHECK_H)
# Use symbols instead of absolute paths to reduce the cmake link command length.
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_LIBRARIES 1)
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_OBJECTS 1)
SET(CMAKE_C_USE_RESPONSE_FILE_FOR_INCLUDES 1)
SET(CMAKE_CXX_USE_RESPONSE_FILE_FOR_INCLUDES 1)
SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@")
SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@")
# Specify the program to use when building static libraries
SET(CMAKE_C_CREATE_STATIC_LIBRARY "<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>")
SET(CMAKE_CXX_CREATE_STATIC_LIBRARY "<CMAKE_AR> lib <TARGET> <LINK_FLAGS> <OBJECTS>")
# set definition for the dll export
if (NOT MSVC)
message(FATAL_ERROR "The Windows build only supports MSVC, which is required by NVIDIA's nvcc compiler.")
endif(NOT MSVC)
endif(WIN32)
if(NOT WITH_GOLANG)
......
......@@ -110,6 +110,20 @@ function(find_fluid_modules TARGET_NAME)
endif()
endfunction(find_fluid_modules)
# find all third_party modules used by the paddle static library,
# to reduce dependencies when building the inference libs.
set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY)
function(find_fluid_thirdparties TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "third_party" pos)
if(pos GREATER 1)
get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THIRD_PARTY)
set(fluid_third_partys ${fluid_third_partys} ${TARGET_NAME})
set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY "${fluid_third_partys}")
endif()
endfunction(find_fluid_thirdparties)
function(merge_static_libs TARGET_NAME)
set(libs ${ARGN})
list(REMOVE_DUPLICATES libs)
......@@ -204,18 +218,13 @@ function(merge_static_libs TARGET_NAME)
foreach(lib ${libs})
# Get the file names of the libraries to be merged
#if(NOT $<TARGET_FILE:${lib}> MATCHES "lib.*\\.lib")
# message("library" ${lib})
# set(libfiles ${libfiles} lib$<TARGET_FILE:${lib}>)
#else()
set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
#endif()
endforeach()
# windows cmd returns an error in a clean env.
# COMMAND del "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
# msvc puts the library under "/Release/xxxlib" by default
# COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
add_custom_command(TARGET ${TARGET_NAME} POST_BUILD
COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.lib ${libfiles}
COMMAND cmake -E make_directory "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}"
COMMAND lib /OUT:${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/lib${TARGET_NAME}.lib ${libfiles}
)
endif(WIN32)
endfunction(merge_static_libs)
......
......@@ -2,6 +2,6 @@
Thanks for reading PaddlePaddle documentation.
Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [Fluiddoc Repo](https://github.com/PaddlePaddle/Paddle) and updated in Fluiddoc Repo.
Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [FluidDoc Repo](https://github.com/PaddlePaddle/FluidDoc) and updated there.
Please turn to Fluiddoc Repo for the latest documentation.
Please turn to FluidDoc Repo for the latest documentation.
......@@ -22,9 +22,6 @@ paddle.fluid.Operator.rename_input ArgSpec(args=['self', 'old_name', 'new_name']
paddle.fluid.Operator.rename_output ArgSpec(args=['self', 'old_name', 'new_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.set_attr ArgSpec(args=['self', 'name', 'val'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.to_string ArgSpec(args=['self', 'throw_on_error'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.__init__ ArgSpec(args=['self', 'block', 'shape', 'dtype'], varargs=None, keywords='kwargs', defaults=None)
paddle.fluid.Parameter.astype ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.program_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
......@@ -35,29 +32,16 @@ paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None,
paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False))
paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.Trainer.__init__ ArgSpec(args=['self', 'train_func', 'optimizer_func', 'param_path', 'place', 'parallel', 'checkpoint_config'], varargs=None, keywords=None, defaults=(None, None, False, None))
paddle.fluid.Trainer.save_inference_model ArgSpec(args=['self', 'param_path', 'feeded_var_names', 'target_var_indexes'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Trainer.save_params ArgSpec(args=['self', 'param_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Trainer.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Trainer.test ArgSpec(args=['self', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Trainer.train ArgSpec(args=['self', 'num_epochs', 'event_handler', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.BeginEpochEvent.__init__ ArgSpec(args=['self', 'epoch_id'], varargs=None, keywords=None, defaults=None)
paddle.fluid.EndEpochEvent.__init__ ArgSpec(args=['self', 'epoch_id'], varargs=None, keywords=None, defaults=None)
paddle.fluid.BeginStepEvent.__init__ ArgSpec(args=['self', 'epoch_id', 'step_id'], varargs=None, keywords=None, defaults=None)
paddle.fluid.EndStepEvent.__init__ ArgSpec(args=['self', 'epoch_id', 'step_id', 'metrics'], varargs=None, keywords=None, defaults=None)
paddle.fluid.CheckpointConfig.__init__ ArgSpec(args=['self', 'checkpoint_dir', 'max_num_checkpoints', 'epoch_interval', 'step_interval'], varargs=None, keywords=None, defaults=(None, 3, 1, 10))
paddle.fluid.Inferencer.__init__ ArgSpec(args=['self', 'infer_func', 'param_path', 'place', 'parallel'], varargs=None, keywords=None, defaults=(None, False))
paddle.fluid.Inferencer.infer ArgSpec(args=['self', 'inputs', 'return_numpy'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None))
paddle.fluid.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DistributeTranspilerConfig.__init__
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords='kwargs', defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.__init__ ArgSpec(args=['self', 'use_cuda', 'loss_name', 'main_program', 'share_vars_from', 'exec_strategy', 'build_strategy', 'num_trainers', 'trainer_id', 'scope'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 1, 0, None))
paddle.fluid.ParallelExecutor.run ArgSpec(args=['self', 'fetch_list', 'feed', 'feed_dict', 'return_numpy'], varargs=None, keywords=None, defaults=(None, None, True))
paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ExecutionStrategy) -> None
paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.GradientScaleStrategy, arg0: int) -> None
......@@ -178,14 +162,14 @@ paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, key
paddle.fluid.layers.sequence_enumerate ArgSpec(args=['input', 'win_size', 'pad_value', 'name'], varargs=None, keywords=None, defaults=(0, None))
paddle.fluid.layers.expand ArgSpec(args=['x', 'expand_times', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_concat ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(-1, False, None, None))
paddle.fluid.layers.scale ArgSpec(args=['x', 'scale', 'bias', 'bias_after_scale', 'out', 'act', 'name'], varargs=None, keywords=None, defaults=(1.0, 0.0, True, None, None, None))
paddle.fluid.layers.elementwise_add ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_div ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_sub ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_mul ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_max ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_min ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.elementwise_pow ArgSpec(args=['x', 'y', 'out', 'axis', 'use_mkldnn', 'act', 'name'], varargs=None, keywords=None, defaults=(None, -1, False, None, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
......@@ -265,12 +249,12 @@ paddle.fluid.layers.sum ArgSpec(args=[], varargs='args', keywords='kwargs', defa
paddle.fluid.layers.slice ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.shape ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.maxout ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.softshrink ArgSpec(args=[], varargs='args', keywords='kwargs', defaults=None)
paddle.fluid.layers.sigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.logsigmoid ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.exp ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.tanh ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.tanh_shrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.softshrink ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sqrt ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.abs ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.ceil ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
......@@ -336,7 +320,7 @@ paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_startup_program ArgSpec(args=['self', 'endpoint', 'pserver_program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.transpiler.DistributeTranspiler.get_trainer_program ArgSpec(args=['self', 'wait_port'], varargs=None, keywords=None, defaults=(True,))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None))
paddle.fluid.transpiler.DistributeTranspiler.transpile ArgSpec(args=['self', 'trainer_id', 'program', 'pservers', 'trainers', 'sync_mode', 'startup_program', 'current_endpoint'], varargs=None, keywords=None, defaults=(None, '127.0.0.1:6174', 1, True, None, '127.0.0.1:6174'))
paddle.fluid.transpiler.memory_optimize ArgSpec(args=['input_program', 'skip_opt_set', 'print_log', 'level'], varargs=None, keywords=None, defaults=(None, False, 0))
paddle.fluid.transpiler.release_memory ArgSpec(args=['input_program', 'skip_opt_set'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.HashName.__init__ ArgSpec(args=['self', 'pserver_endpoints'], varargs=None, keywords=None, defaults=None)
......@@ -350,6 +334,7 @@ paddle.fluid.nets.simple_img_conv_pool ArgSpec(args=['input', 'num_filters', 'fi
paddle.fluid.nets.sequence_conv_pool ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max'))
paddle.fluid.nets.glu ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,))
paddle.fluid.nets.scaled_dot_product_attention ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0))
paddle.fluid.nets.img_conv_group ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn', 'use_mkldnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True, False))
paddle.fluid.optimizer.SGDOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'regularization', 'name'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.optimizer.SGDOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'use_nesterov', 'regularization', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
......@@ -389,7 +374,7 @@ paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> Non
paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
paddle.fluid.ParamAttr.__init__ ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False))
paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim'], varargs=None, keywords='kwargs', defaults=(None,))
paddle.fluid.WeightNormParamAttr.__init__ ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False))
paddle.fluid.DataFeeder.__init__ ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.DataFeeder.decorate_reader ArgSpec(args=['self', 'reader', 'multi_devices', 'num_places', 'drop_last'], varargs=None, keywords=None, defaults=(None, True))
paddle.fluid.DataFeeder.feed ArgSpec(args=['self', 'iterable'], varargs=None, keywords=None, defaults=None)
......
......@@ -13,6 +13,11 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// logging.h and windows.h conflict
#define GLOG_NO_ABBREVIATED_SEVERITIES
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/framework/tensor.h"
#include "unsupported/Eigen/CXX11/Tensor"
......@@ -46,11 +51,13 @@ struct EigenTensor {
using ConstType =
Eigen::TensorMap<Eigen::Tensor<const T, D, MajorType, IndexType>>;
static Type From(Tensor& tensor, DDim dims) {
static Type From(Tensor& tensor, DDim dims) { // NOLINT
return Type(tensor.data<T>(), EigenDim<D>::From(dims));
}
static Type From(Tensor& tensor) { return From(tensor, tensor.dims_); }
static Type From(Tensor& tensor) { // NOLINT
return From(tensor, tensor.dims_);
} // NOLINT
static ConstType From(const Tensor& tensor, DDim dims) {
return ConstType(tensor.data<T>(), EigenDim<D>::From(dims));
......@@ -64,7 +71,8 @@ struct EigenTensor {
template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenMatrix : public EigenTensor<T, 2, MajorType, IndexType> {
static typename EigenMatrix::Type Reshape(Tensor& tensor, int num_col_dims) {
static typename EigenMatrix::Type Reshape(Tensor& tensor, // NOLINT
int num_col_dims) {
int rank = tensor.dims_.size();
PADDLE_ENFORCE(num_col_dims > 0 && num_col_dims < rank,
"`num_col_dims` must be between (0, rank_of_tensor).");
......@@ -86,11 +94,12 @@ template <typename T, int MajorType = Eigen::RowMajor,
typename IndexType = Eigen::DenseIndex>
struct EigenVector : public EigenTensor<T, 1, MajorType, IndexType> {
// Flatten reshapes a Tensor into an EigenVector.
static typename EigenVector::Type Flatten(Tensor& tensor) {
static typename EigenVector::Type Flatten(Tensor& tensor) { // NOLINT
return EigenVector::From(tensor, {product(tensor.dims_)});
}
static typename EigenVector::ConstType Flatten(const Tensor& tensor) {
static typename EigenVector::ConstType Flatten(
const Tensor& tensor) { // NOLINT
return EigenVector::From(tensor, {product(tensor.dims_)});
}
};
......@@ -104,7 +113,7 @@ struct EigenScalar {
using ConstType = Eigen::TensorMap<
Eigen::TensorFixedSize<const T, Eigen::Sizes<>, MajorType, IndexType>>;
static Type From(Tensor& tensor) { return Type(tensor.data<T>()); }
static Type From(Tensor& tensor) { return Type(tensor.data<T>()); } // NOLINT
static ConstType From(const Tensor& tensor) {
return ConstType(tensor.data<T>());
......
......@@ -26,8 +26,6 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
PADDLE_ENFORCE(graph.get());
FusePassBase::Init("conv_relu_mkldnn_fuse", graph.get());
std::unordered_set<Node*> nodes2delete;
GraphPatternDetector gpd;
auto* conv_input = gpd.mutable_pattern()
->NewNode("conv_relu_mkldnn_fuse/conv_input")
......@@ -43,35 +41,19 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
VLOG(4) << "handle ConvReLU fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_relu_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_bias, conv_bias, conv_relu_pattern); // Bias
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp
GET_IR_NODE_FROM_SUBGRAPH(conv, conv, conv_relu_pattern); // CONV op
GET_IR_NODE_FROM_SUBGRAPH(relu_out, relu_out, conv_relu_pattern); // Out
GET_IR_NODE_FROM_SUBGRAPH(relu, relu, conv_relu_pattern); // ReLU op
// Create an ConvReLU Node.
OpDesc desc;
std::string conv_relu_i_in = subgraph.at(conv_input)->Name();
std::string conv_relu_w_in = conv_weight->Name();
std::string conv_relu_b_in = conv_bias->Name();
std::string conv_relu_out = relu_out->Name();
desc.SetInput("Input", std::vector<std::string>({conv_relu_i_in}));
desc.SetInput("Filter", std::vector<std::string>({conv_relu_w_in}));
desc.SetInput("Bias", std::vector<std::string>({conv_relu_b_in}));
desc.SetOutput("Output", std::vector<std::string>({conv_relu_out}));
desc.SetType("conv2d");
for (auto& attr : conv->Op()->GetAttrMap()) {
desc.SetAttr(attr.first, attr.second);
}
desc.SetAttr("fuse_relu", true);
auto conv_relu_node = g->CreateOpNode(&desc); // OpDesc will be copied.
GraphSafeRemoveNodes(graph.get(), {conv, relu, conv_out});
// Transform Conv node into ConvReLU node.
OpDesc* desc = conv->Op();
desc->SetOutput("Output", std::vector<std::string>({relu_out->Name()}));
desc->SetAttr("fuse_relu", true);
GraphSafeRemoveNodes(graph.get(), {relu, conv_out});
PADDLE_ENFORCE(subgraph.count(conv_input));
IR_NODE_LINK_TO(subgraph.at(conv_input), conv_relu_node);
IR_NODE_LINK_TO(conv_weight, conv_relu_node);
IR_NODE_LINK_TO(conv_bias, conv_relu_node);
IR_NODE_LINK_TO(conv_relu_node, relu_out);
IR_NODE_LINK_TO(conv, relu_out);
found_conv_relu_count++;
};
......
......@@ -85,19 +85,16 @@ TEST(ConvReLUFusePass, basic) {
for (auto* node : graph->Nodes()) {
if (node->IsOp() && node->Op()->Type() == "conv2d") {
if (node->Op()->HasAttr("use_mkldnn")) {
bool use_mkldnn = boost::get<bool>(node->Op()->GetAttr("use_mkldnn"));
if (use_mkldnn) {
if (node->Op()->HasAttr("fuse_relu")) {
bool fuse_relu = boost::get<bool>(node->Op()->GetAttr("fuse_relu"));
auto* op = node->Op();
ASSERT_TRUE(op->HasAttr("use_mkldnn"));
EXPECT_TRUE(boost::get<bool>(op->GetAttr("use_mkldnn")));
ASSERT_TRUE(op->HasAttr("fuse_relu"));
bool fuse_relu = boost::get<bool>(op->GetAttr("fuse_relu"));
if (fuse_relu) {
++conv_relu_count;
}
}
}
}
}
}
EXPECT_EQ(conv_relu_count, 1);
}
......
......@@ -638,11 +638,6 @@ PDNode *patterns::ConvReLU::operator()(
->AsInput()
->assert_is_persistable_var()
->assert_is_op_input("conv2d", "Filter");
// Bias
auto *conv_bias_var = pattern->NewNode(conv_bias_repr())
->AsInput()
->assert_is_persistable_var()
->assert_is_op_input("conv2d", "Bias");
// intermediate variable, will be removed in the IR after fuse.
auto *conv_out_var = pattern->NewNode(conv_out_repr())
->AsIntermediate()
......@@ -653,8 +648,7 @@ PDNode *patterns::ConvReLU::operator()(
->AsOutput()
->assert_is_op_output("relu");
conv_op->LinksFrom({conv_input, conv_weight_var, conv_bias_var})
.LinksTo({conv_out_var});
conv_op->LinksFrom({conv_input, conv_weight_var}).LinksTo({conv_out_var});
relu_op->LinksFrom({conv_out_var}).LinksTo({relu_out_var});
return relu_out_var;
}
......
......@@ -379,7 +379,7 @@ struct PatternBase {
// op: conv + relu
// named nodes:
// conv_input, conv_weight,
// conv_bias, conv_out, conv,
// conv_out, conv,
// relu_out, relu
struct ConvReLU : public PatternBase {
ConvReLU(PDPattern* pattern, const std::string& name_scope)
......@@ -392,7 +392,6 @@ struct ConvReLU : public PatternBase {
PATTERN_DECL_NODE(relu);
// declare variable node's name
PATTERN_DECL_NODE(conv_weight);
PATTERN_DECL_NODE(conv_bias);
PATTERN_DECL_NODE(conv_out);
PATTERN_DECL_NODE(relu_out);
};
......
......@@ -132,7 +132,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namescope.")
.SetDefault("");
AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
"Callstack for Op Creatation.")
.SetDefault({});
Validate();
}
......
......@@ -46,6 +46,7 @@ class OpProtoAndCheckerMaker {
static const char *OpRoleAttrName() { return "op_role"; }
static const char *OpRoleVarAttrName() { return "op_role_var"; }
static const char *OpNamescopeAttrName() { return "op_namescope"; }
static const char *OpCreationCallstackAttrName() { return "op_callstack"; }
void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
......
......@@ -23,6 +23,11 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#if defined(_WIN32)
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc: glog's logging.h conflicts with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#include "glog/logging.h" // For VLOG()
#include "paddle/fluid/framework/attribute.h"
#include "paddle/fluid/framework/details/op_registry.h"
......@@ -246,16 +251,14 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
__use_op_itself_##op_type, \
"USE_OP_ITSELF must be called in global namespace"); \
extern int TouchOpRegistrar_##op_type(); \
static int use_op_itself_##op_type##_ __attribute__((unused)) = \
TouchOpRegistrar_##op_type()
UNUSED static int use_op_itself_##op_type##_ = TouchOpRegistrar_##op_type()
#define USE_OP_DEVICE_KERNEL(op_type, LIBRARY_TYPE) \
STATIC_ASSERT_GLOBAL_NAMESPACE( \
__use_op_kernel_##op_type##_##LIBRARY_TYPE##__, \
"USE_OP_DEVICE_KERNEL must be in global namespace"); \
extern int TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE(); \
static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_ \
__attribute__((unused)) = \
UNUSED static int use_op_kernel_##op_type##_##LIBRARY_TYPE##_ = \
TouchOpKernelRegistrar_##op_type##_##LIBRARY_TYPE()
// TODO(fengjiayi): The following macros
......
......@@ -11,15 +11,20 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/framework/operator.h"
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -137,7 +142,10 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
try {
if (VLOG_IS_ON(4)) {
VLOG(4) << place << " " << DebugStringEx(&scope);
}
if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place);
......@@ -149,7 +157,33 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place);
if (VLOG_IS_ON(3)) {
VLOG(3) << place << " " << DebugStringEx(&scope);
}
} catch (platform::EnforceNotMet exception) {
if (Attrs().count("sub_block") != 0) {
throw exception;
}
auto& callstack = Attr<std::vector<std::string>>(
OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
if (callstack.empty()) {
throw exception;
}
std::ostringstream sout;
sout << "Invoke operator " << Type() << " error.\n";
sout << "Python Callstacks: \n";
for (auto& line : callstack) {
sout << line;
}
sout << "C++ Callstacks: \n";
sout << exception.err_str_;
exception.err_str_ = sout.str();
throw exception;
} catch (...) {
std::rethrow_exception(std::current_exception());
}
}
bool OperatorBase::HasInputs(const std::string& name) const {
......@@ -177,7 +211,7 @@ const std::vector<std::string>& OperatorBase::Inputs(
}
bool OperatorBase::HasOutputs(const std::string& name) const {
if (outputs_.find(name) != outputs_.end()) {
if (outputs_.end() != outputs_.find(name)) {
return true;
} else {
return false;
......
......@@ -20,6 +20,8 @@ limitations under the License. */
#include <tuple>
#include <unordered_map>
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "glog/logging.h" // For VLOG
#include "paddle/fluid/framework/attribute.h"
......
......@@ -71,15 +71,15 @@ bool AnalysisPredictor::Init(
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
} else {
LOG(ERROR) << "fail to load inference model.";
LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
return false;
}
OptimizeInferenceProgram();
ctx_ = executor_->Prepare(*inference_program_, 0);
if (config_._use_mkldnn) {
executor_->EnableMKLDNN(*inference_program_);
}
ctx_ = executor_->Prepare(*inference_program_, 0);
VLOG(5) << "to create variables";
PADDLE_ENFORCE(scope_.get());
......@@ -109,7 +109,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
argument_.origin_program_desc.reset(
new ProgramDesc(*inference_program_->Proto()));
PADDLE_ENFORCE(config_.ir_mode == AnalysisConfig::IrPassMode::kExclude,
PADDLE_ENFORCE(
config_.ir_mode == contrib::AnalysisConfig::IrPassMode::kExclude,
"Only kExclude is supported yet.");
Analyzer().DisableIrPasses(config_.ir_passes).Run(&argument_);
......@@ -126,8 +127,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config) {
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnalysisConfig, PaddleEngineKind::kAnalysis>(
const contrib::AnalysisConfig& config) {
VLOG(3) << "create AnalysisConfig";
if (config.use_gpu) {
// 1. GPU memory
......@@ -154,4 +156,11 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
return predictor;
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<contrib::AnalysisConfig>(
const contrib::AnalysisConfig& config) {
return CreatePaddlePredictor<contrib::AnalysisConfig,
PaddleEngineKind::kAnalysis>(config);
}
} // namespace paddle
......@@ -30,7 +30,7 @@ using framework::proto::ProgramDesc;
*/
class AnalysisPredictor : public NativePaddlePredictor {
public:
explicit AnalysisPredictor(const AnalysisConfig& config)
explicit AnalysisPredictor(const contrib::AnalysisConfig& config)
: NativePaddlePredictor(config), config_(config) {}
bool Init(const std::shared_ptr<framework::Scope>& parent_scope);
......@@ -46,7 +46,7 @@ class AnalysisPredictor : public NativePaddlePredictor {
Argument& analysis_argument() { return argument_; }
private:
AnalysisConfig config_;
contrib::AnalysisConfig config_;
Argument argument_;
};
......
......@@ -31,21 +31,24 @@
namespace paddle {
using paddle::contrib::AnakinConfig;
template <typename Target>
PaddleInferenceAnakinPredictor<Target>::PaddleInferenceAnakinPredictor(
const AnakinConfig &config) {
const contrib::AnakinConfig &config) {
CHECK(Init(config));
}
template <>
PaddleInferenceAnakinPredictor<anakin::X86>::PaddleInferenceAnakinPredictor(
const AnakinConfig &config) {
const contrib::AnakinConfig &config) {
omp_set_dynamic(0);
omp_set_num_threads(1);
mkl_set_num_threads(1);
CHECK(Init(config));
}
template <typename Target>
bool PaddleInferenceAnakinPredictor<Target>::Init(const AnakinConfig &config) {
bool PaddleInferenceAnakinPredictor<Target>::Init(
const contrib::AnakinConfig &config) {
if (!(graph_.load(config.model_file))) {
VLOG(3) << "fail to load graph from " << config.model_file;
return false;
......@@ -200,10 +203,11 @@ template class PaddleInferenceAnakinPredictor<anakin::X86>;
// A factory to help create difference predictor.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
const contrib::AnakinConfig &config) {
VLOG(3) << "Anakin Predictor create.";
if (config.target_type == AnakinConfig::NVGPU) {
if (config.target_type == contrib::AnakinConfig::NVGPU) {
#ifdef PADDLE_WITH_CUDA
VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
std::unique_ptr<PaddlePredictor> x(
......@@ -213,7 +217,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
LOG(ERROR) << "AnakinConfig::NVGPU could not used in ONLY-CPU environment";
return nullptr;
#endif
} else if (config.target_type == AnakinConfig::X86) {
} else if (config.target_type == contrib::AnakinConfig::X86) {
VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor<anakin::X86>(config));
......
......@@ -29,6 +29,8 @@ limitations under the License. */
namespace paddle {
using contrib::AnakinConfig;
template <typename Target>
class PaddleInferenceAnakinPredictor : public PaddlePredictor {
public:
......
......@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -101,14 +102,11 @@ bool NativePaddlePredictor::Init(
inference_program_ = paddle::inference::Load(
executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
} else {
LOG(ERROR) << "fail to load inference model.";
LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
return false;
}
ctx_ = executor_->Prepare(*inference_program_, 0);
if (config_._use_mkldnn) {
executor_->EnableMKLDNN(*inference_program_);
}
executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0);
......@@ -218,57 +216,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
PaddleTensor *output) {
std::vector<int> shape;
auto dims_i = fetch.dims();
auto lod = fetch.lod();
const T *output_ptr = fetch.data<T>();
auto num = fetch.numel();
std::vector<T> data;
if (0 == lod.size()) {
std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
for (int j = 0; j < dims_i.size(); ++j) {
shape.push_back(dims_i[j]);
}
} else {
// for batch detection
// image[0] -> output[0] shape {145, 6}
// image[1] -> output[1] shape {176, 6}
// then,
// the batch output shape {321, 6}
// the lod {{0, 145, 321}}
// so we should append output[0] to {176, 6}
size_t max_dim = 0;
for (size_t j = 1; j < lod[0].size(); j++) {
max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]);
}
size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back();
if (max_dim > 0) {
data.resize((lod[0].size() - 1) * max_dim * common_dim, 0);
}
for (size_t j = 1; j < lod[0].size(); j++) {
size_t start = lod[0][j - 1] * common_dim;
size_t end = lod[0][j] * common_dim;
if (end > start) {
std::copy(output_ptr + start, output_ptr + end,
data.begin() + (j - 1) * max_dim * common_dim);
}
}
shape.push_back(lod[0].size() - 1);
shape.push_back(max_dim);
for (int j = 1; j < dims_i.size(); ++j) {
shape.push_back(dims_i[j]);
}
}
output->shape = shape;
auto &buffer = output->data;
if (buffer.empty() || buffer.length() < sizeof(T) * data.size()) {
buffer.Resize(sizeof(T) * data.size());
}
std::memcpy(buffer.data(), data.data(), sizeof(T) * data.size());
// copy LoD
for (const auto &level : fetch.lod()) {
output->lod.emplace_back(level);
// set shape.
auto shape = framework::vectorize(fetch.dims());
output->shape.assign(shape.begin(), shape.end());
// set data.
const T *data = fetch.data<T>();
int num_elems = inference::VecReduceToInt(shape);
output->data.Resize(num_elems * sizeof(T));
// The tensor fetched by the fetch op should always be in CPU memory, so just
// copy.
memcpy(output->data.data(), data, num_elems * sizeof(T));
// set lod
output->lod.clear();
for (auto &level : fetch.lod()) {
output->lod.emplace_back(level.begin(), level.end());
}
}
......@@ -330,4 +291,10 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
#endif
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
const NativeConfig &config) {
return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}
} // namespace paddle
......@@ -14,15 +14,22 @@
#pragma once
// logging.h and windows.h conflict
#define GLOG_NO_ABBREVIATED_SEVERITIES
// solve static linking error in windows
// https://github.com/google/glog/issues/301
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/profiler.h"
......
......@@ -25,10 +25,11 @@ using inference::analysis::Argument;
using inference::Singleton;
using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;
using paddle::contrib::MixedRTConfig;
class TensorRTSubgraphPredictor : public NativePaddlePredictor {
public:
explicit TensorRTSubgraphPredictor(const TensorRTConfig& config)
explicit TensorRTSubgraphPredictor(const MixedRTConfig& config)
: NativePaddlePredictor(config), config_(config) {}
bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
......@@ -121,13 +122,13 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
}
private:
TensorRTConfig config_;
MixedRTConfig config_;
};
template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
const TensorRTConfig& config) {
CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
const MixedRTConfig& config) {
VLOG(3) << "create TensorRTSubgraphPredictor";
if (config.use_gpu) {
// 1. GPU memory
......@@ -156,6 +157,13 @@ CreatePaddlePredictor<TensorRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
return std::move(predictor);
}
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<MixedRTConfig>(
const MixedRTConfig& config) {
return CreatePaddlePredictor<MixedRTConfig,
PaddleEngineKind::kAutoMixedTensorRT>(config);
}
} // namespace paddle
USE_TRT_CONVERTER(elementwise_add_weight);
......
......@@ -20,6 +20,8 @@
namespace paddle {
using contrib::MixedRTConfig;
DEFINE_string(dirname, "", "Directory of the inference model.");
void CompareTensorRTWithFluid(bool enable_tensorrt) {
......@@ -32,7 +34,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
config0.fraction_of_gpu_memory = 0.3;
config0.device = 0;
TensorRTConfig config1;
MixedRTConfig config1;
config1.model_dir = FLAGS_dirname + "word2vec.inference.model";
config1.use_gpu = true;
config1.fraction_of_gpu_memory = 0.3;
......@@ -42,7 +44,7 @@ void CompareTensorRTWithFluid(bool enable_tensorrt) {
auto predictor0 =
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config0);
auto predictor1 =
CreatePaddlePredictor<TensorRTConfig,
CreatePaddlePredictor<MixedRTConfig,
PaddleEngineKind::kAutoMixedTensorRT>(config1);
for (int batch_id = 0; batch_id < 1; batch_id++) {
......
cmake_minimum_required(VERSION 3.0)
project(cpp_inference_demo CXX C)
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
macro(safe_set_static_flag)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endmacro()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
if (WIN32)
set(CMAKE_STATIC_LIBRARY_PREFIX "lib")
if (WITH_STATIC_LIB)
safe_set_static_flag()
add_definitions(-DSTATIC_LIB)
set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "/w")
set(CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} "/w")
endif()
set(CMAKE_STATIC_LIBRARY_PREFIX "lib")
else()
set(CMAKE_STATIC_LIBRARY_PREFIX "")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
message("flags" ${CMAKE_CXX_FLAGS})
if(NOT DEFINED PADDLE_LIB)
message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
......@@ -16,14 +35,18 @@ if(NOT DEFINED DEMO_NAME)
message(FATAL_ERROR "please set DEMO_NAME with -DDEMO_NAME=demo_name")
endif()
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." OFF)
option(WITH_STATIC_LIB "Compile demo with static/shared library, default use static." ON)
if(WITH_GPU)
if(NOT WIN32)
set(CUDA_LIB "/usr/local/cuda/lib64/" CACHE STRING "CUDA Library")
else()
if(CUDA_LIB STREQUAL "")
set(CUDA_LIB "C:\\Program\ Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v8.0\\lib\\x64")
endif()
endif(NOT WIN32)
endif()
include_directories("D:/Paddle/")
include_directories("${PADDLE_LIB}")
include_directories("${PADDLE_LIB}/third_party/install/protobuf/include")
include_directories("${PADDLE_LIB}/third_party/install/glog/include")
......@@ -83,10 +106,18 @@ set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
${CMAKE_STATIC_LIBRARY_PREFIX}glog ${CMAKE_STATIC_LIBRARY_PREFIX}gflags ${CMAKE_STATIC_LIBRARY_PREFIX}protobuf
${EXTERNAL_LIB})
# NOTE(dzhwinter) shlwapi is deprecated.
set(DEPS ${DEPS} libcmt shlwapi)
endif(NOT WIN32)
if(WITH_GPU)
if(NOT WIN32)
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX} )
endif()
endif()
target_link_libraries(${DEMO_NAME} ${DEPS})
......@@ -18,6 +18,8 @@ limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <algorithm>
#include <memory>
#include <thread> //NOLINT
#include "paddle/fluid/inference/paddle_inference_api.h"
......@@ -67,7 +69,8 @@ void Main(bool use_gpu) {
0.000932706};
const size_t num_elements = outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
i++) {
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
}
......@@ -113,7 +116,8 @@ void MainThreads(int num_threads, bool use_gpu) {
const size_t num_elements =
outputs.front().data.length() / sizeof(float);
// The outputs' buffers are in CPU memory.
for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
for (size_t i = 0; i < std::min(static_cast<size_t>(5), num_elements);
i++) {
PADDLE_ENFORCE(static_cast<float*>(outputs.front().data.data())[i],
result[i]);
}
......
# windows inference
This document describes Windows inference. Currently only static compilation is supported; it produces paddle_fluid.lib, which bundles all third-party dependencies except openblas.dll.
1. Download the latest paddle_fluid.lib and openblas.dll, and put them in the same directory.
2. Prepare a pre-trained model, for example one of the models in models; a model can be saved with the save_inference_model API. Put the model files into that directory.
3. Enter the Paddle/paddle/fluid/inference/api/demo_ci directory, create a build directory, and then use cmake to generate the VS2015 solution file.
Here PADDLE_LIB is the folder containing the paddle_fluid.lib above, and CUDA_LIB is the x64 CUDA system library directory.
```shell
cmake .. -G "Visual Studio 14 2015 Win64" -DWITH_GPU=ON -DWITH_MKL=OFF -DWITH_STATIC_LIB=ON -DCMAKE_BUILD_TYPE=Release -DDEMO_NAME=inference_icnet -DPADDLE_LIB=D:\to_the_paddle_fluid.lib -DCUDA_LIB=D:\tools\v8.0\lib\x64
```
Then open the generated project file with VS2015, make sure static linking "/MT" is used, and build the exe. Put openblas.dll in the same directory as the exe.
4. The exe is the build output of the project and can be run directly.
## FAQ
1. cmake needs to be downloaded manually and added to the system PATH.
2. Paths must not contain spaces; for example, a CUDA_LIB path under Program Files(x86) may cause errors. Copying CUDA to a new location works around this.
......@@ -74,13 +74,17 @@ template <>
std::string to_string<std::vector<std::vector<float>>>(
const std::vector<std::vector<std::vector<float>>> &vec);
template <typename T>
int VecReduceToInt(const std::vector<T> &v) {
return std::accumulate(v.begin(), v.end(), 1, [](T a, T b) { return a * b; });
}
template <typename T>
static void TensorAssignData(PaddleTensor *tensor,
const std::vector<std::vector<T>> &data) {
// Assign buffer
int dim = std::accumulate(tensor->shape.begin(), tensor->shape.end(), 1,
[](int a, int b) { return a * b; });
tensor->data.Resize(sizeof(T) * dim);
int num_elems = VecReduceToInt(tensor->shape);
tensor->data.Resize(sizeof(T) * num_elems);
int c = 0;
for (const auto &f : data) {
for (T v : f) {
......@@ -89,7 +93,7 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
std::string DescribeTensor(const PaddleTensor &tensor) {
static std::string DescribeTensor(const PaddleTensor &tensor) {
std::stringstream os;
os << "Tensor [" << tensor.name << "]\n";
os << " - type: ";
......@@ -113,8 +117,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
os << "\n";
os << " - data: ";
int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
[](int a, int b) { return a * b; });
int dim = VecReduceToInt(tensor.shape);
for (int i = 0; i < dim; i++) {
os << static_cast<float *>(tensor.data.data())[i] << " ";
}
......@@ -122,7 +125,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
return os.str();
}
void PrintTime(int batch_size, int repeat, int num_threads, int tid,
static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double latency, int epoch = 1) {
LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
<< ", threads: " << num_threads << ", thread id: " << tid
......
......@@ -28,34 +28,61 @@ limitations under the License. */
namespace paddle {
// Data type.
enum PaddleDType {
FLOAT32,
INT64,
// TODO(Superjomn) support more data types if needed.
};
/*
* Memory management for PaddleTensor.
* The PaddleBuf holds a buffer for data input or output. The memory can be
* allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
* should be reused for better performance.
*
* For user allocated memory, the following API can be used:
* - PaddleBuf(void* data, size_t length) to set an external memory by
*   specifying the memory address and length.
* - Reset(void* data, size_t length) to reset the PaddleBuf with an external
* memory.
* ATTENTION, for user allocated memory, deallocation should be done by the user
* externally after the program finishes. The PaddleBuf won't do any allocation
* or deallocation.
*
* To have the PaddleBuf allocate and manage the memory:
* - PaddleBuf(size_t length) will allocate memory of size `length`.
* - Resize(size_t length) resizes the memory to no less than `length`. ATTENTION,
*   if the allocated memory is larger than `length`, nothing will be done.
*/
class PaddleBuf {
public:
PaddleBuf() = default;
PaddleBuf(PaddleBuf&& other);
// Copy only available when memory is managed externally.
explicit PaddleBuf(const PaddleBuf&);
PaddleBuf& operator=(const PaddleBuf&);
PaddleBuf& operator=(PaddleBuf&&);
// Do not own the memory.
PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {}
// Own memory.
// PaddleBuf allocate memory internally, and manage it.
explicit PaddleBuf(size_t length)
: data_(new char[length]), length_(length), memory_owned_(true) {}
// Resize to `length` bytes.
// Set external memory, the PaddleBuf won't manage it.
PaddleBuf(void* data, size_t length)
: data_(data), length_(length), memory_owned_{false} {}
// Copy only available when memory is managed externally.
explicit PaddleBuf(const PaddleBuf&);
// Resize the memory.
void Resize(size_t length);
// Reset to external memory.
// Reset to external memory, with address and length set.
void Reset(void* data, size_t length);
// Tell whether the buffer is empty.
bool empty() const { return length_ == 0; }
// Get the memory address.
void* data() const { return data_; }
// Get the memory length.
size_t length() const { return length_; }
~PaddleBuf() { Free(); }
PaddleBuf& operator=(const PaddleBuf&);
PaddleBuf& operator=(PaddleBuf&&);
PaddleBuf() = default;
PaddleBuf(PaddleBuf&& other);
private:
void Free();
......@@ -64,6 +91,7 @@ class PaddleBuf {
bool memory_owned_{true};
};
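To make the PaddleBuf comment above concrete, here is a minimal usage sketch of the two memory modes. It relies only on the interface shown in this hunk (the constructors, Resize, Reset, data(), length(), empty()) and the include path used by the demos in this change; it is illustrative and not part of the diff itself.

```cpp
#include <cstring>
#include <vector>

#include "paddle/fluid/inference/paddle_inference_api.h"

int main() {
  // PaddleBuf owns the memory: allocate room for four floats and fill it.
  paddle::PaddleBuf owned(4 * sizeof(float));
  std::vector<float> values = {0.f, 1.f, 2.f, 3.f};
  std::memcpy(owned.data(), values.data(), owned.length());

  // PaddleBuf wraps user-allocated memory: it will not free this buffer,
  // deallocation stays with the caller.
  std::vector<float> external(16, 0.f);
  paddle::PaddleBuf wrapped(external.data(), external.size() * sizeof(float));

  // Resize re-allocates the owned buffer when it needs to grow; Reset re-points
  // the buffer at external memory without taking ownership.
  owned.Resize(8 * sizeof(float));
  wrapped.Reset(external.data(), external.size() * sizeof(float));
  return wrapped.empty() ? 1 : 0;
}
```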
// Basic input and output data structure for PaddlePredictor.
struct PaddleTensor {
PaddleTensor() = default;
std::string name; // variable name.
......@@ -73,19 +101,8 @@ struct PaddleTensor {
std::vector<std::vector<size_t>> lod; // Tensor+LoD equals LoDTensor
};
enum class PaddleEngineKind {
kNative = 0, // Use the native Fluid facility.
kAnakin, // Use Anakin for inference.
kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
kAnalysis
// TODO(Superjomn) support following engines latter.
// kTensorRT, // Use TensorRT for inference.
// kAutoMixedAnakin, // Automatically mix Fluid with Anakin.
};
/*
* A simple Inference API for Paddle. Currently this API can be used by
* non-sequence scenarios.
* A simple Inference API for Paddle.
*/
class PaddlePredictor {
public:
......@@ -120,26 +137,53 @@ struct NativeConfig : public PaddlePredictor::Config {
// GPU related fields.
bool use_gpu{false};
int device{0};
float fraction_of_gpu_memory{-1.f}; // Negative to notify initialization.
// NOTE: do NOT use it, just for internal tests, will be discarded later
bool _use_mkldnn{false};
// Specify the variable's name of each input.
bool specify_input_name{false};
float fraction_of_gpu_memory{-1.f}; // Change to a float in (0,1] if needed.
// Specify the exact path of program and parameter files.
std::string prog_file;
std::string param_file;
// Specify the names of the input variables if the input tensors don't follow
// the `feeds` and `fetches` of the `save_inference_model` phase.
bool specify_input_name{false};
};
// Configurations for Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
enum TargetType { NVGPU = 0, X86 };
int device;
std::string model_file;
int max_batch_size{-1};
TargetType target_type;
// A factory to help create different predictors.
//
// Usage:
//
// NativeConfig config;
// ... // change the configs.
// auto native_predictor = CreatePaddlePredictor(config);
//
// FOR EXTENSION DEVELOPER:
// Different predictors are designated by config type. Similar configs can be
// merged, but there shouldn't be a huge config containing different fields for
// more than one kind of predictor.
template <typename ConfigT>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
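Expanding the usage comment above into a compilable sketch (illustrative only, not part of the diff): it assumes a model saved with `save_inference_model` under `model_dir`, the `shape`/`dtype`/`data` members of `PaddleTensor` that are elided from the hunk above but used by the demos in this change, and that `PaddlePredictor::Run` takes an input vector plus an output pointer, as those demos do.

```cpp
#include <cstring>
#include <string>
#include <vector>

#include "paddle/fluid/inference/paddle_inference_api.h"

// Hypothetical end-to-end helper: build one feed, create a native predictor,
// and run a single batch.
std::vector<paddle::PaddleTensor> RunNative(const std::string& model_dir,
                                            const std::vector<float>& values) {
  paddle::PaddleTensor feed;
  feed.name = "data";  // hypothetical input variable name
  feed.shape = {1, static_cast<int>(values.size())};
  feed.dtype = paddle::PaddleDType::FLOAT32;
  feed.data.Resize(values.size() * sizeof(float));
  std::memcpy(feed.data.data(), values.data(), feed.data.length());

  paddle::NativeConfig config;
  config.model_dir = model_dir;  // or set prog_file/param_file explicitly
  config.use_gpu = false;

  // The engine kind (kNative here) is deduced from the config type by the
  // single-argument overload added in this change.
  auto predictor = paddle::CreatePaddlePredictor(config);

  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run({feed}, &outputs);
  return outputs;
}
```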
// NOTE The following APIs are too trivial; we will discard them in future
// versions.
enum class PaddleEngineKind {
kNative = 0, // Use the native Fluid facility.
kAutoMixedTensorRT, // Automatically mix Fluid with TensorRT.
kAnalysis, // More optimization.
kAnakin // Use Anakin for inference, not mature yet.
};
struct TensorRTConfig : public NativeConfig {
template <typename ConfigT, PaddleEngineKind engine>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
// ==
//
// -----------------------------------------------------------------------------------
// NOTE: The following APIs are not mature yet, we are still working on them.
namespace contrib {
// Accelerate GPU computation with TensorRT engine.
struct MixedRTConfig : public NativeConfig {
// Determine whether a subgraph will be executed by TRT.
int min_subgraph_size{1};
// While TensorRT allows an engine optimized for a given max batch size
......@@ -162,7 +206,6 @@ struct TensorRTConfig : public NativeConfig {
// NOTE WIP, not stable yet.
struct AnalysisConfig : public NativeConfig {
//
enum class IrPassMode {
kSystem, // Use system default passes, not customize.
kInclude, // Specify the passes in `ir_passes`.
......@@ -173,18 +216,21 @@ struct AnalysisConfig : public NativeConfig {
IrPassMode ir_mode{IrPassMode::kExclude};
// attention lstm fuse works only on some specific models, disabled by default.
std::vector<std::string> ir_passes{"attention_lstm_fuse_pass"};
// NOTE this is just for internal development, please do not use it.
bool _use_mkldnn{false};
};
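As a companion to the analyzer tests touched later in this change, a small sketch of how `contrib::AnalysisConfig` might be filled and handed to the `kAnalysis` engine; the model layout and paths are placeholders, and only fields visible in this diff are used.

```cpp
#include <memory>
#include <string>

#include "paddle/fluid/inference/paddle_inference_api.h"

std::unique_ptr<paddle::PaddlePredictor> MakeAnalysisPredictor(
    const std::string& model_path) {
  paddle::contrib::AnalysisConfig cfg;
  cfg.prog_file = model_path + "/__model__";  // placeholder model layout
  cfg.param_file = model_path + "/param";
  cfg.use_gpu = false;
  // Run all IR passes except the listed ones; kExclude is the only mode the
  // AnalysisPredictor in this change supports.
  cfg.ir_mode = paddle::contrib::AnalysisConfig::IrPassMode::kExclude;
  cfg.ir_passes = {"attention_lstm_fuse_pass"};
  return paddle::CreatePaddlePredictor<paddle::contrib::AnalysisConfig,
                                       paddle::PaddleEngineKind::kAnalysis>(cfg);
}
```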
// A factory to help create different predictors.
//
// FOR EXTENSION DEVELOPER:
// Different predictors are designated by config type and engine kind. Similar
// configs can be merged, but there shouldn't be a huge config containing
// different fields for more than one kind of predictor.
//
// Similarly, each engine kind should map to a unique predictor implementation.
template <typename ConfigT, PaddleEngineKind engine = PaddleEngineKind::kNative>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
// Configurations for Anakin engine.
struct AnakinConfig : public PaddlePredictor::Config {
enum TargetType { NVGPU = 0, X86 };
int device;
std::string model_file;
int max_batch_size{-1};
TargetType target_type;
};
} // namespace contrib
int PaddleDtypeSize(PaddleDType dtype);
......
......@@ -58,6 +58,11 @@ set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classifi
download_model_and_data(${TEXT_CLASSIFICATION_INSTALL_DIR} "text-classification-Senta.tar.gz" "text_classification_data.txt.tar.gz")
inference_analysis_api_test(test_analyzer_text_classification ${TEXT_CLASSIFICATION_INSTALL_DIR} analyzer_text_classification_tester.cc)
# seq_conv1
set(SEQ_CONV1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_conv1")
download_model_and_data(${SEQ_CONV1_INSTALL_DIR} "seq_conv1_model.tar.gz" "seq_conv1_data.txt.tar.gz")
inference_analysis_api_test(test_analyzer_seq_conv1 ${SEQ_CONV1_INSTALL_DIR} analyzer_seq_conv1_tester.cc)
# ocr
set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr")
if (NOT EXISTS ${OCR_INSTALL_DIR})
......
......@@ -22,10 +22,10 @@ DEFINE_string(model, "", "Directory of the inference model(mobile_v2).");
namespace paddle {
AnakinConfig GetConfig() {
AnakinConfig config;
contrib::AnakinConfig GetConfig() {
contrib::AnakinConfig config;
// use AnakinConfig::X86 if you need the CPU to do the inference
config.target_type = AnakinConfig::NVGPU;
config.target_type = contrib::AnakinConfig::NVGPU;
config.model_file = FLAGS_model;
config.device = 0;
config.max_batch_size = 1;
......@@ -33,9 +33,10 @@ AnakinConfig GetConfig() {
}
TEST(inference, anakin) {
AnakinConfig config = GetConfig();
auto config = GetConfig();
auto predictor =
CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
config);
float data[1 * 3 * 224 * 224] = {1.0f};
PaddleTensor tensor;
......
......@@ -97,10 +97,10 @@ void Data::get_batch_data(
namespace paddle {
AnakinConfig GetConfig() {
AnakinConfig config;
contrib::AnakinConfig GetConfig() {
contrib::AnakinConfig config;
// use AnakinConfig::X86 if you need to run inference on the CPU
config.target_type = AnakinConfig::X86;
config.target_type = contrib::AnakinConfig::X86;
config.model_file = FLAGS_model;
config.device = 0;
config.max_batch_size = 1000; // the max number of token
......@@ -121,9 +121,10 @@ void set_tensor(std::string name, std::vector<int> shape,
}
void single_test() {
AnakinConfig config = GetConfig();
auto config = GetConfig();
auto predictor =
CreatePaddlePredictor<AnakinConfig, PaddleEngineKind::kAnakin>(config);
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
config);
int max_batch_size = 1000;
std::string feature_file = FLAGS_datapath;
......
......@@ -95,7 +95,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
void SetConfig(AnalysisConfig *cfg) {
void SetConfig(contrib::AnalysisConfig *cfg) {
cfg->prog_file = FLAGS_infer_model + "/__model__";
cfg->param_file = FLAGS_infer_model + "/param";
cfg->use_gpu = false;
......@@ -117,7 +117,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
// Easy for profiling independently.
TEST(Analyzer_Chinese_ner, profile) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
......@@ -141,7 +141,7 @@ TEST(Analyzer_Chinese_ner, profile) {
// Check the fuse status
TEST(Analyzer_Chinese_ner, fuse_statis) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
......@@ -155,7 +155,7 @@ TEST(Analyzer_Chinese_ner, fuse_statis) {
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_Chinese_ner, compare) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
......
......@@ -149,7 +149,7 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
}
}
void SetConfig(AnalysisConfig *cfg) {
void SetConfig(contrib::AnalysisConfig *cfg) {
cfg->prog_file = FLAGS_infer_model + "/__model__";
cfg->param_file = FLAGS_infer_model + "/param";
cfg->use_gpu = false;
......@@ -172,7 +172,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
// Easy for profiling independently.
TEST(Analyzer_rnn1, profile) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
......@@ -183,7 +183,7 @@ TEST(Analyzer_rnn1, profile) {
// Check the fuse status
TEST(Analyzer_rnn1, fuse_statis) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
......@@ -198,7 +198,7 @@ TEST(Analyzer_rnn1, fuse_statis) {
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_rnn1, compare) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
......@@ -208,7 +208,7 @@ TEST(Analyzer_rnn1, compare) {
// Test Multi-Thread.
TEST(Analyzer_rnn1, multi_thread) {
AnalysisConfig cfg;
contrib::AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace paddle {
namespace inference {
struct DataRecord {
std::vector<std::vector<int64_t>> title1_all, title2_all, title3_all, l1_all;
std::vector<std::vector<int64_t>> title1, title2, title3, l1;
std::vector<size_t> title1_lod, title2_lod, title3_lod, l1_lod;
size_t batch_iter{0};
size_t batch_size{1};
size_t num_samples; // total number of samples
DataRecord() = default;
explicit DataRecord(const std::string &path, int batch_size = 1)
: batch_size(batch_size) {
Load(path);
}
DataRecord NextBatch() {
DataRecord data;
size_t batch_end = batch_iter + batch_size;
// NOTE skip the final batch if not enough data is provided.
if (batch_end <= title1_all.size()) {
data.title1_all.assign(title1_all.begin() + batch_iter,
title1_all.begin() + batch_end);
data.title2_all.assign(title2_all.begin() + batch_iter,
title2_all.begin() + batch_end);
data.title3_all.assign(title3_all.begin() + batch_iter,
title3_all.begin() + batch_end);
data.l1_all.assign(l1_all.begin() + batch_iter,
l1_all.begin() + batch_end);
// Prepare LoDs
data.title1_lod.push_back(0);
data.title2_lod.push_back(0);
data.title3_lod.push_back(0);
data.l1_lod.push_back(0);
CHECK(!data.title1_all.empty());
CHECK(!data.title2_all.empty());
CHECK(!data.title3_all.empty());
CHECK(!data.l1_all.empty());
CHECK_EQ(data.title1_all.size(), data.title2_all.size());
CHECK_EQ(data.title1_all.size(), data.title3_all.size());
CHECK_EQ(data.title1_all.size(), data.l1_all.size());
for (size_t j = 0; j < data.title1_all.size(); j++) {
data.title1.push_back(data.title1_all[j]);
data.title2.push_back(data.title2_all[j]);
data.title3.push_back(data.title3_all[j]);
data.l1.push_back(data.l1_all[j]);
// calculate lod
data.title1_lod.push_back(data.title1_lod.back() +
data.title1_all[j].size());
data.title2_lod.push_back(data.title2_lod.back() +
data.title2_all[j].size());
data.title3_lod.push_back(data.title3_lod.back() +
data.title3_all[j].size());
data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size());
}
}
batch_iter += batch_size;
return data;
}
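// Worked example (illustrative): if a batch holds three title1 sequences of
// lengths 3, 2 and 4, the loop above produces title1_lod = {0, 3, 5, 9}, i.e.
// cumulative offsets starting at 0. The final offset (9) is the total token
// count, which PrepareInputs below uses as the first dimension of the
// flattened [9, 1] input tensor.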
void Load(const std::string &path) {
std::ifstream file(path);
std::string line;
int num_lines = 0;
while (std::getline(file, line)) {
num_lines++;
std::vector<std::string> data;
split(line, '\t', &data);
// load title1 data
std::vector<int64_t> title1_data;
split_to_int64(data[0], ' ', &title1_data);
// load title2 data
std::vector<int64_t> title2_data;
split_to_int64(data[1], ' ', &title2_data);
// load title3 data
std::vector<int64_t> title3_data;
split_to_int64(data[2], ' ', &title3_data);
// load l1 data
std::vector<int64_t> l1_data;
split_to_int64(data[3], ' ', &l1_data);
title1_all.push_back(std::move(title1_data));
title2_all.push_back(std::move(title2_data));
title3_all.push_back(std::move(title3_data));
l1_all.push_back(std::move(l1_data));
}
num_samples = num_lines;
}
};
void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
int batch_size) {
PaddleTensor title1_tensor, title2_tensor, title3_tensor, l1_tensor;
title1_tensor.name = "title1";
title2_tensor.name = "title2";
title3_tensor.name = "title3";
l1_tensor.name = "l1";
auto one_batch = data->NextBatch();
int title1_size = one_batch.title1_lod[one_batch.title1_lod.size() - 1];
title1_tensor.shape.assign({title1_size, 1});
title1_tensor.lod.assign({one_batch.title1_lod});
int title2_size = one_batch.title2_lod[one_batch.title2_lod.size() - 1];
title2_tensor.shape.assign({title2_size, 1});
title2_tensor.lod.assign({one_batch.title2_lod});
int title3_size = one_batch.title3_lod[one_batch.title3_lod.size() - 1];
title3_tensor.shape.assign({title3_size, 1});
title3_tensor.lod.assign({one_batch.title3_lod});
int l1_size = one_batch.l1_lod[one_batch.l1_lod.size() - 1];
l1_tensor.shape.assign({l1_size, 1});
l1_tensor.lod.assign({one_batch.l1_lod});
// assign data
TensorAssignData<int64_t>(&title1_tensor, one_batch.title1);
TensorAssignData<int64_t>(&title2_tensor, one_batch.title2);
TensorAssignData<int64_t>(&title3_tensor, one_batch.title3);
TensorAssignData<int64_t>(&l1_tensor, one_batch.l1);
// Set inputs.
input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor});
for (auto &tensor : *input_slots) {
tensor.dtype = PaddleDType::INT64;
}
}
void SetConfig(AnalysisConfig *cfg) {
cfg->model_dir = FLAGS_infer_model;
cfg->use_gpu = false;
cfg->device = 0;
cfg->specify_input_name = true;
cfg->enable_ir_optim = true;
}
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
DataRecord data(FLAGS_infer_data, FLAGS_batch_size);
std::vector<PaddleTensor> input_slots;
int epoch = FLAGS_test_all_data ? data.num_samples / FLAGS_batch_size : 1;
LOG(INFO) << "number of samples: " << epoch * FLAGS_batch_size;
for (int bid = 0; bid < epoch; ++bid) {
PrepareInputs(&input_slots, &data, FLAGS_batch_size);
(*inputs).emplace_back(input_slots);
}
}
// Easy for profiling independently.
TEST(Analyzer_seq_conv1, profile) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<PaddleTensor> outputs;
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
TestPrediction(cfg, input_slots_all, &outputs, FLAGS_num_threads);
if (FLAGS_num_threads == 1 && !FLAGS_test_all_data) {
// the first inference result
PADDLE_ENFORCE_EQ(outputs.size(), 1UL);
size_t size = GetSize(outputs[0]);
PADDLE_ENFORCE_GT(size, 0);
float *result = static_cast<float *>(outputs[0].data.data());
// output is probability, which is in (0, 1).
for (size_t i = 0; i < size; i++) {
EXPECT_GT(result[i], 0);
EXPECT_LT(result[i], 1);
}
}
}
// Check the fuse status
TEST(Analyzer_seq_conv1, fuse_statis) {
AnalysisConfig cfg;
SetConfig(&cfg);
int num_ops;
auto fuse_statis = GetFuseStatis(cfg, &num_ops);
}
// Compare result of NativeConfig and AnalysisConfig
TEST(Analyzer_seq_conv1, compare) {
AnalysisConfig cfg;
SetConfig(&cfg);
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(cfg, input_slots_all);
}
} // namespace inference
} // namespace paddle
......@@ -38,6 +38,8 @@ DEFINE_bool(use_analysis, true,
namespace paddle {
namespace inference {
using contrib::AnalysisConfig;
void CompareResult(const std::vector<PaddleTensor> &outputs,
const std::vector<PaddleTensor> &ref_outputs) {
EXPECT_GT(outputs.size(), 0UL);
......@@ -45,11 +47,8 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
for (size_t i = 0; i < outputs.size(); i++) {
auto &out = outputs[i];
auto &ref_out = ref_outputs[i];
size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
[](int a, int b) { return a * b; });
size_t ref_size =
std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1,
[](int a, int b) { return a * b; });
size_t size = VecReduceToInt(out.shape);
size_t ref_size = VecReduceToInt(ref_out.shape);
EXPECT_GT(size, 0);
EXPECT_EQ(size, ref_size);
EXPECT_EQ(out.dtype, ref_out.dtype);
......@@ -77,18 +76,15 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
std::unique_ptr<PaddlePredictor> CreateTestPredictor(
const AnalysisConfig &config, bool use_analysis = true) {
if (use_analysis) {
return CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
config);
return CreatePaddlePredictor<contrib::AnalysisConfig,
PaddleEngineKind::kAnalysis>(config);
} else {
return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
config);
}
}
size_t GetSize(const PaddleTensor &out) {
return std::accumulate(out.shape.begin(), out.shape.end(), 1,
[](int a, int b) { return a * b; });
}
size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
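// VecReduceToInt is the new helper that folds a shape vector into an element
// count, replacing the inlined std::accumulate calls removed above. Its exact
// definition lives elsewhere in tester_helper.h; a minimal sketch, assuming it
// simply multiplies the entries (std::accumulate from <numeric>):
//
//   static int VecReduceToInt(const std::vector<int> &v) {
//     // e.g. a shape of {4, 3, 2} reduces to 24 elements
//     return std::accumulate(v.begin(), v.end(), 1,
//                            [](int a, int b) { return a * b; });
//   }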
std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
int *num_ops) {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/fluid/operators/activation_op.h"
#include <string>
#include "paddle/fluid/operators/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h"
namespace paddle {
namespace operators {
......@@ -105,105 +106,105 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
}
};
__attribute__((unused)) constexpr char SigmoidDoc[] = R"DOC(
UNUSED constexpr char SigmoidDoc[] = R"DOC(
Sigmoid Activation Operator
$$out = \frac{1}{1 + e^{-x}}$$
)DOC";
__attribute__((unused)) constexpr char LogSigmoidDoc[] = R"DOC(
UNUSED constexpr char LogSigmoidDoc[] = R"DOC(
Logsigmoid Activation Operator
$$out = \\log \\frac{1}{1 + e^{-x}}$$
)DOC";
__attribute__((unused)) constexpr char ExpDoc[] = R"DOC(
UNUSED constexpr char ExpDoc[] = R"DOC(
Exp Activation Operator.
$out = e^x$
)DOC";
__attribute__((unused)) constexpr char ReluDoc[] = R"DOC(
UNUSED constexpr char ReluDoc[] = R"DOC(
Relu Activation Operator.
$out = \max(x, 0)$
)DOC";
__attribute__((unused)) constexpr char TanhDoc[] = R"DOC(
UNUSED constexpr char TanhDoc[] = R"DOC(
Tanh Activation Operator.
$$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC";
__attribute__((unused)) constexpr char TanhShrinkDoc[] = R"DOC(
UNUSED constexpr char TanhShrinkDoc[] = R"DOC(
TanhShrink Activation Operator.
$$out = x - \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC";
__attribute__((unused)) constexpr char SqrtDoc[] = R"DOC(
UNUSED constexpr char SqrtDoc[] = R"DOC(
Sqrt Activation Operator.
$out = \sqrt{x}$
)DOC";
__attribute__((unused)) constexpr char AbsDoc[] = R"DOC(
UNUSED constexpr char AbsDoc[] = R"DOC(
Abs Activation Operator.
$out = |x|$
)DOC";
__attribute__((unused)) constexpr char CeilDoc[] = R"DOC(
UNUSED constexpr char CeilDoc[] = R"DOC(
Ceil Activation Operator.
$out = ceil(x)$
)DOC";
__attribute__((unused)) constexpr char FloorDoc[] = R"DOC(
UNUSED constexpr char FloorDoc[] = R"DOC(
Floor Activation Operator.
$out = floor(x)$
)DOC";
__attribute__((unused)) constexpr char CosDoc[] = R"DOC(
UNUSED constexpr char CosDoc[] = R"DOC(
Cosine Activation Operator.
$out = cos(x)$
)DOC";
__attribute__((unused)) constexpr char SinDoc[] = R"DOC(
UNUSED constexpr char SinDoc[] = R"DOC(
Sine Activation Operator.
$out = sin(x)$
)DOC";
__attribute__((unused)) constexpr char RoundDoc[] = R"DOC(
UNUSED constexpr char RoundDoc[] = R"DOC(
Round Activation Operator.
$out = [x]$
)DOC";
__attribute__((unused)) constexpr char ReciprocalDoc[] = R"DOC(
UNUSED constexpr char ReciprocalDoc[] = R"DOC(
Reciprocal Activation Operator.
$$out = \\frac{1}{x}$$
)DOC";
__attribute__((unused)) constexpr char LogDoc[] = R"DOC(
UNUSED constexpr char LogDoc[] = R"DOC(
Log Activation Operator.
$out = \ln(x)$
......@@ -212,21 +213,21 @@ Natural logarithm of x.
)DOC";
__attribute__((unused)) constexpr char SquareDoc[] = R"DOC(
UNUSED constexpr char SquareDoc[] = R"DOC(
Square Activation Operator.
$out = x^2$
)DOC";
__attribute__((unused)) constexpr char SoftplusDoc[] = R"DOC(
UNUSED constexpr char SoftplusDoc[] = R"DOC(
Softplus Activation Operator.
$out = \ln(1 + e^{x})$
)DOC";
__attribute__((unused)) constexpr char SoftsignDoc[] = R"DOC(
UNUSED constexpr char SoftsignDoc[] = R"DOC(
Softsign Activation Operator.
$$out = \frac{x}{1 + |x|}$$
......
......@@ -23,8 +23,6 @@ namespace operators {
using Tensor = framework::Tensor;
using LoDTensor = framework::LoDTensor;
static constexpr int kROISize = 4;
template <typename T>
bool GT_E(T a, T b) {
return (a > b) || fabs(a - b) < 1e-4;
......
......@@ -46,6 +46,25 @@ static std::string gethash(const memory::dims& input_dims,
dims2str(paddings) + pooling_type + suffix;
}
static inline int ComputeCeiledOutput(int input_size, int kernel_size,
int padding, int stride) {
return (input_size - kernel_size + 2 * padding) / stride + 1;
}
static inline void CorrectOutputSize(
const std::vector<int>& src_tz, const std::vector<int>& dst_tz,
const std::vector<int>& kernel_size, const std::vector<int>& paddings,
const std::vector<int>& strides,
std::vector<int>& right_bot_padding) { // NOLINT
for (size_t i = 0; i < right_bot_padding.size(); i++) {
int desired_size = ComputeCeiledOutput(src_tz[i + 2], kernel_size[i],
paddings[i], strides[i]);
if (desired_size != dst_tz[i + 2]) {
right_bot_padding[i] += strides[i];
}
}
}
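// Worked example (illustrative numbers): for an input extent of 6 with kernel 3,
// padding 0 and stride 2, ComputeCeiledOutput returns (6 - 3 + 0) / 2 + 1 = 2
// (integer division truncates), while ceil_mode asks for ceil(3 / 2) + 1 = 3
// output elements. The sizes differ, so the right/bottom padding grows by the
// stride (2), giving (6 - 3 + 0 + 2) / 2 + 1 = 3, which now matches dst_tz.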
template <typename T>
class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
public:
......@@ -103,6 +122,13 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto pool_p =
std::static_pointer_cast<pooling_forward>(dev_ctx.GetBlob(key_pool_p));
if (pool_p == nullptr) {
const std::vector<int>& padding_left_top(paddings);
std::vector<int> padding_right_bottom(paddings);
bool ceil_mode = ctx.Attr<bool>("ceil_mode");
if (ceil_mode) {
CorrectOutputSize(src_tz, dst_tz, ksize, paddings, strides,
padding_right_bottom);
}
auto src_md = platform::MKLDNNMemDesc(
src_tz, platform::MKLDNNGetDataType<T>(), input_format);
......@@ -114,8 +140,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
mkldnn::memory::format::any);
std::shared_ptr<mkldnn::pooling_forward::primitive_desc> pool_pd =
CreatePrimitiveDesc(src_md, dst_md, strides, paddings, ksize,
pooling_type, mkldnn_engine);
CreatePrimitiveDesc(src_md, dst_md, strides, padding_left_top,
padding_right_bottom, ksize, pooling_type,
mkldnn_engine, ceil_mode);
// save pool_pd into global device context to be referred in backward path
dev_ctx.SetBlob(key_pool_pd, pool_pd);
......@@ -171,14 +198,16 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
private:
std::unique_ptr<mkldnn::pooling_forward::primitive_desc> CreatePrimitiveDesc(
const mkldnn::memory::desc& src, const mkldnn::memory::desc& dst,
const std::vector<int>& stride, const std::vector<int>& padding,
const std::vector<int>& kernel, const std::string& pooling_type,
const mkldnn::engine& engine) const {
const std::vector<int>& stride, const std::vector<int>& padding_left_top,
const std::vector<int>& padding_right_bot, const std::vector<int>& kernel,
const std::string& pooling_type, const mkldnn::engine& engine,
bool ceil_mode) const {
auto pool_desc = mkldnn::pooling_forward::desc(
mkldnn::prop_kind::forward,
pooling_type == "max" ? mkldnn::algorithm::pooling_max
: mkldnn::algorithm::pooling_avg,
src, dst, stride, kernel, padding, padding, mkldnn::padding_kind::zero);
src, dst, stride, kernel, padding_left_top, padding_right_bot,
mkldnn::padding_kind::zero);
auto p_pool_pd =
new mkldnn::pooling_forward::primitive_desc(pool_desc, engine);
......
......@@ -34,7 +34,7 @@ namespace operators {
using FluidDT = framework::proto::VarType_Type;
using TRT_DT = nvinfer1::DataType;
namespace {
namespace { // NOLINT
TRT_DT FluidDataType2TRT(FluidDT type) {
switch (type) {
......
......@@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel {
"Output(Indices) of TopkOp should not be null.");
auto input_dims = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(input_dims.size(), 2,
"Rank of TopK op's input must be 2.");
const int k = static_cast<int>(ctx->Attrs().Get<int>("k"));
PADDLE_ENFORCE_GE(k, 1, "k must >= 1");
......
......@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "paddle/fluid/platform/cudnn_helper.h"
#include <gtest/gtest.h>
......
......@@ -21,6 +21,7 @@ limitations under the License. */
#if defined(_WIN32)
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h
#define GOOGLE_GLOG_DLL_DECL
#endif
#ifdef PADDLE_WITH_CUDA
......@@ -47,7 +48,7 @@ limitations under the License. */
#include "paddle/fluid/platform/dynload/cublas.h"
#include "paddle/fluid/platform/dynload/cudnn.h"
#include "paddle/fluid/platform/dynload/curand.h"
#if !defined(__APPLE__) and !defined(_WIN32)
#if !defined(__APPLE__) && !defined(_WIN32)
#include "paddle/fluid/platform/dynload/nccl.h"
#endif // __APPLE__
#endif // PADDLE_WITH_CUDA
......@@ -216,7 +217,7 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
#endif
}
#if !defined(__APPLE__) and !defined(_WIN32)
#if !defined(__APPLE__) && !defined(_WIN32)
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
ncclResult_t stat, const Args&... args) {
......@@ -260,14 +261,8 @@ inline void throw_on_error(T e) {
} \
} while (false)
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
__LINE__); \
} while (false)
#else
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__)
#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__);
#endif // REPLACE_ENFORCE_GLOG
#else // !_WIN32
......@@ -281,6 +276,12 @@ inline void throw_on_error(T e) {
#define PADDLE_ENFORCE(x, ...) x
#endif // !_WIN32
#define PADDLE_THROW_EOF() \
do { \
throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \
__LINE__); \
} while (false)
/*
* Some enforce helpers here, usage:
* int a = 1;
......@@ -294,7 +295,7 @@ inline void throw_on_error(T e) {
* extra messages is also supported, for example:
* PADDLE_ENFORCE(a, b, "some simple enforce failed between %d numbers", 2)
*/
#if !defined(_WIN32)
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, ==, !=, __VA_ARGS__)
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) \
......@@ -307,6 +308,7 @@ inline void throw_on_error(T e) {
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <, >=, __VA_ARGS__)
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) \
__PADDLE_BINARY_COMPARE(__VAL0, __VAL1, <=, >, __VA_ARGS__)
#define PADDLE_ENFORCE_NOT_NULL(__VAL, ...) \
do { \
if (UNLIKELY(nullptr == (__VAL))) { \
......@@ -326,6 +328,27 @@ inline void throw_on_error(T e) {
paddle::string::Sprintf("" __VA_ARGS__)); \
} \
} while (0)
#else
#define PADDLE_ENFORCE_EQ(__VAL0, __VAL1, ...) ((__VAL0) == (__VAL1))
#define PADDLE_ENFORCE_NE(__VAL0, __VAL1, ...) ((__VAL0) != (__VAL1))
#define PADDLE_ENFORCE_GT(__VAL0, __VAL1, ...) ((__VAL0) > (__VAL1))
#define PADDLE_ENFORCE_GE(__VAL0, __VAL1, ...) ((__VAL0) >= (__VAL1))
#define PADDLE_ENFORCE_LT(__VAL0, __VAL1, ...) ((__VAL0) < (__VAL1))
#define PADDLE_ENFORCE_LE(__VAL0, __VAL1, ...) ((__VAL0) <= (__VAL1))
#define __PADDLE_BINARY_COMPARE(__VAL0, __VAL1, __CMP, __INV_CMP, ...) \
do { \
if (!((__VAL0)__CMP(__VAL1))) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed."); \
} \
} while (0)
#define PADDLE_ENFORCE_NOT_NULL(__VAL1, ...) \
do { \
if (nullptr == (__VAL1)) { \
PADDLE_THROW("Windows disable the enforce. Enforce failed"); \
} \
} while (0)
#endif // !_WIN32
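// Illustrative call site, valid under both branches above (batch_size is a
// hypothetical variable):
//
//   PADDLE_ENFORCE_GT(batch_size, 0, "batch_size must be positive, got %d",
//                     batch_size);
//
// Outside Windows this expands through __PADDLE_BINARY_COMPARE and throws with
// the formatted message when the comparison fails; with the Windows fallback
// above the macro reduces to the bare comparison expression, so the check and
// the custom message are effectively dropped.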
} // namespace platform
} // namespace paddle
......@@ -16,6 +16,9 @@ limitations under the License. */
#include <string>
#include <vector>
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include "gflags/gflags.h"
#include "glog/logging.h"
......
......@@ -48,6 +48,9 @@ void BindConstValue(pybind11::module* m) {
op_proto_and_checker_maker.def(
"kOpNameScopeAttrName",
framework::OpProtoAndCheckerMaker::OpNamescopeAttrName);
op_proto_and_checker_maker.def(
"kOpCreationCallstackAttrName",
framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
}
} // namespace pybind
......
......@@ -147,6 +147,7 @@ function cmake_gen() {
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR}
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
-DPY_VERSION=${PY_VERSION:-2.7}
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
========================================
EOF
# Disable UNITTEST_USE_VIRTUALENV in docker because
......@@ -178,7 +179,8 @@ EOF
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
-DPY_VERSION=${PY_VERSION:-2.7}
-DPY_VERSION=${PY_VERSION:-2.7} \
-DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX:-/paddle/build}
}
......@@ -361,7 +363,7 @@ EOF
ctest --output-on-failure
# make install should also be test when unittest
make install -j `nproc`
pip install /usr/local/opt/paddle/share/wheels/*.whl
pip install ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then
paddle version
fi
......
......@@ -19,17 +19,8 @@ from .framework import *
# import all class inside executor into fluid module
from . import executor
from .executor import *
from . import trainer
from .trainer import Trainer
from .trainer import BeginEpochEvent
from .trainer import EndEpochEvent
from .trainer import BeginStepEvent
from .trainer import EndStepEvent
from .trainer import CheckpointConfig
from . import inferencer
from .inferencer import Inferencer
from . import io
from . import evaluator
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
from .. import core
from .. import executor
from .. import framework
from .. import io
from .. import parallel_executor
from .. import unique_name
from .trainer import check_and_get_place
__all__ = ['Inferencer', ]
class Inferencer(object):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that will return predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): use parallel_executor to run the inference; it will use multiple CPUs/GPUs.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def __init__(self, infer_func, param_path, place=None, parallel=False):
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
self.place = check_and_get_place(place)
self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_params(executor.Executor(self.place), param_path)
if parallel:
with self._prog_and_scope_guard():
self.exe = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.predict_var.name)
else:
self.exe = executor.Executor(self.place)
self.inference_program = self.inference_program.clone(for_test=True)
def infer(self, inputs, return_numpy=True):
"""
Do Inference for Inputs
Args:
inputs (map): a map of {"input_name": input_var} that will be fed into the inference program
return_numpy (bool): transform return value into numpy or not
Returns:
Tensor or Numpy: the predicted value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
if not isinstance(inputs, dict):
raise ValueError(
"inputs should be a map of {'input_name': input_var}")
with self._prog_and_scope_guard():
results = self.exe.run(feed=inputs,
fetch_list=[self.predict_var.name],
return_numpy=return_numpy)
return results
@contextlib.contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(main_program=self.inference_program):
with executor.scope_guard(self.scope):
yield
This diff has been collapsed.
......@@ -18,6 +18,7 @@ import collections
import contextlib
import re
import six
import traceback
import numpy as np
......@@ -34,11 +35,12 @@ except ImportError as e:
except Exception as e:
raise e
from . import unique_name
import os
PADDLE_ON_MODEL_CE = os.environ.get('PADDLE_ON_MODEL_CE', None) is not None
__all__ = [
'Program',
'Operator',
'Parameter',
'default_startup_program',
'default_main_program',
'program_guard',
......@@ -490,7 +492,8 @@ class OpProtoHolder(object):
return {
core.op_proto_and_checker_maker.kOpRoleAttrName(),
core.op_proto_and_checker_maker.kOpRoleVarAttrName(),
core.op_proto_and_checker_maker.kOpNameScopeAttrName()
core.op_proto_and_checker_maker.kOpNameScopeAttrName(),
core.op_proto_and_checker_maker.kOpCreationCallstackAttrName()
}
......@@ -573,6 +576,11 @@ class Operator(object):
if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0:
del op_attrs[role_var_name]
if not PADDLE_ON_MODEL_CE:
callstack_var_name = op_maker.kOpCreationCallstackAttrName()
op_attrs[callstack_var_name] = list(
reversed(traceback.format_stack()))[1:]
if len(self.desc.type()) != 0:
return
if type is None:
......
......@@ -12,101 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
from . import core
from . import executor
from . import framework
from . import io
from . import parallel_executor
from . import unique_name
from .trainer import check_and_get_place
__all__ = ['Inferencer', ]
class Inferencer(object):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that will return predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def __init__(self, infer_func, param_path, place=None, parallel=False):
self.param_path = param_path
self.scope = core.Scope()
self.parallel = parallel
self.place = check_and_get_place(place)
self.inference_program = framework.Program()
with framework.program_guard(self.inference_program):
with unique_name.guard():
self.predict_var = infer_func()
with self._prog_and_scope_guard():
# load params from param_path into scope
io.load_params(executor.Executor(self.place), param_path)
if parallel:
with self._prog_and_scope_guard():
self.exe = parallel_executor.ParallelExecutor(
use_cuda=isinstance(self.place, core.CUDAPlace),
loss_name=self.predict_var.name)
else:
self.exe = executor.Executor(self.place)
self.inference_program = self.inference_program.clone(for_test=True)
def infer(self, inputs, return_numpy=True):
"""
Do Inference for Inputs
Args:
inputs (map): a map of {"input_name": input_var} that will be feed into the inference program
return_numpy (bool): transform return value into numpy or not
Returns:
Tensor or Numpy: the predict value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
if not isinstance(inputs, dict):
raise ValueError(
"inputs should be a map of {'input_name': input_var}")
with self._prog_and_scope_guard():
results = self.exe.run(feed=inputs,
fetch_list=[self.predict_var.name],
return_numpy=return_numpy)
return results
@contextlib.contextmanager
def _prog_and_scope_guard(self):
with framework.program_guard(main_program=self.inference_program):
with executor.scope_guard(self.scope):
yield
# NOTE: inferencer is moved into fluid.contrib.inferencer.
__all__ = []
......@@ -6471,6 +6471,8 @@ def _elementwise_op(helper):
assert y is not None, 'y cannot be None in {}'.format(op_type)
axis = helper.kwargs.get('axis', -1)
use_mkldnn = helper.kwargs.get('use_mkldnn', False)
out = helper.kwargs.get('out', None)
if out is None:
name = helper.kwargs.get('name', None)
if name is None:
out = helper.create_tmp_variable(dtype=x.dtype)
......@@ -6489,7 +6491,13 @@ def _elementwise_op(helper):
@templatedoc()
def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
def scale(x,
scale=1.0,
bias=0.0,
bias_after_scale=True,
out=None,
act=None,
name=None):
"""
${comment}
......@@ -6498,6 +6506,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
scale(${scale_type}): ${scale_comment}
bias(${bias_type}): ${bias_comment}
bias_after_scale(${bias_after_scale_type}): ${bias_after_scale_comment}
out(Tensor): Output tensor.
act(basestring|None): Activation applied to the output.
name(basestring|None): Name of the output.
......@@ -6506,6 +6515,7 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
"""
helper = LayerHelper('scale', **locals())
if out is None:
if name is None:
out = helper.create_tmp_variable(dtype=x.dtype)
else:
......@@ -6524,31 +6534,73 @@ def scale(x, scale=1.0, bias=0.0, bias_after_scale=True, act=None, name=None):
return helper.append_activation(out)
def elementwise_add(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
def elementwise_add(x,
y,
out=None,
axis=-1,
use_mkldnn=False,
act=None,
name=None):
return _elementwise_op(LayerHelper('elementwise_add', **locals()))
def elementwise_div(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
def elementwise_div(x,
y,
out=None,
axis=-1,
use_mkldnn=False,
act=None,
name=None):
return _elementwise_op(LayerHelper('elementwise_div', **locals()))
def elementwise_sub(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
def elementwise_sub(x,
y,
out=None,
axis=-1,
use_mkldnn=False,
act=None,
name=None):
return _elementwise_op(LayerHelper('elementwise_sub', **locals()))
def elementwise_mul(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
def elementwise_mul(x,
y,
out=None,
axis=-1,
use_mkldnn=False,
act=None,
name=None):
return _elementwise_op(LayerHelper('elementwise_mul', **locals()))
def elementwise_max(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
def elementwise_max(x,
y,
out=None,
axis=-1,
use_mkldnn=False,
act=None,
name=None):
return _elementwise_op(LayerHelper('elementwise_max', **locals()))
def elementwise_min(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
def elementwise_min(x,
y,
out=None,
axis=-1,
use_mkldnn=False,
act=None,
name=None):
return _elementwise_op(LayerHelper('elementwise_min', **locals()))
def elementwise_pow(x, y, axis=-1, use_mkldnn=False, act=None, name=None):
def elementwise_pow(x,
y,
out=None,
axis=-1,
use_mkldnn=False,
act=None,
name=None):
return _elementwise_op(LayerHelper('elementwise_pow', **locals()))
......@@ -6560,6 +6612,7 @@ for func in [
func.__doc__ = _generate_doc_string_(
op_proto,
additional_args_lines=[
"out (Tensor): The output tensor of elementwise op.",
"act (basestring|None): Activation applied to the output.",
"name (basestring|None): Name of the output."
])
......@@ -21,6 +21,7 @@ __activations_noattr__ = [
'exp',
'tanh',
'tanh_shrink',
'softshrink',
'sqrt',
'abs',
'ceil',
......@@ -52,7 +53,6 @@ __all__ = [
'slice',
'shape',
'maxout',
'softshrink',
]
for _OP in set(__all__):
......
......@@ -21,6 +21,7 @@ __all__ = [
"sequence_conv_pool",
"glu",
"scaled_dot_product_attention",
"img_conv_group",
]
......
......@@ -74,28 +74,7 @@ class ParallelExecutor(object):
build_strategy=None,
num_trainers=1,
trainer_id=0,
scope=None,
**kwargs):
if len(kwargs) != 0:
err_msg = ""
for key in kwargs:
if key in dir(ExecutionStrategy):
err_msg += \
"Setting {0} by constructor is deprecated. Use " \
"strategy=ExecutionStrategy(); strategy.{0}=xxx; " \
"pe=ParallelExecutor(exec_strategy=strategy) " \
"instead.\n ".format(key)
elif key in dir(BuildStrategy):
err_msg += \
"Setting {0} by constructor is deprecated. Use " \
"strategy=BuildStrategy(); See help(" \
"paddle.fluid.ParallelExecutor.BuildStrategy) \n".format(
key)
else:
err_msg += "Setting {0} by constructor is deprecated. Use strategy.\n".format(
key)
raise ValueError(err_msg)
scope=None):
self._places = []
self._act_places = []
if use_cuda:
......
......@@ -185,7 +185,17 @@ class WeightNormParamAttr(ParamAttr):
Args:
dim(int): The dimension along which to compute the weight norm. Default None.
kwargs: Any field in ParamAttr. Default None.
name(str): The parameter's name. Default None.
initializer(Initializer): The method to initial this parameter. Default None.
learning_rate(float): The parameter's learning rate. The effective learning
rate during optimization is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
Default 1.0.
regularizer(WeightDecayRegularizer): Regularization factor. Default None.
trainable(bool): Whether this parameter is trainable. Default True.
gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
gradient. Default None.
do_model_average(bool): Whether this parameter should do model average.
Default False.
Examples:
.. code-block:: python
......@@ -204,6 +214,21 @@ class WeightNormParamAttr(ParamAttr):
# these parameters for inference.
params_with_weight_norm = []
def __init__(self, dim=None, **kwargs):
super(WeightNormParamAttr, self).__init__(**kwargs)
def __init__(self,
dim=None,
name=None,
initializer=None,
learning_rate=1.0,
regularizer=None,
trainable=True,
gradient_clip=None,
do_model_average=False):
super(WeightNormParamAttr, self).__init__(
name=name,
initializer=initializer,
learning_rate=learning_rate,
regularizer=regularizer,
trainable=trainable,
gradient_clip=gradient_clip,
do_model_average=do_model_average)
self.dim = dim
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import contextlib
import numpy
import unittest
......@@ -57,11 +67,11 @@ def optimizer_func():
def train(use_cuda, train_program, params_dirname, inference_model_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
if event.step == 10:
test_metrics = trainer.test(
reader=test_reader, feed_order=['x', 'y'])
......@@ -91,7 +101,7 @@ def infer(use_cuda, inference_program, params_dirname=None):
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program, param_path=params_dirname, place=place)
batch_size = 10
......
......@@ -14,11 +14,22 @@
from __future__ import print_function
import sys
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import numpy
import six
import os
import cifar10_small_test_set
......@@ -106,7 +117,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False)
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label'])
......@@ -118,7 +129,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program,
optimizer_func=optimizer_func,
place=place,
......@@ -133,7 +144,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
......
......@@ -14,11 +14,22 @@
from __future__ import print_function
import sys
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import numpy
import six
import os
import cifar10_small_test_set
......@@ -83,7 +94,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
paddle.dataset.cifar.test10(), batch_size=BATCH_SIZE, drop_last=False)
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
avg_cost, accuracy = trainer.test(
reader=test_reader, feed_order=['pixel', 'label'])
......@@ -95,7 +106,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
return
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
......@@ -110,7 +121,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import numpy as np
WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
......@@ -149,7 +159,7 @@ def optimize_func():
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimize_func)
feed_order = [
......@@ -164,7 +174,7 @@ def train(use_cuda, train_program, params_dirname):
# place)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
avg_cost_set = trainer.test(
......@@ -184,7 +194,7 @@ def train(use_cuda, train_program, params_dirname):
if math.isnan(float(avg_cost)):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
......@@ -204,7 +214,7 @@ def train(use_cuda, train_program, params_dirname):
def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
inference_program, param_path=params_dirname, place=place)
# Setup input by creating LoDTensor to represent sequence of words.
......
......@@ -13,17 +13,28 @@
# limitations under the License.
from __future__ import print_function
import contextlib
import sys
import numpy as np
import paddle
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.framework as framework
import paddle.fluid.layers as pd
from paddle.fluid.executor import Executor
from functools import partial
import unittest
import os
dict_size = 30000
source_dict_dim = target_dict_dim = dict_size
......@@ -198,12 +209,12 @@ def train(use_cuda, is_sparse, is_local=True):
]
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
print('pass_id=' + str(event.epoch) + ' batch=' + str(event.step))
if event.step == 10:
trainer.stop()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=partial(train_program, is_sparse),
place=place,
optimizer_func=optimizer_func)
......
......@@ -14,14 +14,22 @@
from __future__ import print_function
import argparse
import sys
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import paddle
import six
import sys
import numpy
import unittest
import math
import sys
import os
......@@ -68,14 +76,14 @@ def optimizer_func():
def train(use_cuda, train_program, parallel, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
parallel=parallel)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
......@@ -91,7 +99,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print(
("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch,
......@@ -112,7 +120,7 @@ def train(use_cuda, train_program, parallel, params_dirname):
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
......
......@@ -14,14 +14,22 @@
from __future__ import print_function
import argparse
import sys
import paddle.fluid as fluid
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.core as core
import paddle
import six
import sys
import numpy
import unittest
import math
import sys
import os
......@@ -55,14 +63,14 @@ def optimizer_func():
def train(use_cuda, train_program, params_dirname, parallel):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program,
place=place,
optimizer_func=optimizer_func,
parallel=parallel)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
......@@ -94,7 +102,7 @@ def train(use_cuda, train_program, params_dirname, parallel):
def infer(use_cuda, inference_program, parallel, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program,
param_path=params_dirname,
place=place,
......
......@@ -19,6 +19,16 @@ import sys
import numpy as np
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import paddle.fluid.layers as layers
import paddle.fluid.nets as nets
......@@ -164,7 +174,7 @@ def optimizer_func():
def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program, place=place, optimizer_func=optimizer_func)
feed_order = [
......@@ -173,7 +183,7 @@ def train(use_cuda, train_program, params_dirname):
]
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
test_reader = paddle.batch(
paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
avg_cost_set = trainer.test(
......@@ -208,7 +218,7 @@ def train(use_cuda, train_program, params_dirname):
def infer(use_cuda, inference_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
inference_program, param_path=params_dirname, place=place)
# Use the first data from paddle.dataset.movielens.test() as input.
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
......@@ -72,13 +82,13 @@ def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
......@@ -96,7 +106,7 @@ def train(use_cuda, train_program, params_dirname):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
......@@ -119,7 +129,7 @@ def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
......@@ -87,13 +97,13 @@ def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
avg_cost, acc = trainer.test(
......@@ -111,7 +121,7 @@ def train(use_cuda, train_program, params_dirname):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
......@@ -134,7 +144,7 @@ def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
from functools import partial
import numpy as np
......@@ -79,13 +89,13 @@ def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
trainer = fluid.Trainer(
trainer = Trainer(
train_func=partial(train_program, word_dict),
place=place,
optimizer_func=optimizer_func)
def event_handler(event):
if isinstance(event, fluid.EndEpochEvent):
if isinstance(event, EndEpochEvent):
test_reader = paddle.batch(
paddle.dataset.imdb.test(word_dict),
batch_size=BATCH_SIZE,
......@@ -105,7 +115,7 @@ def train(use_cuda, train_program, params_dirname):
event.epoch + 1, avg_cost, acc))
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
elif isinstance(event, fluid.EndStepEvent):
elif isinstance(event, EndStepEvent):
print("Step {0}, Epoch {1} Metrics {2}".format(
event.step, event.epoch, list(map(np.array, event.metrics))))
if event.step == 1: # Run 2 iterations to speed CI
......@@ -129,7 +139,7 @@ def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
word_dict = paddle.dataset.imdb.word_dict()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=partial(inference_program, word_dict),
param_path=params_dirname,
place=place)
......
......@@ -16,6 +16,16 @@ from __future__ import print_function
import paddle
import paddle.fluid as fluid
import sys
try:
from paddle.fluid.contrib.trainer import *
from paddle.fluid.contrib.inferencer import *
except ImportError:
print(
"In the fluid 1.0, the trainer and inferencer are moving to paddle.fluid.contrib",
file=sys.stderr)
from paddle.fluid.trainer import *
from paddle.fluid.inferencer import *
import numpy as np
import math
import sys
......@@ -95,7 +105,7 @@ def train(use_cuda, train_program, params_dirname):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
def event_handler(event):
if isinstance(event, fluid.EndStepEvent):
if isinstance(event, EndStepEvent):
outs = trainer.test(
reader=test_reader,
feed_order=['firstw', 'secondw', 'thirdw', 'forthw', 'nextw'])
......@@ -109,7 +119,7 @@ def train(use_cuda, train_program, params_dirname):
if math.isnan(avg_cost):
sys.exit("got NaN loss, training failed.")
trainer = fluid.Trainer(
trainer = Trainer(
train_func=train_program, optimizer_func=optimizer_func, place=place)
trainer.train(
......@@ -121,7 +131,7 @@ def train(use_cuda, train_program, params_dirname):
def infer(use_cuda, inference_program, params_dirname=None):
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
inferencer = fluid.Inferencer(
inferencer = Inferencer(
infer_func=inference_program, param_path=params_dirname, place=place)
# Setup inputs by creating 4 LoDTensors representing 4 words. Here each word
......
......@@ -659,5 +659,28 @@ class TestLoadSliceVar(TranspilerTest):
pserver2._slice_vars_and_attrs[idx][2].shape))
class TestNCCL2Transpile(TranspilerTest):
def test_nccl2_transpile(self):
if fluid.core.is_compiled_with_cuda(): #test nccl2 only with cuda
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
self.net_conf()
config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
t.transpile(
0,
trainers="127.0.0.1:6174,127.0.0.1:6175",
current_endpoint="127.0.0.1:6174",
startup_program=startup)
print([op.type for op in startup.global_block().ops])
self.assertEqual(startup.global_block().ops[-1].type, "gen_nccl_id")
self.assertIsNotNone(startup.global_block().vars.get("NCCLID"))
else:
pass
if __name__ == "__main__":
unittest.main()
......@@ -758,6 +758,14 @@ class TestBook(unittest.TestCase):
out = layers.expand(x, [1, 2])
print(str(program))
def test_softshrink(self):
program = Program()
with program_guard(program):
input = layers.data(name="input", shape=[16], dtype="float32")
out = layers.softshrink(input, name='softshrink')
self.assertIsNotNone(out)
print(str(program))
if __name__ == '__main__':
unittest.main()
......@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
set(mul_op.attr_names),
set([
"x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
"op_namescope"
"op_namescope", "op_callstack"
]))
self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
......
This diff has been collapsed.
......@@ -136,6 +136,8 @@ class DistributeTranspilerConfig(object):
slice_var_up = True
split_method = None
min_block_size = 8192
# supported modes: pserver, nccl2
mode = "pserver"
print_log = False
......@@ -144,27 +146,30 @@ class DistributeTranspiler(object):
**DistributeTranspiler**
Convert the fluid program to distributed data-parallelism programs.
Supports two modes: pserver mode and nccl2 mode.
The main_program will be transformed to use a remote parameter server
to do parameter optimization. And the optimization graph will be put
into a parameter server program.
In pserver mode, the main_program will be transformed to use a remote
parameter server for parameter optimization, and the optimization
graph will be put into a parameter server program.
In nccl2 mode, the transpiler will append a NCCL_ID broadcasting
op to the startup_program to share the NCCL_ID across the job nodes.
After transpiling in nccl2 mode, you ***must*** pass the trainer_id and
num_trainers arguments to ParallelExecutor to enable NCCL2 distributed
mode.
Examples:
.. code-block:: python
# Define your model before these codes.
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist)
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
# for pserver mode
pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
current_endpoint = "192.168.0.1:6174"
trainer_id = 0
trainers = 4
role = os.getenv("PADDLE_TRAINING_ROLE")
t = distribute_transpiler.DistributeTranspiler()
t = fluid.DistributeTranspiler()
t.transpile(
trainer_id, pservers=pserver_endpoints, trainers=trainers)
if role == "PSERVER":
......@@ -173,6 +178,18 @@ class DistributeTranspiler(object):
pserver_program)
elif role == "TRAINER":
trainer_program = t.get_trainer_program()
# for nccl2 mode
config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
t.transpile(trainer_id, trainers=trainer_endpoints, current_endpoint=current_endpoint)
exe = fluid.ParallelExecutor(
use_cuda,
loss_name=loss_var.name,
num_trainers=len(trainer_endpoints.split(",")),
trainer_id=trainer_id
)
"""
def __init__(self, config=None):
......@@ -190,13 +207,41 @@ class DistributeTranspiler(object):
assert (self.config.min_block_size >= 8192)
assert (self.config.split_method.__bases__[0] == PSDispatcher)
def _transpile_nccl2(self,
trainer_id,
trainers,
current_endpoint,
startup_program=None):
if not startup_program:
startup_program = default_startup_program()
if trainer_id >= 0:
worker_endpoints = trainers.split(",")
# trainer 0 generates the NCCL_ID and sends it to the other endpoints; the rest receive it from trainer 0
worker_endpoints.remove(current_endpoint)
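# NCCLID is an opaque byte string, so it is stored in a RAW-typed variable.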
nccl_id_var = startup_program.global_block().create_var(
name="NCCLID", persistable=True, type=core.VarDesc.VarType.RAW)
startup_program.global_block().append_op(
type="gen_nccl_id",
inputs={},
outputs={"NCCLID": nccl_id_var},
attrs={
"endpoint": current_endpoint,
"endpoint_list": worker_endpoints,
"trainer_id": trainer_id
})
return nccl_id_var
else:
raise ValueError("trainer_id must be >= 0")
def transpile(self,
trainer_id,
program=None,
pservers="127.0.0.1:6174",
trainers=1,
sync_mode=True,
startup_program=None):
startup_program=None,
current_endpoint="127.0.0.1:6174"):
"""
Run the transpiler.
......@@ -207,10 +252,15 @@ class DistributeTranspiler(object):
default is fluid.default_main_program().
pservers (str): comma separated ip:port string for the pserver
list.
trainers (int): number of trainers in the distributed job.
trainers (int|str): in pserver mode this is the number of
trainers; in nccl2 mode this is a comma separated string of
trainer endpoints.
sync_mode (bool): Do sync training or not, default is True.
startup_program (Program|None): startup_program to transpile,
default is fluid.default_startup_program().
current_endpoint (str): the endpoint of the current node; it must
be passed when transpiling in nccl2 distributed mode. In pserver
mode this argument is not used.
"""
if program is None:
program = default_main_program()
......@@ -220,6 +270,15 @@ class DistributeTranspiler(object):
self.startup_program = startup_program
self.origin_startup_program = self.startup_program.clone()
if self.config.mode == "nccl2":
assert (isinstance(trainers, str))
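# nccl2 mode only appends the gen_nccl_id op to the startup program;
# the main program is left unchanged, so return right after transpiling.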
self._transpile_nccl2(
trainer_id,
trainers,
current_endpoint,
startup_program=startup_program)
return
self.trainer_num = trainers
self.sync_mode = sync_mode
self.trainer_id = trainer_id
......@@ -1082,7 +1141,7 @@ to transpile() call.")
if self.sync_mode else []
},
attrs={
"sync_mode": False,
"sync_mode": self.sync_mode,
"epmap": pserver_endpoints,
RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE,
OP_ROLE_VAR_ATTR_NAME: [
......
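Taken together, the hunks above add an nccl2 mode to DistributeTranspiler. Below is a minimal sketch of driving that mode end to end on one machine; the two local endpoints and the tiny regression network are assumptions made for illustration, not part of this change.

import paddle.fluid as fluid

# A toy network so the sketch is self-contained (assumed, for illustration only).
x = fluid.layers.data(name="x", shape=[13], dtype="float32")
y = fluid.layers.data(name="y", shape=[1], dtype="float32")
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

# Two assumed trainer endpoints on one host; this process acts as trainer 0.
trainer_endpoints = "127.0.0.1:6170,127.0.0.1:6171"
current_endpoint = "127.0.0.1:6170"
trainer_id = 0

config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
t.transpile(
    trainer_id,
    trainers=trainer_endpoints,
    current_endpoint=current_endpoint,
    startup_program=fluid.default_startup_program())

# Run the startup program (which now contains gen_nccl_id), then pass
# trainer_id and num_trainers to ParallelExecutor as the class docstring requires.
exe = fluid.Executor(fluid.CUDAPlace(0))
exe.run(fluid.default_startup_program())
pe = fluid.ParallelExecutor(
    use_cuda=True,
    loss_name=loss.name,
    num_trainers=len(trainer_endpoints.split(",")),
    trainer_id=trainer_id)

The same script, launched with trainer_id=1 and current_endpoint set to the second address, forms the other half of the two-trainer job.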