diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ec632e20690eafdc558e24f160270a89b29ee41..e4442d254901e2524385452ebe5ac6f6df3056f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -212,7 +212,7 @@ endif() if (WITH_JEMALLOC) find_package(JeMalloc REQUIRED) include_directories(${JEMALLOC_INCLUDE_DIR}) - add_definitions(-DWITH_JEMALLOC) + add_definitions(-DPADDLE_WITH_JEMALLOC) endif() include(generic) # simplify cmake module @@ -276,9 +276,3 @@ add_subdirectory(paddle) if(WITH_PYTHON) add_subdirectory(python) endif() - -if(WITH_DOC) - find_package(Sphinx REQUIRED) - find_python_module(recommonmark REQUIRED) - add_subdirectory(doc) -endif() diff --git a/Dockerfile b/Dockerfile index acfd091265e26d6c29c561d166fed2504c0cff1c..fe0721e9b99b5e028df2f6228ff04cb56a567a3f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,12 +11,10 @@ RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ub # ENV variables ARG WITH_GPU ARG WITH_AVX -ARG WITH_DOC ENV WOBOQ OFF ENV WITH_GPU=${WITH_GPU:-ON} ENV WITH_AVX=${WITH_AVX:-ON} -ENV WITH_DOC=${WITH_DOC:-OFF} ENV HOME /root # Add bash enhancements diff --git a/cmake/FindSphinx.cmake b/cmake/FindSphinx.cmake deleted file mode 100644 index f74cd4ff8c9c2c52319b18ac37264167b3718eae..0000000000000000000000000000000000000000 --- a/cmake/FindSphinx.cmake +++ /dev/null @@ -1,147 +0,0 @@ -# - This module looks for Sphinx -# Find the Sphinx documentation generator -# -# This modules defines -# SPHINX_EXECUTABLE -# SPHINX_FOUND - -find_program(SPHINX_EXECUTABLE - NAMES sphinx-build - PATHS - /usr/bin - /usr/local/bin - /opt/local/bin - DOC "Sphinx documentation generator" -) - -if( NOT SPHINX_EXECUTABLE ) - set(_Python_VERSIONS - 2.7 2.6 2.5 2.4 2.3 2.2 2.1 2.0 1.6 1.5 - ) - - foreach( _version ${_Python_VERSIONS} ) - set( _sphinx_NAMES sphinx-build-${_version} ) - - find_program( SPHINX_EXECUTABLE - NAMES ${_sphinx_NAMES} - PATHS - /usr/bin - /usr/local/bin - /opt/loca/bin - DOC "Sphinx documentation generator" - ) - endforeach() -endif() - -include(FindPackageHandleStandardArgs) - -find_package_handle_standard_args(Sphinx DEFAULT_MSG - SPHINX_EXECUTABLE -) - - -option( SPHINX_HTML_OUTPUT "Build a single HTML with the whole content." ON ) -option( SPHINX_DIRHTML_OUTPUT "Build HTML pages, but with a single directory per document." OFF ) -option( SPHINX_HTMLHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in htmlhelp." OFF ) -option( SPHINX_QTHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in qthelp." OFF ) -option( SPHINX_DEVHELP_OUTPUT "Build HTML pages with additional information for building a documentation collection in devhelp." OFF ) -option( SPHINX_EPUB_OUTPUT "Build HTML pages with additional information for building a documentation collection in epub." OFF ) -option( SPHINX_LATEX_OUTPUT "Build LaTeX sources that can be compiled to a PDF document using pdflatex." OFF ) -option( SPHINX_MAN_OUTPUT "Build manual pages in groff format for UNIX systems." OFF ) -option( SPHINX_TEXT_OUTPUT "Build plain text files." OFF ) - - -mark_as_advanced( - SPHINX_EXECUTABLE - SPHINX_HTML_OUTPUT - SPHINX_DIRHTML_OUTPUT - SPHINX_HTMLHELP_OUTPUT - SPHINX_QTHELP_OUTPUT - SPHINX_DEVHELP_OUTPUT - SPHINX_EPUB_OUTPUT - SPHINX_LATEX_OUTPUT - SPHINX_MAN_OUTPUT - SPHINX_TEXT_OUTPUT -) - -function( Sphinx_add_target target_name builder conf cache source destination ) - add_custom_target( ${target_name} ALL - COMMAND ${SPHINX_EXECUTABLE} -b ${builder} - -d ${cache} - -c ${conf} - ${source} - ${destination} - COMMENT "Generating sphinx documentation: ${builder}" - COMMAND cd ${destination} && ln -sf ./index_*.html index.html - ) - - set_property( - DIRECTORY APPEND PROPERTY - ADDITIONAL_MAKE_CLEAN_FILES - ${destination} - ) -endfunction() - -# Target dependencies can be optionally listed at the end. -function( Sphinx_add_targets target_base_name conf source base_destination ) - - set( _dependencies ) - - foreach( arg IN LISTS ARGN ) - set( _dependencies ${_dependencies} ${arg} ) - endforeach() - - if( ${SPHINX_HTML_OUTPUT} ) - Sphinx_add_target( ${target_base_name}_html html ${conf} ${source} ${base_destination}/html ) - - add_dependencies( ${target_base_name}_html ${_dependencies} ) - endif() - - if( ${SPHINX_DIRHTML_OUTPUT} ) - Sphinx_add_target( ${target_base_name}_dirhtml dirhtml ${conf} ${source} ${base_destination}/dirhtml ) - - add_dependencies( ${target_base_name}_dirhtml ${_dependencies} ) - endif() - - if( ${SPHINX_QTHELP_OUTPUT} ) - Sphinx_add_target( ${target_base_name}_qthelp qthelp ${conf} ${source} ${base_destination}/qthelp ) - - add_dependencies( ${target_base_name}_qthelp ${_dependencies} ) - endif() - - if( ${SPHINX_DEVHELP_OUTPUT} ) - Sphinx_add_target( ${target_base_name}_devhelp devhelp ${conf} ${source} ${base_destination}/devhelp ) - - add_dependencies( ${target_base_name}_devhelp ${_dependencies} ) - endif() - - if( ${SPHINX_EPUB_OUTPUT} ) - Sphinx_add_target( ${target_base_name}_epub epub ${conf} ${source} ${base_destination}/epub ) - - add_dependencies( ${target_base_name}_epub ${_dependencies} ) - endif() - - if( ${SPHINX_LATEX_OUTPUT} ) - Sphinx_add_target( ${target_base_name}_latex latex ${conf} ${source} ${base_destination}/latex ) - - add_dependencies( ${target_base_name}_latex ${_dependencies} ) - endif() - - if( ${SPHINX_MAN_OUTPUT} ) - Sphinx_add_target( ${target_base_name}_man man ${conf} ${source} ${base_destination}/man ) - - add_dependencies( ${target_base_name}_man ${_dependencies} ) - endif() - - if( ${SPHINX_TEXT_OUTPUT} ) - Sphinx_add_target( ${target_base_name}_text text ${conf} ${source} ${base_destination}/text ) - - add_dependencies( ${target_base_name}_text ${_dependencies} ) - endif() - - if( ${BUILD_TESTING} ) - sphinx_add_target( ${target_base_name}_linkcheck linkcheck ${conf} ${source} ${base_destination}/linkcheck ) - - add_dependencies( ${target_base_name}_linkcheck ${_dependencies} ) - endif() -endfunction() diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index afd3342768701adba4ff0040bd1c762b1cd8739d..4acccd0899568184735db35f0d949ec0e8b67fff 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -325,6 +325,7 @@ paddle.fluid.layers.iou_similarity ArgSpec(args=['x', 'y', 'name'], varargs=None paddle.fluid.layers.box_coder ArgSpec(args=['prior_box', 'prior_box_var', 'target_box', 'code_type', 'box_normalized', 'name'], varargs=None, keywords=None, defaults=('encode_center_size', True, None)) paddle.fluid.layers.polygon_box_transform ArgSpec(args=['input', 'name'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.layers.yolov3_loss ArgSpec(args=['x', 'gtbox', 'gtlabel', 'anchors', 'class_num', 'ignore_thresh', 'loss_weight_xy', 'loss_weight_wh', 'loss_weight_conf_target', 'loss_weight_conf_notarget', 'loss_weight_class', 'name'], varargs=None, keywords=None, defaults=(None, None, None, None, None, None)) +paddle.fluid.layers.multiclass_nms ArgSpec(args=['bboxes', 'scores', 'score_threshold', 'nms_top_k', 'keep_top_k', 'nms_threshold', 'normalized', 'nms_eta', 'background_label', 'name'], varargs=None, keywords=None, defaults=(0.3, True, 1.0, 0, None)) paddle.fluid.layers.accuracy ArgSpec(args=['input', 'label', 'k', 'correct', 'total'], varargs=None, keywords=None, defaults=(1, None, None)) paddle.fluid.layers.auc ArgSpec(args=['input', 'label', 'curve', 'num_thresholds', 'topk', 'slide_steps'], varargs=None, keywords=None, defaults=('ROC', 4095, 1, 1)) paddle.fluid.layers.exponential_decay ArgSpec(args=['learning_rate', 'decay_steps', 'decay_rate', 'staircase'], varargs=None, keywords=None, defaults=(False,)) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 2ba2437de66f31549a87f20360dbb97b48ea6fbe..66f11dedbaccd7febcd75fa7ade9c68b6c42022c 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -1,4 +1,3 @@ - #windows treat symbolic file as a real file, which is different with unix #We create a hidden file and compile it instead of origin source file. function(windows_symbolic TARGET) @@ -207,3 +206,24 @@ endif (NOT WIN32) cc_library(dlpack_tensor SRCS dlpack_tensor.cc DEPS tensor dlpack) cc_test(dlpack_tensor_test SRCS dlpack_tensor_test.cc DEPS dlpack_tensor glog) + +# Get the current working branch +execute_process( + COMMAND git rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE PADDLE_BRANCH + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +# Get the latest abbreviated commit hash of the working branch +execute_process( + COMMAND git log -1 --format=%h + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE PADDLE_COMMIT + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +message(STATUS "commit: ${PADDLE_COMMIT}") +message(STATUS "branch: ${PADDLE_BRANCH}") + +configure_file(commit.h.in commit.h) diff --git a/paddle/fluid/framework/commit.h.in b/paddle/fluid/framework/commit.h.in new file mode 100644 index 0000000000000000000000000000000000000000..3a33ece624443a99083ae29abb70254a5ac40a3d --- /dev/null +++ b/paddle/fluid/framework/commit.h.in @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace paddle { +namespace framework { + +static std::string paddle_commit() { + return "@PADDLE_COMMIT@"; +} + +static std::string paddle_compile_branch() { + return "@PADDLE_BRANCH@"; +} + +static std::string paddle_version() { + return "@PADDLE_VERSION@"; +} + +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 83fc6ee2e299f5fa18d5cc6f220c0be6a66e709d..47488d4dea79f285769f29c93f7888a7f783f070 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -156,6 +156,8 @@ class Autograd { for (auto it : candidate->pre_ops_) { for (OpBase* pre_op : it.second) { if (!pre_op) continue; + VLOG(5) << "op dep " << candidate->op_desc_->Type() << " <---- " + << it.first << " <---- " << pre_op->op_desc_->Type(); if (visited.find(pre_op) == visited.end()) { visited.insert(pre_op); queue.push_back(pre_op); diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h index dc97433a5102b39d03ea5cac3157c027f9d67c98..78205486c5534ac0c61cc6d545bdafa4dfc95695 100644 --- a/paddle/fluid/imperative/layer.h +++ b/paddle/fluid/imperative/layer.h @@ -28,6 +28,7 @@ #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/imperative/type_defs.h" @@ -140,16 +141,24 @@ class VarBase { void RunBackward(); void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name, - int pre_op_out_idx, bool stop_gradient) { + int pre_op_out_idx, bool pre_op_stop_gradient) { pre_op_ = pre_op; pre_op_out_name_ = pre_op_out_name; pre_op_out_idx_ = pre_op_out_idx; - stop_gradient_ = stop_gradient; + if (pre_op_stop_gradient) { + stop_gradient_ = pre_op_stop_gradient; + } } void ClearGradient() { - delete grads_; - grads_ = new VarBase(true); + VLOG(1) << "clear gradient of " << var_desc_->Name(); + if (grads_ && grads_->var_ && grads_->var_->IsInitialized()) { + auto grads_t = grads_->var_->GetMutable(); + operators::math::set_constant( + *(platform::DeviceContextPool::Instance().Get( + grads_->var_->Get().place())), + grads_t, 0.0); + } } framework::LoDTensor& GradValue(); diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 0a68b373bebb2804bbca8a32104c61beef47df1a..bc39d11ba00a6a7c386162a1f9201c6f992c8692 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -85,11 +85,12 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, op->input_vars_ = inputs; for (auto it : op->input_vars_) { auto& invars = invars_map[it.first]; + invars.reserve(it.second.size()); for (VarBase* inp : it.second) { PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr", op->op_desc_->Type(), inp->var_desc_->Name()); - invars.push_back(inp->var_); + invars.emplace_back(inp->var_); vars[inp->var_desc_->Name()] = inp; if (inp->PreOp()) { op->pre_ops_[it.first].push_back(inp->PreOp()); @@ -106,9 +107,10 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs, for (auto it : op->output_vars_) { auto& outvars = outvars_map[it.first]; const std::vector& outputs = it.second; + outvars.reserve(outputs.size()); for (size_t i = 0; i < outputs.size(); ++i) { VarBase* out = outputs[i]; - outvars.push_back(out->var_); + outvars.emplace_back(out->var_); vars[out->var_desc_->Name()] = out; framework::VarDesc* var_desc = block->FindVar(out->var_desc_->Name()); diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index a2546ead93c3baeb8029f6451d8a60dcc75f8571..2f31b182af7293488719e41a92b2ea78709bda02 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -132,7 +132,7 @@ struct Argument { DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int); DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int); DECL_ARGUMENT_FIELD(tensorrt_precision_mode, TensorRtPrecisionMode, - contrib::AnalysisConfig::Precision); + AnalysisConfig::Precision); // Memory optimized related. DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool); diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index 120f6ef27d49ae59ec36304dc3742cd9ca0afa4b..59107f28080dceb0a58e17d42281db5f3773de56 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -32,7 +32,7 @@ limitations under the License. */ #ifdef _WIN32 #include #include -#define GCC_ATTRIBUTE(attr__) ; +#define GCC_ATTRIBUTE(attr__) #define MKDIR(path) _mkdir(path) #else #include diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index 99611ce84b23896dd173831a03d77c6e0252d998..fe3c841186c35ea28c1d44007d91de5b997c1388 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -71,7 +71,7 @@ void IRPassManager::CreatePasses(Argument *argument, new framework::ProgramDesc *(&argument->main_program())); bool enable_int8 = argument->tensorrt_precision_mode() == - contrib::AnalysisConfig::Precision::kInt8; + AnalysisConfig::Precision::kInt8; pass->Set("enable_int8", new bool(enable_int8)); std::string model_opt_cache_dir = diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 8efd514bd8397f099fd07321ad7e5d4ca253e229..eecab238a88e90399eb70f17caa57633af4e2a69 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -22,7 +22,7 @@ namespace paddle { -PassStrategy *contrib::AnalysisConfig::pass_builder() const { +PassStrategy *AnalysisConfig::pass_builder() const { if (!pass_builder_.get()) { if (use_gpu_) { LOG(INFO) << "Create GPU IR passes"; @@ -42,27 +42,27 @@ PassStrategy *contrib::AnalysisConfig::pass_builder() const { return pass_builder_.get(); } -contrib::AnalysisConfig::AnalysisConfig(const std::string &model_dir) { +AnalysisConfig::AnalysisConfig(const std::string &model_dir) { model_dir_ = model_dir; Update(); } -contrib::AnalysisConfig::AnalysisConfig(const std::string &prog_file, - const std::string ¶ms_file) { +AnalysisConfig::AnalysisConfig(const std::string &prog_file, + const std::string ¶ms_file) { prog_file_ = prog_file; params_file_ = params_file; Update(); } -void contrib::AnalysisConfig::SetModel(const std::string &prog_file_path, - const std::string ¶ms_file_path) { +void AnalysisConfig::SetModel(const std::string &prog_file_path, + const std::string ¶ms_file_path) { prog_file_ = prog_file_path; params_file_ = params_file_path; Update(); } -void contrib::AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb, - int device_id) { +void AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb, + int device_id) { #ifdef PADDLE_WITH_CUDA use_gpu_ = true; memory_pool_init_size_mb_ = memory_pool_init_size_mb; @@ -74,13 +74,13 @@ void contrib::AnalysisConfig::EnableUseGpu(uint64_t memory_pool_init_size_mb, Update(); } -void contrib::AnalysisConfig::DisableGpu() { +void AnalysisConfig::DisableGpu() { use_gpu_ = false; Update(); } -contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) { +AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { #define CP_MEMBER(member__) member__ = other.member__; // Model related. @@ -130,7 +130,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) { Update(); } -void contrib::AnalysisConfig::EnableMKLDNN() { +void AnalysisConfig::EnableMKLDNN() { #ifdef PADDLE_WITH_MKLDNN pass_builder()->EnableMKLDNN(); use_mkldnn_ = true; @@ -142,9 +142,9 @@ void contrib::AnalysisConfig::EnableMKLDNN() { Update(); } -void contrib::AnalysisConfig::EnableTensorRtEngine( +void AnalysisConfig::EnableTensorRtEngine( int workspace_size, int max_batch_size, int min_subgraph_size, - contrib::AnalysisConfig::Precision precision_mode) { + AnalysisConfig::Precision precision_mode) { #ifdef PADDLE_WITH_CUDA if (!use_gpu()) { LOG(ERROR) << "To use TensorRT engine, please call EnableGpu() first"; @@ -165,7 +165,7 @@ void contrib::AnalysisConfig::EnableTensorRtEngine( } // TODO(Superjomn) refactor this, buggy. -void contrib::AnalysisConfig::Update() { +void AnalysisConfig::Update() { auto info = SerializeInfoCache(); if (info == serialized_info_cache_) return; @@ -225,7 +225,7 @@ void contrib::AnalysisConfig::Update() { } } -std::string contrib::AnalysisConfig::SerializeInfoCache() { +std::string AnalysisConfig::SerializeInfoCache() { std::stringstream ss; ss << model_dir_; ss << prog_file_; @@ -260,14 +260,14 @@ std::string contrib::AnalysisConfig::SerializeInfoCache() { return ss.str(); } -void contrib::AnalysisConfig::SetCpuMathLibraryNumThreads( +void AnalysisConfig::SetCpuMathLibraryNumThreads( int cpu_math_library_num_threads) { cpu_math_library_num_threads_ = cpu_math_library_num_threads; Update(); } -float contrib::AnalysisConfig::fraction_of_gpu_memory_for_pool() const { +float AnalysisConfig::fraction_of_gpu_memory_for_pool() const { #ifdef PADDLE_WITH_CUDA // Get the GPU memory details and calculate the fraction of memory for the // GPU memory pool. @@ -282,8 +282,8 @@ float contrib::AnalysisConfig::fraction_of_gpu_memory_for_pool() const { #endif } -void contrib::AnalysisConfig::EnableMemoryOptim( - bool static_optim, bool force_update_static_cache) { +void AnalysisConfig::EnableMemoryOptim(bool static_optim, + bool force_update_static_cache) { enable_memory_optim_ = true; static_memory_optim_ = static_optim; static_memory_optim_force_update_ = force_update_static_cache; @@ -291,14 +291,14 @@ void contrib::AnalysisConfig::EnableMemoryOptim( Update(); } -bool contrib::AnalysisConfig::enable_memory_optim() const { +bool AnalysisConfig::enable_memory_optim() const { return enable_memory_optim_; } -void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer, - size_t prog_buffer_size, - const char *param_buffer, - size_t param_buffer_size) { +void AnalysisConfig::SetModelBuffer(const char *prog_buffer, + size_t prog_buffer_size, + const char *param_buffer, + size_t param_buffer_size) { prog_file_ = std::string(prog_buffer, prog_buffer + prog_buffer_size); params_file_ = std::string(param_buffer, param_buffer + param_buffer_size); model_from_memory_ = true; @@ -306,7 +306,7 @@ void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer, Update(); } -NativeConfig contrib::AnalysisConfig::ToNativeConfig() const { +NativeConfig AnalysisConfig::ToNativeConfig() const { NativeConfig config; config.model_dir = model_dir_; config.prog_file = prog_file_; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 7d97aea714ab1bd653965c1900f900b52c3b4616..14d6ba8c56dc3fe04e27bccadd5a5155547398a4 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -47,7 +47,6 @@ DECLARE_bool(profile); namespace paddle { -using contrib::AnalysisConfig; using inference::Singleton; #if PADDLE_WITH_TENSORRT using inference::tensorrt::TRTInt8Calibrator; @@ -123,6 +122,15 @@ bool AnalysisPredictor::PrepareProgram( if (!program) { if (!LoadProgramDesc()) return false; + // If not cloned, the parameters should be loaded. + // If config_.ir_optim() is True, parameters is loaded in + // OptimizeInferenceProgram(), but other persistable variables + // (like RAW type var) are not created in scope. + // If config_.ir_optim() is False, parameters is loaded in LoadParameters(), + // still need to create other persistable variables. + // So in both case, create persistable variables at first. + executor_->CreateVariables(*inference_program_, 0, true, sub_scope_); + // Optimize the program, and load parameters and modify them in the // scope_. // This will change the scope_ address. @@ -130,15 +138,6 @@ bool AnalysisPredictor::PrepareProgram( status_ir_optim_enabled_ = true; OptimizeInferenceProgram(); } else { - // If the parent_scope is passed, we assert that the persistable variables - // are already created, so just create the no persistable variables. - - // If not cloned, the parameters should be loaded - // OptimizeInferenceProgram. - // So in both cases, just the local variables are needed to load, not the - // parematers. - executor_->CreateVariables(*inference_program_, 0, true, sub_scope_); - // Load parameters LOG(INFO) << "load parameters "; LoadParameters(); @@ -376,7 +375,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { } argument_.SetIrAnalysisPasses(passes); argument_.SetAnalysisPasses(config_.pass_builder()->AnalysisPasses()); - argument_.SetScopeNotOwned(const_cast(scope_.get())); + argument_.SetScopeNotOwned(scope_.get()); Analyzer().Run(&argument_); PADDLE_ENFORCE(argument_.scope_valid()); @@ -726,11 +725,15 @@ bool AnalysisPredictor::need_collect_var_shapes_for_memory_optim() { return need; } +std::string AnalysisPredictor::GetSeriazlizedProgram() const { + return inference_program_->Proto()->SerializeAsString(); +} + template <> -std::unique_ptr CreatePaddlePredictor( - const contrib::AnalysisConfig &config) { - return CreatePaddlePredictor(config); +std::unique_ptr CreatePaddlePredictor( + const AnalysisConfig &config) { + return CreatePaddlePredictor( + config); } } // namespace paddle diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index 921aa90952d6cf37ef84890b6ad7340cad6a8eb6..014df4ee8b6d86232212736c43a9aff32ffee011 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -33,7 +33,6 @@ using inference::analysis::Argument; using inference::analysis::Analyzer; using framework::proto::ProgramDesc; using framework::NaiveExecutor; -using contrib::AnalysisConfig; /** \brief This predictor is based on the original native predictor with IR and * Analysis support. @@ -75,6 +74,8 @@ class AnalysisPredictor : public PaddlePredictor { void SetMkldnnThreadID(int tid); + std::string GetSeriazlizedProgram() const override; + protected: // For memory optimization. bool need_collect_var_shapes_for_memory_optim(); @@ -121,7 +122,7 @@ class AnalysisPredictor : public PaddlePredictor { #endif private: - contrib::AnalysisConfig config_; + AnalysisConfig config_; Argument argument_; std::unique_ptr executor_; platform::Place place_; diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index 4688e93d7102109d2c7ece9ba37bc8f2d311dcf1..6d11b461082d0ed8ba08c9e280bba86737b86e71 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -24,7 +24,6 @@ DEFINE_string(dirname, "", "dirname to tests."); namespace paddle { -using contrib::AnalysisConfig; TEST(AnalysisPredictor, analysis_off) { AnalysisConfig config; @@ -215,6 +214,8 @@ TEST(AnalysisPredictor, memory_optim) { { // The first predictor help to cache the memory optimize strategy. auto predictor = CreatePaddlePredictor(config); + LOG(INFO) << "serialized program: " << predictor->GetSeriazlizedProgram(); + ASSERT_FALSE(predictor->GetSeriazlizedProgram().empty()); // Run several times to check the parameters are not reused by mistake. for (int i = 0; i < 5; i++) { diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc index 9be059c73e20ebeeff2c4b6e8e5502e4a56fd0d6..6cd18277d63200f5bccf180a7ae3196b0ce126ff 100644 --- a/paddle/fluid/inference/api/api.cc +++ b/paddle/fluid/inference/api/api.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include +#include "paddle/fluid/framework/commit.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" @@ -97,4 +99,12 @@ void PaddleBuf::Free() { } } +std::string get_version() { + std::stringstream ss; + ss << "version: " << framework::paddle_version() << "\n"; + ss << "commit: " << framework::paddle_commit() << "\n"; + ss << "branch: " << framework::paddle_compile_branch() << "\n"; + return ss.str(); +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/api_impl_tester.cc b/paddle/fluid/inference/api/api_impl_tester.cc index 54895679ca37362c7267677af80274b8de95e296..e82cb53bf073d3d1ab9a518218edaf430728463f 100644 --- a/paddle/fluid/inference/api/api_impl_tester.cc +++ b/paddle/fluid/inference/api/api_impl_tester.cc @@ -295,7 +295,7 @@ TEST(inference_api_native, image_classification_gpu) { #endif TEST(PassBuilder, Delete) { - contrib::AnalysisConfig config; + AnalysisConfig config; config.DisableGpu(); config.pass_builder()->DeletePass("attention_lstm_fuse_pass"); const auto& passes = config.pass_builder()->AllPasses(); diff --git a/paddle/fluid/inference/api/api_tester.cc b/paddle/fluid/inference/api/api_tester.cc index 7a579610eefda24c911edd28b5f3a178aa10ab1e..2c450ef7cead4d5c3870d5e9186eb221e5dc19a0 100644 --- a/paddle/fluid/inference/api/api_tester.cc +++ b/paddle/fluid/inference/api/api_tester.cc @@ -61,4 +61,10 @@ TEST(paddle_inference_api, demo) { predictor->Run({}, &outputs); } +TEST(paddle_inference_api, get_version) { + LOG(INFO) << "paddle version:\n" << get_version(); + auto version = get_version(); + ASSERT_FALSE(version.empty()); +} + } // namespace paddle diff --git a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc index 338a0cec161f352781f132aea71dd56f68840c62..f7da55c9ae368763786c1b1fd3e86d942c5e9fe8 100644 --- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc @@ -36,7 +36,7 @@ namespace demo { */ void Main() { std::unique_ptr predictor; - paddle::contrib::AnalysisConfig config; + paddle::AnalysisConfig config; config.EnableUseGpu(100, 0); config.SetModel(FLAGS_modeldir + "/__model__", FLAGS_modeldir + "/__params__"); diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc index 5320992b7e78f4aa0ea8950af03038c1953dd027..0d2c418c56db620c71d99b64ee79b18be427cc34 100644 --- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc @@ -34,7 +34,6 @@ DEFINE_bool(use_gpu, false, "Whether use gpu."); namespace paddle { namespace demo { -using contrib::AnalysisConfig; /* * Use the native and analysis fluid engine to inference the demo. */ diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 5b899b26d60dec3634d7016c925143e1ae26992d..9d9ed6a39d8324002a8850deae9bb8dd5af7ef9b 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -29,11 +29,6 @@ namespace paddle { class AnalysisPredictor; -// == -// -// ----------------------------------------------------------------------------------- -// NOTE: The following APIs are not mature yet, we are still working on them. -namespace contrib { // NOTE WIP, not stable yet. struct AnalysisConfig { @@ -260,5 +255,4 @@ struct AnalysisConfig { mutable std::unique_ptr pass_builder_; }; -} // namespace contrib } // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 46b510fd1ec94c59032b8f41a2ac4d6aa87dc150..8ac8bc529183edc2f8f888ca7ba14611acaadc10 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -215,6 +215,14 @@ class PaddlePredictor { */ virtual ~PaddlePredictor() = default; + /** \brief Get the serialized model program that executes in inference phase. + * Its data type is ProgramDesc, which is a protobuf message. + */ + virtual std::string GetSeriazlizedProgram() const { + assert(false); // Force raise error. + return "NotImplemented"; + } + /** The common configs for all the predictors. */ struct Config { @@ -288,4 +296,6 @@ std::unique_ptr CreatePaddlePredictor(const ConfigT& config); int PaddleDtypeSize(PaddleDType dtype); +std::string get_version(); + } // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h index 919f5d55f88c3a6473f66371e2f3d91f3c4721c5..5815bc9a1464293e0a56f05e34183580eac96cea 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h @@ -13,16 +13,16 @@ // limitations under the License. #pragma once + +#include +#include #include #include -#include +#include // NOLINT #include #include #include #include - -#include -#include #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 07b9e0e051bce13f6caeca54a664019c55d80fa6..aa3da397ff67dd06dd750d336a49056baedaaab6 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -128,6 +128,11 @@ inference_analysis_api_test_with_fake_data(test_analyzer_resnet50 inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_conv "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet_depthwise_conv" analyzer_resnet50_tester.cc "mobilenet_model.tar.gz" SERIAL) +# bert, max_len=20 +set(BERT_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/bert20") +download_model_and_data(${BERT_INSTALL_DIR} "bert_model.tar.gz" "bert_data_len20.txt.tar.gz") +inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert_tester.cc SERIAL) + # anakin if (WITH_ANAKIN AND WITH_MKL) # only needed in CI # anakin rnn1 diff --git a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc new file mode 100644 index 0000000000000000000000000000000000000000..f646fd6d91c81b6738e4fc5278739307fa5f99b5 --- /dev/null +++ b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc @@ -0,0 +1,223 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/inference/tests/api/tester_helper.h" + +namespace paddle { +namespace inference { + +using paddle::PaddleTensor; + +template +void GetValueFromStream(std::stringstream *ss, T *t) { + (*ss) >> (*t); +} + +template <> +void GetValueFromStream(std::stringstream *ss, std::string *t) { + *t = ss->str(); +} + +// Split string to vector +template +void Split(const std::string &line, char sep, std::vector *v) { + std::stringstream ss; + T t; + for (auto c : line) { + if (c != sep) { + ss << c; + } else { + GetValueFromStream(&ss, &t); + v->push_back(std::move(t)); + ss.str({}); + ss.clear(); + } + } + + if (!ss.str().empty()) { + GetValueFromStream(&ss, &t); + v->push_back(std::move(t)); + ss.str({}); + ss.clear(); + } +} + +template +constexpr paddle::PaddleDType GetPaddleDType(); + +template <> +constexpr paddle::PaddleDType GetPaddleDType() { + return paddle::PaddleDType::INT64; +} + +template <> +constexpr paddle::PaddleDType GetPaddleDType() { + return paddle::PaddleDType::FLOAT32; +} + +// Parse tensor from string +template +bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) { + std::vector data; + Split(field, ':', &data); + if (data.size() < 2) return false; + + std::string shape_str = data[0]; + + std::vector shape; + Split(shape_str, ' ', &shape); + + std::string mat_str = data[1]; + + std::vector mat; + Split(mat_str, ' ', &mat); + + tensor->shape = shape; + auto size = + std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()) * + sizeof(T); + tensor->data.Resize(size); + std::copy(mat.begin(), mat.end(), static_cast(tensor->data.data())); + tensor->dtype = GetPaddleDType(); + + return true; +} + +// Parse input tensors from string +bool ParseLine(const std::string &line, + std::vector *tensors) { + std::vector fields; + Split(line, ';', &fields); + + if (fields.size() < 5) return false; + + tensors->clear(); + tensors->reserve(5); + + int i = 0; + // src_id + paddle::PaddleTensor src_id; + ParseTensor(fields[i++], &src_id); + tensors->push_back(src_id); + + // pos_id + paddle::PaddleTensor pos_id; + ParseTensor(fields[i++], &pos_id); + tensors->push_back(pos_id); + + // segment_id + paddle::PaddleTensor segment_id; + ParseTensor(fields[i++], &segment_id); + tensors->push_back(segment_id); + + // self_attention_bias + paddle::PaddleTensor self_attention_bias; + ParseTensor(fields[i++], &self_attention_bias); + tensors->push_back(self_attention_bias); + + // next_segment_index + paddle::PaddleTensor next_segment_index; + ParseTensor(fields[i++], &next_segment_index); + tensors->push_back(next_segment_index); + + return true; +} + +bool LoadInputData(std::vector> *inputs) { + if (FLAGS_infer_data.empty()) { + LOG(ERROR) << "please set input data path"; + return false; + } + + std::ifstream fin(FLAGS_infer_data); + std::string line; + int sample = 0; + + // The unit-test dataset only have 10 samples, each sample have 5 feeds. + while (std::getline(fin, line)) { + std::vector feed_data; + ParseLine(line, &feed_data); + inputs->push_back(std::move(feed_data)); + sample++; + if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break; + } + LOG(INFO) << "number of samples: " << sample; + + return true; +} + +void SetConfig(AnalysisConfig *config) { config->SetModel(FLAGS_infer_model); } + +void profile(bool use_mkldnn = false) { + AnalysisConfig config; + SetConfig(&config); + + if (use_mkldnn) { + config.EnableMKLDNN(); + } + + std::vector outputs; + std::vector> inputs; + LoadInputData(&inputs); + TestPrediction(reinterpret_cast(&config), + inputs, &outputs, FLAGS_num_threads); +} + +TEST(Analyzer_bert, profile) { profile(); } +#ifdef PADDLE_WITH_MKLDNN +TEST(Analyzer_bert, profile_mkldnn) { profile(true); } +#endif + +// Check the fuse status +TEST(Analyzer_bert, fuse_statis) { + AnalysisConfig cfg; + SetConfig(&cfg); + int num_ops; + auto predictor = CreatePaddlePredictor(cfg); + auto fuse_statis = GetFuseStatis( + static_cast(predictor.get()), &num_ops); + LOG(INFO) << "num_ops: " << num_ops; +} + +// Compare result of NativeConfig and AnalysisConfig +void compare(bool use_mkldnn = false) { + AnalysisConfig cfg; + SetConfig(&cfg); + if (use_mkldnn) { + cfg.EnableMKLDNN(); + } + + std::vector> inputs; + LoadInputData(&inputs); + CompareNativeAndAnalysis( + reinterpret_cast(&cfg), inputs); +} + +TEST(Analyzer_bert, compare) { compare(); } +#ifdef PADDLE_WITH_MKLDNN +TEST(Analyzer_bert, compare_mkldnn) { compare(true /* use_mkldnn */); } +#endif + +// Compare Deterministic result +TEST(Analyzer_bert, compare_determine) { + AnalysisConfig cfg; + SetConfig(&cfg); + + std::vector> inputs; + LoadInputData(&inputs); + CompareDeterministic(reinterpret_cast(&cfg), + inputs); +} +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc index e78ab942d113323fecf5510dca85fb5db734efc8..735e4fb563788438ee49ff6308d11f4dbe4962be 100644 --- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc @@ -19,7 +19,6 @@ DEFINE_int32(max_turn_num, 9, namespace paddle { namespace inference { -using contrib::AnalysisConfig; constexpr int32_t kMaxTurnLen = 50; @@ -165,7 +164,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, input_slots->push_back(std::move(response_mask_tensor)); } -void SetConfig(contrib::AnalysisConfig *cfg) { +void SetConfig(AnalysisConfig *cfg) { cfg->SetModel(FLAGS_infer_model + "/__model__", FLAGS_infer_model + "/param"); cfg->SwitchSpecifyInputNames(); cfg->SwitchIrOptim(true); @@ -187,7 +186,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. void profile(bool use_mkldnn = false) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); if (use_mkldnn) { @@ -223,7 +222,7 @@ TEST(Analyzer_dam, profile_mkldnn) { profile(true /* use_mkldnn */); } // Check the fuse status TEST(Analyzer_dam, fuse_statis) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -256,7 +255,7 @@ void compare(bool use_mkldnn = false) { TEST(Analyzer_dam, compare_with_static_memory_optim) { // The small dam will core in CI, but works in local. if (FLAGS_max_turn_num == 9) { - contrib::AnalysisConfig cfg, cfg1; + AnalysisConfig cfg, cfg1; DataRecord data(FLAGS_infer_data, FLAGS_batch_size); std::vector> input_slots_all; @@ -282,7 +281,7 @@ TEST(Analyzer_dam, compare_with_static_memory_optim) { TEST(Analyzer_dam, compare_with_dynamic_memory_optim) { // The small dam will core in CI, but works in local. if (FLAGS_max_turn_num == 9) { - contrib::AnalysisConfig cfg, cfg1; + AnalysisConfig cfg, cfg1; DataRecord data(FLAGS_infer_data, FLAGS_batch_size); std::vector> input_slots_all; diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc index b9666e01adb23e0cbd9257bc55081c3a5001e887..347672eaae314aa42096d48a3b044014f2ddbf84 100644 --- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc @@ -18,8 +18,6 @@ namespace paddle { namespace inference { namespace analysis { -using contrib::AnalysisConfig; - struct DataRecord { std::vector data; std::vector lod; diff --git a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc index 529a0174c8542f5226e70ef4a47bde069220ecc2..089f655c180d784af66af60277bdbf32a6019599 100644 --- a/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc @@ -16,7 +16,6 @@ namespace paddle { namespace inference { -using contrib::AnalysisConfig; struct DataRecord { std::vector> query, title; @@ -75,7 +74,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } } -void SetConfig(contrib::AnalysisConfig *cfg) { +void SetConfig(AnalysisConfig *cfg) { cfg->SetModel(FLAGS_infer_model); cfg->DisableGpu(); cfg->SwitchSpecifyInputNames(); @@ -95,7 +94,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. void profile(bool use_mkldnn = false) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; @@ -130,7 +129,7 @@ TEST(Analyzer_MM_DNN, profile_mkldnn) { profile(true /* use_mkldnn */); } // Check the fuse status TEST(Analyzer_MM_DNN, fuse_statis) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -141,7 +140,7 @@ TEST(Analyzer_MM_DNN, fuse_statis) { // Compare result of NativeConfig and AnalysisConfig void compare(bool use_mkldnn = false) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); if (use_mkldnn) { diff --git a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc index 6fef79dc4608acd6eee679ad4939e7684db98f5b..a70aa7a6ac41121a0c8ea397ebc7e24e4b206d12 100644 --- a/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_ner_tester.cc @@ -16,7 +16,6 @@ namespace paddle { namespace inference { -using contrib::AnalysisConfig; struct DataRecord { std::vector> word, mention; @@ -76,7 +75,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data) { } } -void SetConfig(contrib::AnalysisConfig *cfg, bool memory_load = false) { +void SetConfig(AnalysisConfig *cfg, bool memory_load = false) { if (memory_load) { std::string buffer_prog, buffer_param; ReadBinaryFile(FLAGS_infer_model + "/__model__", &buffer_prog); @@ -105,7 +104,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. void profile(bool memory_load = false) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg, memory_load); std::vector outputs; @@ -136,7 +135,7 @@ TEST(Analyzer_Chinese_ner, profile_memory_load) { // Check the fuse status TEST(Analyzer_Chinese_ner, fuse_statis) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -152,7 +151,7 @@ TEST(Analyzer_Chinese_ner, fuse_statis) { // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_Chinese_ner, compare) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); std::vector> input_slots_all; diff --git a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc index ad2c46e48d5a34a457a615f313f1ac3cc916b200..3f6c933f2bcc6ed5410cb95a48f5ee6869280fe4 100644 --- a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc @@ -16,7 +16,6 @@ namespace paddle { namespace inference { -using contrib::AnalysisConfig; struct DataRecord { std::vector> query_basic, query_phrase, title_basic, @@ -103,7 +102,7 @@ void PrepareInputs(std::vector *input_slots, DataRecord *data, } } -void SetConfig(contrib::AnalysisConfig *cfg) { +void SetConfig(AnalysisConfig *cfg) { cfg->SetModel(FLAGS_infer_model); cfg->DisableGpu(); cfg->SwitchSpecifyInputNames(); @@ -123,7 +122,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. TEST(Analyzer_Pyramid_DNN, profile) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; @@ -147,7 +146,7 @@ TEST(Analyzer_Pyramid_DNN, profile) { // Check the fuse status TEST(Analyzer_Pyramid_DNN, fuse_statis) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -158,7 +157,7 @@ TEST(Analyzer_Pyramid_DNN, fuse_statis) { // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_Pyramid_DNN, compare) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); std::vector> input_slots_all; diff --git a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc index 22e6366fb5cba6c7a0cde9c0c5f50f56c2e23b05..c27c39f40a2067dd2bd2150e4b1e53eab7cdf06e 100644 --- a/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc @@ -20,7 +20,6 @@ namespace paddle { namespace inference { using namespace framework; // NOLINT -using namespace contrib; // NOLINT struct DataRecord { std::vector>> link_step_data_all; @@ -223,7 +222,7 @@ void SetInput(std::vector> *inputs) { // Easy for profiling independently. TEST(Analyzer_rnn1, profile) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); cfg.DisableGpu(); cfg.SwitchIrDebug(); @@ -237,7 +236,7 @@ TEST(Analyzer_rnn1, profile) { // Check the fuse status TEST(Analyzer_rnn1, fuse_statis) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); int num_ops; @@ -254,7 +253,7 @@ TEST(Analyzer_rnn1, fuse_statis) { // Compare result of NativeConfig and AnalysisConfig TEST(Analyzer_rnn1, compare) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); std::vector> input_slots_all; @@ -276,7 +275,7 @@ TEST(Analyzer_rnn1, compare_determine) { // Test Multi-Thread. TEST(Analyzer_rnn1, multi_thread) { - contrib::AnalysisConfig cfg; + AnalysisConfig cfg; SetConfig(&cfg); std::vector outputs; diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index f3e75ffbb5962885bd926af50b764bec561cc454..ca04c1365cbbffcb4a2786cde9ab240cc20aa3d8 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -20,7 +20,6 @@ limitations under the License. */ namespace paddle { namespace inference { namespace analysis { -using contrib::AnalysisConfig; struct Record { std::vector data; diff --git a/paddle/fluid/inference/tests/api/config_printer.h b/paddle/fluid/inference/tests/api/config_printer.h index ecc10bafd650e52dfb73e8dd4329c697ff4f4ccc..b0c23fbd534847c8aad244749761e9c072148796 100644 --- a/paddle/fluid/inference/tests/api/config_printer.h +++ b/paddle/fluid/inference/tests/api/config_printer.h @@ -58,9 +58,8 @@ std::ostream &operator<<(std::ostream &os, const NativeConfig &config) { return os; } -std::ostream &operator<<(std::ostream &os, - const contrib::AnalysisConfig &config) { - os << GenSpaces(num_spaces) << "contrib::AnalysisConfig {\n"; +std::ostream &operator<<(std::ostream &os, const AnalysisConfig &config) { + os << GenSpaces(num_spaces) << "AnalysisConfig {\n"; num_spaces++; os << config.ToNativeConfig(); if (!config.model_from_memory()) { diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index b1f7a3464ac6027faffe283bccaf9793eae939e1..c743354e0e73097d23609c159cd14f73bc78e8ff 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -65,7 +65,7 @@ float Random(float low, float high) { void PrintConfig(const PaddlePredictor::Config *config, bool use_analysis) { const auto *analysis_config = - reinterpret_cast(config); + reinterpret_cast(config); if (use_analysis) { LOG(INFO) << *analysis_config; return; @@ -109,9 +109,9 @@ void CompareResult(const std::vector &outputs, std::unique_ptr CreateTestPredictor( const PaddlePredictor::Config *config, bool use_analysis = true) { const auto *analysis_config = - reinterpret_cast(config); + reinterpret_cast(config); if (use_analysis) { - return CreatePaddlePredictor(*analysis_config); + return CreatePaddlePredictor(*analysis_config); } auto native_config = analysis_config->ToNativeConfig(); return CreatePaddlePredictor(native_config); diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc index db7109b7505d4fe4dcfcf88f303aa262bc5b44fb..d70b324a4a103eaede09fedb6422d2d130bffbaf 100644 --- a/paddle/fluid/inference/tests/api/trt_models_tester.cc +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -42,9 +42,9 @@ void SetConfig(ConfigType* config, std::string model_dir, bool use_gpu, } template <> -void SetConfig(contrib::AnalysisConfig* config, - std::string model_dir, bool use_gpu, - bool use_tensorrt, int batch_size) { +void SetConfig(AnalysisConfig* config, std::string model_dir, + bool use_gpu, bool use_tensorrt, + int batch_size) { if (!FLAGS_prog_filename.empty() && !FLAGS_param_filename.empty()) { config->SetModel(model_dir + "/" + FLAGS_prog_filename, model_dir + "/" + FLAGS_param_filename); @@ -75,11 +75,11 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) { std::vector outputs; if (use_analysis || use_tensorrt) { - contrib::AnalysisConfig config; + AnalysisConfig config; config.EnableUseGpu(100, 0); config.pass_builder()->TurnOnDebug(); - SetConfig(&config, model_dir, true, use_tensorrt, - FLAGS_batch_size); + SetConfig(&config, model_dir, true, use_tensorrt, + FLAGS_batch_size); TestPrediction(reinterpret_cast(&config), inputs_all, &outputs, FLAGS_num_threads, true); } else { @@ -99,18 +99,18 @@ void compare(std::string model_dir, bool use_tensorrt) { SetFakeImageInput(&inputs_all, model_dir, false, "__model__", ""); } - contrib::AnalysisConfig analysis_config; - SetConfig(&analysis_config, model_dir, true, - use_tensorrt, FLAGS_batch_size); + AnalysisConfig analysis_config; + SetConfig(&analysis_config, model_dir, true, use_tensorrt, + FLAGS_batch_size); CompareNativeAndAnalysis( reinterpret_cast(&analysis_config), inputs_all); } void compare_continuous_input(std::string model_dir, bool use_tensorrt) { - contrib::AnalysisConfig analysis_config; - SetConfig(&analysis_config, model_dir, true, - use_tensorrt, FLAGS_batch_size); + AnalysisConfig analysis_config; + SetConfig(&analysis_config, model_dir, true, use_tensorrt, + FLAGS_batch_size); auto config = reinterpret_cast(&analysis_config); auto native_pred = CreateTestPredictor(config, false); diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc index 5d8684f083bda8499000c9fd0a7617cf129db13b..8759ec8096cf102ab85d2c2a91eddc23a6ed0e50 100644 --- a/paddle/fluid/memory/allocation/legacy_allocator.cc +++ b/paddle/fluid/memory/allocation/legacy_allocator.cc @@ -13,9 +13,15 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/legacy_allocator.h" + #include #include #include + +#ifdef PADDLE_WITH_JEMALLOC +#include +#endif + #include "glog/logging.h" #include "paddle/fluid/memory/detail/buddy_allocator.h" #include "paddle/fluid/memory/detail/system_allocator.h" @@ -95,7 +101,11 @@ struct NaiveAllocator { template <> void *Alloc(const platform::CPUPlace &place, size_t size) { VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); +#ifdef PADDLE_WITH_JEMALLOC + void *p = malloc(size); +#else void *p = GetCPUBuddyAllocator()->Alloc(size); +#endif if (FLAGS_init_allocated_mem) { memset(p, 0xEF, size); } @@ -107,12 +117,21 @@ template <> void Free(const platform::CPUPlace &place, void *p, size_t size) { VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place); +#ifdef PADDLE_WITH_JEMALLOC + free(p); +#else GetCPUBuddyAllocator()->Free(p); +#endif } template <> size_t Used(const platform::CPUPlace &place) { +#ifdef PADDLE_WITH_JEMALLOC + // fake the result of used memory when PADDLE_WITH_JEMALLOC is ON + return 0U; +#else return GetCPUBuddyAllocator()->Used(); +#endif } #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 2395b181485429784e0f3dff6d056b84268ef245..f357e3ccf905309e6656f3fa87fbee45dc357c1e 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -9,9 +9,9 @@ http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - limitations under the License. */ +#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/poly_util.h" @@ -35,30 +35,45 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { auto box_dims = ctx->GetInputDim("BBoxes"); auto score_dims = ctx->GetInputDim("Scores"); + auto score_size = score_dims.size(); if (ctx->IsRuntime()) { + PADDLE_ENFORCE(score_size == 2 || score_size == 3, + "The rank of Input(Scores) must be 2 or 3"); PADDLE_ENFORCE_EQ(box_dims.size(), 3, - "The rank of Input(BBoxes) must be 3."); - PADDLE_ENFORCE_EQ(score_dims.size(), 3, - "The rank of Input(Scores) must be 3."); - PADDLE_ENFORCE(box_dims[2] == 4 || box_dims[2] == 8 || - box_dims[2] == 16 || box_dims[2] == 24 || - box_dims[2] == 32, - "The 2nd dimension of Input(BBoxes) must be 4 or 8, " - "represents the layout of coordinate " - "[xmin, ymin, xmax, ymax] or " - "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " - "8 points: [xi, yi] i= 1,2,...,8 or " - "12 points: [xi, yi] i= 1,2,...,12 or " - "16 points: [xi, yi] i= 1,2,...,16"); - PADDLE_ENFORCE_EQ(box_dims[1], score_dims[2], - "The 1st dimensiong of Input(BBoxes) must be equal to " - "3rd dimension of Input(Scores), which represents the " - "predicted bboxes."); + "The rank of Input(BBoxes) must be 3"); + if (score_size == 3) { + PADDLE_ENFORCE(box_dims[2] == 4 || box_dims[2] == 8 || + box_dims[2] == 16 || box_dims[2] == 24 || + box_dims[2] == 32, + "The last dimension of Input(BBoxes) must be 4 or 8, " + "represents the layout of coordinate " + "[xmin, ymin, xmax, ymax] or " + "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " + "8 points: [xi, yi] i= 1,2,...,8 or " + "12 points: [xi, yi] i= 1,2,...,12 or " + "16 points: [xi, yi] i= 1,2,...,16"); + PADDLE_ENFORCE_EQ( + box_dims[1], score_dims[2], + "The 2nd dimension of Input(BBoxes) must be equal to " + "last dimension of Input(Scores), which represents the " + "predicted bboxes."); + } else { + PADDLE_ENFORCE(box_dims[2] == 4, + "The last dimension of Input(BBoxes) must be 4"); + PADDLE_ENFORCE_EQ(box_dims[1], score_dims[1], + "The 2nd dimension of Input(BBoxes)" + "must be equal to the 2nd dimension" + " of Input(Scores)"); + } } // Here the box_dims[0] is not the real dimension of output. // It will be rewritten in the computing kernel. - ctx->SetOutputDim("Out", {box_dims[1], box_dims[2] + 2}); + if (score_size == 3) { + ctx->SetOutputDim("Out", {box_dims[1], box_dims[2] + 2}); + } else { + ctx->SetOutputDim("Out", {-1, box_dims[2] + 2}); + } } protected: @@ -123,8 +138,9 @@ static inline T JaccardOverlap(const T* box1, const T* box2, const T inter_ymin = std::max(box1[1], box2[1]); const T inter_xmax = std::min(box1[2], box2[2]); const T inter_ymax = std::min(box1[3], box2[3]); - const T inter_w = inter_xmax - inter_xmin; - const T inter_h = inter_ymax - inter_ymin; + T norm = normalized ? static_cast(0.) : static_cast(1.); + T inter_w = inter_xmax - inter_xmin + norm; + T inter_h = inter_ymax - inter_ymin + norm; const T inter_area = inter_w * inter_h; const T bbox1_area = BBoxArea(box1, normalized); const T bbox2_area = BBoxArea(box2, normalized); @@ -139,7 +155,7 @@ T PolyIoU(const T* box1, const T* box2, const size_t box_size, T bbox2_area = PolyArea(box2, box_size, normalized); T inter_area = PolyOverlapArea(box1, box2, box_size, normalized); if (bbox1_area == 0 || bbox2_area == 0 || inter_area == 0) { - // If coordinate values are is invalid + // If coordinate values are invalid // if area size <= 0, return 0. return T(0.); } else { @@ -147,12 +163,35 @@ T PolyIoU(const T* box1, const T* box2, const size_t box_size, } } +template +void SliceOneClass(const platform::DeviceContext& ctx, + const framework::Tensor& items, const int class_id, + framework::Tensor* one_class_item) { + T* item_data = one_class_item->mutable_data(ctx.GetPlace()); + const T* items_data = items.data(); + const int64_t num_item = items.dims()[0]; + const int class_num = items.dims()[1]; + if (items.dims().size() == 3) { + int item_size = items.dims()[2]; + for (int i = 0; i < num_item; ++i) { + std::memcpy(item_data + i * item_size, + items_data + i * class_num * item_size + class_id * item_size, + sizeof(T) * item_size); + } + } else { + for (int i = 0; i < num_item; ++i) { + item_data[i] = items_data[i * class_num + class_id]; + } + } +} + template class MultiClassNMSKernel : public framework::OpKernel { public: void NMSFast(const Tensor& bbox, const Tensor& scores, const T score_threshold, const T nms_threshold, const T eta, - const int64_t top_k, std::vector* selected_indices) const { + const int64_t top_k, std::vector* selected_indices, + const bool normalized) const { // The total boxes for each instance. int64_t num_boxes = bbox.dims()[0]; // 4: [xmin ymin xmax ymax] @@ -178,15 +217,16 @@ class MultiClassNMSKernel : public framework::OpKernel { T overlap = T(0.); // 4: [xmin ymin xmax ymax] if (box_size == 4) { - overlap = JaccardOverlap(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, true); + overlap = + JaccardOverlap(bbox_data + idx * box_size, + bbox_data + kept_idx * box_size, normalized); } // 8: [x1 y1 x2 y2 x3 y3 x4 y4] or 16, 24, 32 if (box_size == 8 || box_size == 16 || box_size == 24 || box_size == 32) { - overlap = - PolyIoU(bbox_data + idx * box_size, - bbox_data + kept_idx * box_size, box_size, true); + overlap = PolyIoU(bbox_data + idx * box_size, + bbox_data + kept_idx * box_size, box_size, + normalized); } keep = overlap <= adaptive_threshold; } else { @@ -205,37 +245,58 @@ class MultiClassNMSKernel : public framework::OpKernel { void MultiClassNMS(const framework::ExecutionContext& ctx, const Tensor& scores, const Tensor& bboxes, + const int scores_size, std::map>* indices, int* num_nmsed_out) const { int64_t background_label = ctx.Attr("background_label"); int64_t nms_top_k = ctx.Attr("nms_top_k"); int64_t keep_top_k = ctx.Attr("keep_top_k"); + bool normalized = ctx.Attr("normalized"); T nms_threshold = static_cast(ctx.Attr("nms_threshold")); T nms_eta = static_cast(ctx.Attr("nms_eta")); T score_threshold = static_cast(ctx.Attr("score_threshold")); + auto& dev_ctx = ctx.template device_context(); - int64_t class_num = scores.dims()[0]; - int64_t predict_dim = scores.dims()[1]; int num_det = 0; + + int64_t class_num = scores_size == 3 ? scores.dims()[0] : scores.dims()[1]; + Tensor bbox_slice, score_slice; for (int64_t c = 0; c < class_num; ++c) { if (c == background_label) continue; - Tensor score = scores.Slice(c, c + 1); - NMSFast(bboxes, score, score_threshold, nms_threshold, nms_eta, nms_top_k, - &((*indices)[c])); + if (scores_size == 3) { + score_slice = scores.Slice(c, c + 1); + bbox_slice = bboxes; + } else { + score_slice.Resize({scores.dims()[0], 1}); + bbox_slice.Resize({scores.dims()[0], 4}); + SliceOneClass(dev_ctx, scores, c, &score_slice); + SliceOneClass(dev_ctx, bboxes, c, &bbox_slice); + } + NMSFast(bbox_slice, score_slice, score_threshold, nms_threshold, nms_eta, + nms_top_k, &((*indices)[c]), normalized); + if (scores_size == 2) { + std::stable_sort((*indices)[c].begin(), (*indices)[c].end()); + } num_det += (*indices)[c].size(); } *num_nmsed_out = num_det; const T* scores_data = scores.data(); if (keep_top_k > -1 && num_det > keep_top_k) { + const T* sdata; std::vector>> score_index_pairs; for (const auto& it : *indices) { int label = it.first; - const T* sdata = scores_data + label * predict_dim; + if (scores_size == 3) { + sdata = scores_data + label * scores.dims()[1]; + } else { + score_slice.Resize({scores.dims()[0], 1}); + SliceOneClass(dev_ctx, scores, label, &score_slice); + sdata = score_slice.data(); + } const std::vector& label_indices = it.second; for (size_t j = 0; j < label_indices.size(); ++j) { int idx = label_indices[j]; - PADDLE_ENFORCE_LT(idx, predict_dim); score_index_pairs.push_back( std::make_pair(sdata[idx], std::make_pair(label, idx))); } @@ -252,31 +313,55 @@ class MultiClassNMSKernel : public framework::OpKernel { int idx = score_index_pairs[j].second.second; new_indices[label].push_back(idx); } + if (scores_size == 2) { + for (const auto& it : new_indices) { + int label = it.first; + std::stable_sort(new_indices[label].begin(), + new_indices[label].end()); + } + } new_indices.swap(*indices); *num_nmsed_out = keep_top_k; } } - void MultiClassOutput(const Tensor& scores, const Tensor& bboxes, + void MultiClassOutput(const platform::DeviceContext& ctx, + const Tensor& scores, const Tensor& bboxes, const std::map>& selected_indices, - Tensor* outs) const { + const int scores_size, Tensor* outs) const { + int64_t class_num = scores.dims()[1]; int64_t predict_dim = scores.dims()[1]; int64_t box_size = bboxes.dims()[1]; - int64_t out_dim = bboxes.dims()[1] + 2; + if (scores_size == 2) { + box_size = bboxes.dims()[2]; + } + int64_t out_dim = box_size + 2; auto* scores_data = scores.data(); auto* bboxes_data = bboxes.data(); auto* odata = outs->data(); - + const T* sdata; + Tensor bbox; + bbox.Resize({scores.dims()[0], box_size}); int count = 0; for (const auto& it : selected_indices) { int label = it.first; - const T* sdata = scores_data + label * predict_dim; const std::vector& indices = it.second; + if (scores_size == 2) { + SliceOneClass(ctx, bboxes, label, &bbox); + } else { + sdata = scores_data + label * predict_dim; + } for (size_t j = 0; j < indices.size(); ++j) { int idx = indices[j]; - const T* bdata = bboxes_data + idx * box_size; - odata[count * out_dim] = label; // label - odata[count * out_dim + 1] = sdata[idx]; // score + odata[count * out_dim] = label; // label + const T* bdata; + if (scores_size == 3) { + bdata = bboxes_data + idx * box_size; + odata[count * out_dim + 1] = sdata[idx]; // score + } else { + bdata = bbox.data() + idx * box_size; + odata[count * out_dim + 1] = *(scores_data + idx * class_num + label); + } // xmin, ymin, xmax, ymax or multi-points coordinates std::memcpy(odata + count * out_dim + 2, bdata, box_size * sizeof(T)); count++; @@ -285,52 +370,64 @@ class MultiClassNMSKernel : public framework::OpKernel { } void Compute(const framework::ExecutionContext& ctx) const override { - auto* boxes = ctx.Input("BBoxes"); - auto* scores = ctx.Input("Scores"); + auto* boxes = ctx.Input("BBoxes"); + auto* scores = ctx.Input("Scores"); auto* outs = ctx.Output("Out"); auto score_dims = scores->dims(); - - int64_t batch_size = score_dims[0]; - int64_t class_num = score_dims[1]; - int64_t predict_dim = score_dims[2]; - int64_t box_dim = boxes->dims()[2]; - int64_t out_dim = boxes->dims()[2] + 2; + auto score_size = score_dims.size(); + auto& dev_ctx = ctx.template device_context(); std::vector>> all_indices; std::vector batch_starts = {0}; - for (int64_t i = 0; i < batch_size; ++i) { - Tensor ins_score = scores->Slice(i, i + 1); - ins_score.Resize({class_num, predict_dim}); - - Tensor ins_boxes = boxes->Slice(i, i + 1); - ins_boxes.Resize({predict_dim, box_dim}); - + int64_t batch_size = score_dims[0]; + int64_t box_dim = boxes->dims()[2]; + int64_t out_dim = box_dim + 2; + int num_nmsed_out = 0; + Tensor boxes_slice, scores_slice; + int n = score_size == 3 ? batch_size : boxes->lod().back().size() - 1; + for (int i = 0; i < n; ++i) { + if (score_size == 3) { + scores_slice = scores->Slice(i, i + 1); + scores_slice.Resize({score_dims[1], score_dims[2]}); + boxes_slice = boxes->Slice(i, i + 1); + boxes_slice.Resize({score_dims[2], box_dim}); + } else { + auto boxes_lod = boxes->lod().back(); + scores_slice = scores->Slice(boxes_lod[i], boxes_lod[i + 1]); + boxes_slice = boxes->Slice(boxes_lod[i], boxes_lod[i + 1]); + } std::map> indices; - int num_nmsed_out = 0; - MultiClassNMS(ctx, ins_score, ins_boxes, &indices, &num_nmsed_out); + MultiClassNMS(ctx, scores_slice, boxes_slice, score_size, &indices, + &num_nmsed_out); all_indices.push_back(indices); batch_starts.push_back(batch_starts.back() + num_nmsed_out); } int num_kept = batch_starts.back(); if (num_kept == 0) { - T* od = outs->mutable_data({1}, ctx.GetPlace()); + T* od = outs->mutable_data({1, 1}, ctx.GetPlace()); od[0] = -1; + batch_starts = {0, 1}; } else { outs->mutable_data({num_kept, out_dim}, ctx.GetPlace()); - for (int64_t i = 0; i < batch_size; ++i) { - Tensor ins_score = scores->Slice(i, i + 1); - ins_score.Resize({class_num, predict_dim}); - - Tensor ins_boxes = boxes->Slice(i, i + 1); - ins_boxes.Resize({predict_dim, box_dim}); - + for (int i = 0; i < n; ++i) { + if (score_size == 3) { + scores_slice = scores->Slice(i, i + 1); + boxes_slice = boxes->Slice(i, i + 1); + scores_slice.Resize({score_dims[1], score_dims[2]}); + boxes_slice.Resize({score_dims[2], box_dim}); + } else { + auto boxes_lod = boxes->lod().back(); + scores_slice = scores->Slice(boxes_lod[i], boxes_lod[i + 1]); + boxes_slice = boxes->Slice(boxes_lod[i], boxes_lod[i + 1]); + } int64_t s = batch_starts[i]; int64_t e = batch_starts[i + 1]; if (e > s) { Tensor out = outs->Slice(s, e); - MultiClassOutput(ins_score, ins_boxes, all_indices[i], &out); + MultiClassOutput(dev_ctx, scores_slice, boxes_slice, all_indices[i], + score_dims.size(), &out); } } } @@ -346,17 +443,24 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { AddInput("BBoxes", - "(Tensor) A 3-D Tensor with shape " + "Two types of bboxes are supported:" + "1. (Tensor) A 3-D Tensor with shape " "[N, M, 4 or 8 16 24 32] represents the " "predicted locations of M bounding bboxes, N is the batch size. " "Each bounding box has four coordinate values and the layout is " - "[xmin, ymin, xmax, ymax], when box size equals to 4."); + "[xmin, ymin, xmax, ymax], when box size equals to 4." + "2. (LoDTensor) A 3-D Tensor with shape [M, C, 4]" + "M is the number of bounding boxes, C is the class number"); AddInput("Scores", - "(Tensor) A 3-D Tensor with shape [N, C, M] represents the " + "Two types of scores are supported:" + "1. (Tensor) A 3-D Tensor with shape [N, C, M] represents the " "predicted confidence predictions. N is the batch size, C is the " "class number, M is number of bounding boxes. For each category " "there are total M scores which corresponding M bounding boxes. " - " Please note, M is equal to the 1st dimension of BBoxes. "); + " Please note, M is equal to the 2nd dimension of BBoxes. " + "2. (LoDTensor) A 2-D LoDTensor with shape [M, C]. " + "M is the number of bbox, C is the class number. In this case, " + "Input BBoxes should be the second case with shape [M, C, 4]."); AddAttr( "background_label", "(int, defalut: 0) " @@ -384,6 +488,10 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker { "(int64_t) " "Number of total bboxes to be kept per image after NMS " "step. -1 means keeping all bboxes after NMS step."); + AddAttr("normalized", + "(bool, default true) " + "Whether detections are normalized.") + .SetDefault(true); AddOutput("Out", "(LoDTensor) A 2-D LoDTensor with shape [No, 6] represents the " "detections. Each row has 6 values: " @@ -399,24 +507,21 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker { AddComment(R"DOC( This operator is to do multi-class non maximum suppression (NMS) on a batched of boxes and scores. - In the NMS step, this operator greedily selects a subset of detection bounding boxes that have high scores larger than score_threshold, if providing this threshold, then selects the largest nms_top_k confidences scores if nms_top_k is larger than -1. Then this operator pruns away boxes that have high IOU (intersection over union) overlap with already selected boxes by adaptive threshold NMS based on parameters of nms_threshold and nms_eta. - Aftern NMS step, at most keep_top_k number of total bboxes are to be kept per image if keep_top_k is larger than -1. - This operator support multi-class and batched inputs. It applying NMS independently for each class. The outputs is a 2-D LoDTenosr, for each image, the offsets in first dimension of LoDTensor are called LoD, the number of offset is N + 1, where N is the batch size. If LoD[i + 1] - LoD[i] == 0, means there is no detected bbox for this image. If there is no detected boxes -for all images, all the elements in LoD are 0, and the Out only contains one -value which is -1. +for all images, all the elements in LoD are set to {1}, and the Out only +contains one value which is -1. )DOC"); } }; diff --git a/paddle/fluid/operators/distributed/proto_encoder_helper.h b/paddle/fluid/operators/distributed/proto_encoder_helper.h index 27ca1f4edc04f5fca54b1a6340243634a596939c..e9f06f54327875c0568c571627e9effb998e15be 100644 --- a/paddle/fluid/operators/distributed/proto_encoder_helper.h +++ b/paddle/fluid/operators/distributed/proto_encoder_helper.h @@ -85,7 +85,7 @@ class ProtoEncodeHelper { #define REPLACE_ENFORCE_GLOG 1 // Make sure callers didn't do operations that went over max_size promised if (paddle::platform::is_error(p_ <= limit_)) { - paddle::platform::throw_on_error(p_ <= limit_); + paddle::platform::throw_on_error(p_ <= limit_, ""); } #undef REPLACE_ENFORCE_GLOG } diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 15413785bab3c0fd77244141e8f1840ca0cc1356..142d38f0609d963ce3ff45c595b8432b0e5edd21 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -71,9 +71,8 @@ struct EnforceNotMet : public std::exception { } } - template - EnforceNotMet(const char* f, int l, ARGS... args) { - Init(string::Sprintf(args...), f, l); + EnforceNotMet(const std::string& str, const char* f, int l) { + Init(str, f, l); } const char* what() const noexcept override { return err_str_.c_str(); } @@ -142,28 +141,23 @@ struct EOFException : public std::exception { inline bool is_error(bool stat) { return !stat; } -template -inline typename std::enable_if::type throw_on_error( - bool stat, const Args&... args) { +inline void throw_on_error(bool stat, const std::string& msg) { #ifndef REPLACE_ENFORCE_GLOG - throw std::runtime_error(string::Sprintf(args...)); + throw std::runtime_error(msg); #else - LOG(FATAL) << string::Sprintf(args...); + LOG(FATAL) << msg; #endif } #ifdef PADDLE_WITH_CUDA -inline bool is_error(cudaError_t e) { return UNLIKELY(e); } +inline bool is_error(cudaError_t e) { return e != cudaSuccess; } -template -inline typename std::enable_if::type throw_on_error( - cudaError_t e, const Args&... args) { +inline void throw_on_error(cudaError_t e, const std::string& msg) { #ifndef REPLACE_ENFORCE_GLOG - throw thrust::system_error(e, thrust::cuda_category(), - string::Sprintf(args...)); + throw thrust::system_error(e, thrust::cuda_category(), msg); #else - LOG(FATAL) << string::Sprintf(args...); + LOG(FATAL) << msg; #endif } @@ -171,14 +165,12 @@ inline bool is_error(curandStatus_t stat) { return stat != CURAND_STATUS_SUCCESS; } -template -inline typename std::enable_if::type throw_on_error( - curandStatus_t stat, const Args&... args) { +inline void throw_on_error(curandStatus_t stat, const std::string& msg) { #ifndef REPLACE_ENFORCE_GLOG throw thrust::system_error(cudaErrorLaunchFailure, thrust::cuda_category(), - string::Sprintf(args...)); + msg); #else - LOG(FATAL) << string::Sprintf(args...); + LOG(FATAL) << msg; #endif } @@ -186,14 +178,11 @@ inline bool is_error(cudnnStatus_t stat) { return stat != CUDNN_STATUS_SUCCESS; } -template -inline typename std::enable_if::type throw_on_error( - cudnnStatus_t stat, const Args&... args) { +inline void throw_on_error(cudnnStatus_t stat, const std::string& msg) { #ifndef REPLACE_ENFORCE_GLOG - throw std::runtime_error(platform::dynload::cudnnGetErrorString(stat) + - string::Sprintf(args...)); + throw std::runtime_error(platform::dynload::cudnnGetErrorString(stat) + msg); #else - LOG(FATAL) << string::Sprintf(args...); + LOG(FATAL) << platform::dynload::cudnnGetErrorString(stat) << msg; #endif } @@ -201,9 +190,7 @@ inline bool is_error(cublasStatus_t stat) { return stat != CUBLAS_STATUS_SUCCESS; } -template -inline typename std::enable_if::type throw_on_error( - cublasStatus_t stat, const Args&... args) { +inline void throw_on_error(cublasStatus_t stat, const std::string& msg) { std::string err; if (stat == CUBLAS_STATUS_NOT_INITIALIZED) { err = "CUBLAS: not initialized, "; @@ -225,87 +212,45 @@ inline typename std::enable_if::type throw_on_error( err = "CUBLAS: license error, "; } #ifndef REPLACE_ENFORCE_GLOG - throw std::runtime_error(err + string::Sprintf(args...)); + throw std::runtime_error(err + msg); #else - LOG(FATAL) << err << string::Sprintf(args...); + LOG(FATAL) << err << msg; #endif } #if !defined(__APPLE__) && !defined(_WIN32) -template -inline typename std::enable_if::type throw_on_error( - ncclResult_t stat, const Args&... args) { - if (stat == ncclSuccess) { - return; - } else { +inline bool is_error(ncclResult_t nccl_result) { + return nccl_result != ncclSuccess; +} + +inline void throw_on_error(ncclResult_t stat, const std::string& msg) { #ifndef REPLACE_ENFORCE_GLOG - throw std::runtime_error(platform::dynload::ncclGetErrorString(stat) + - string::Sprintf(args...)); + throw std::runtime_error(platform::dynload::ncclGetErrorString(stat) + msg); #else - LOG(FATAL) << platform::dynload::ncclGetErrorString(stat) - << string::Sprintf(args...); + LOG(FATAL) << platform::dynload::ncclGetErrorString(stat) << msg; #endif - } } #endif // __APPLE__ and windows #endif // PADDLE_WITH_CUDA -template -inline void throw_on_error(T e) { - throw_on_error(e, ""); -} - -#define PADDLE_THROW(...) \ - throw ::paddle::platform::EnforceNotMet(__FILE__, __LINE__, __VA_ARGS__) - -#define __PADDLE_THROW_ERROR_I(_, _9, _8, _7, _6, _5, _4, _3, _2, X_, ...) X_; - -#define __THROW_ON_ERROR_ONE_ARG(COND, ARG) \ - ::paddle::platform::throw_on_error(COND, ::paddle::string::Sprintf(ARG)); - -#ifdef _WIN32 -#define __PADDLE_THROW_ON_ERROR(COND, ...) \ - __THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__) -#else // _WIN32 -#define __PADDLE_THROW_ON_ERROR(COND, ...) \ - __PADDLE_THROW_ERROR_I( \ - __VA_ARGS__, ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ - ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ - ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ - ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ - ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ - ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ - ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ - ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ - __THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__)) -#endif // _WIN32 - -#define __PADDLE_UNARY_COMPARE(COND, ...) \ - do { \ - auto __cond = COND; \ - if (UNLIKELY(::paddle::platform::is_error(__cond))) { \ - __PADDLE_THROW_ON_ERROR(__cond, __VA_ARGS__); \ - } \ +#define PADDLE_THROW(...) \ + throw ::paddle::platform::EnforceNotMet( \ + ::paddle::string::Sprintf(__VA_ARGS__), __FILE__, __LINE__) + +#define PADDLE_ENFORCE(COND, ...) \ + do { \ + auto __cond__ = (COND); \ + if (UNLIKELY(::paddle::platform::is_error(__cond__))) { \ + try { \ + ::paddle::platform::throw_on_error( \ + __cond__, ::paddle::string::Sprintf(__VA_ARGS__)); \ + } catch (...) { \ + throw ::paddle::platform::EnforceNotMet(std::current_exception(), \ + __FILE__, __LINE__); \ + } \ + } \ } while (0) -#ifndef REPLACE_ENFORCE_GLOG -#define __PADDLE_ENFORCE_I(COND, ...) \ - do { \ - try { \ - __PADDLE_UNARY_COMPARE(COND, __VA_ARGS__); \ - } catch (...) { \ - throw ::paddle::platform::EnforceNotMet(std::current_exception(), \ - __FILE__, __LINE__); \ - } \ - } while (0) - -#else -#define __PADDLE_ENFORCE_I(COND, ...) __PADDLE_UNARY_COMPARE(COND, __VA_ARGS__); -#endif // REPLACE_ENFORCE_GLOG - -#define __PADDLE_ENFORCE(__args) __PADDLE_ENFORCE_I __args -#define PADDLE_ENFORCE(...) __PADDLE_ENFORCE((__VA_ARGS__)) - #define PADDLE_THROW_EOF() \ do { \ throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \ diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index 8df8e32098697540f02d488c873f5ae7fb29828e..6ae21ee8294bedc388f837aad3e20a2b9aca98a2 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -64,7 +64,7 @@ class NCCLGroupGuard { } inline ~NCCLGroupGuard() { - CHECK_EQ(dynload::ncclGroupEnd(), ncclSuccess); + PADDLE_ENFORCE(dynload::ncclGroupEnd()); NCCLMutex().unlock(); } }; diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index e05667d2c7e9ce5c64cfacee4919cd36d7383c0c..39e47be606c07ed216c9fe2ff8fa75552b8b7c76 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -33,7 +33,6 @@ using paddle::PaddlePredictor; using paddle::NativeConfig; using paddle::NativePaddlePredictor; using paddle::AnalysisPredictor; -using paddle::contrib::AnalysisConfig; static void BindPaddleDType(py::module *m); static void BindPaddleBuf(py::module *m); diff --git a/paddle/fluid/string/printf.h b/paddle/fluid/string/printf.h index 0b94b60018aac3a61edfda4d7ecb762e9fe70673..16bb3771f2e9bcc07028ef2039fed8691f9aab97 100644 --- a/paddle/fluid/string/printf.h +++ b/paddle/fluid/string/printf.h @@ -84,6 +84,8 @@ void Fprintf(std::ostream& out, const char* fmt, const Args&... args) { tinyformat::vformat(out, fmt, tinyformat::makeFormatList(args...)); } +inline std::string Sprintf() { return ""; } + template std::string Sprintf(const Args&... args) { std::ostringstream oss; diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index c2156a436ec73d03082fa08b6250dc77b2cee19f..1135caf4f8c32901d93270d372fdaac702acf006 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -173,7 +173,6 @@ function cmake_gen() { -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} ${PYTHON_FLAGS} -DWITH_DSO=ON - -DWITH_DOC=${WITH_DOC:-OFF} -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} -DWITH_DISTRIBUTE=${distibuted_flag} @@ -208,7 +207,6 @@ EOF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release} \ ${PYTHON_FLAGS} \ -DWITH_DSO=ON \ - -DWITH_DOC=${WITH_DOC:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AMD_GPU=${WITH_AMD_GPU:-OFF} \ -DWITH_DISTRIBUTE=${distibuted_flag} \ @@ -528,31 +526,6 @@ function bind_test() { wait } - -function gen_docs() { - mkdir -p ${PADDLE_ROOT}/build - cd ${PADDLE_ROOT}/build - cat <= 2: + if batch_id >= batch_num: break - x_data = np.array( + dy_x_data = np.array( [x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data]).astype('int64').reshape( 128, 1) - img = to_variable(x_data) + img = to_variable(dy_x_data) label = to_variable(y_data) label._stop_gradient = True @@ -136,6 +137,7 @@ class TestImperativeMnist(unittest.TestCase): avg_loss._backward() sgd.minimize(avg_loss) + mnist.clear_gradients() dy_param_value = {} for param in fluid.default_main_program().global_block( ).all_parameters(): @@ -175,10 +177,10 @@ class TestImperativeMnist(unittest.TestCase): static_param_init_value[static_param_name_list[i]] = out[i] for batch_id, data in enumerate(train_reader()): - if batch_id >= 2: + if batch_id >= batch_num: break - x_data = np.array( + static_x_data = np.array( [x[0].reshape(1, 28, 28) for x in data]).astype('float32') y_data = np.array([x[1] for x in data]).astype('int64').reshape( [128, 1]) @@ -186,7 +188,7 @@ class TestImperativeMnist(unittest.TestCase): fetch_list = [avg_loss.name] fetch_list.extend(static_param_name_list) out = exe.run(fluid.default_main_program(), - feed={"pixel": x_data, + feed={"pixel": static_x_data, "label": y_data}, fetch_list=fetch_list) @@ -196,11 +198,12 @@ class TestImperativeMnist(unittest.TestCase): static_param_value[static_param_name_list[i - 1]] = out[i] for key, value in six.iteritems(static_param_init_value): - self.assertTrue( - np.allclose(value.all(), dy_param_init_value[key].all())) - self.assertTrue(np.allclose(static_out.all(), dy_out.all())) + self.assertTrue(np.allclose(value, dy_param_init_value[key])) + + self.assertTrue(np.allclose(static_out, dy_out)) + for key, value in six.iteritems(static_param_value): - self.assertTrue(np.allclose(value.all(), dy_param_value[key].all())) + self.assertTrue(np.allclose(value, dy_param_value[key])) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index 87a72dd04e376cf9225e275d862b0cbbb9774e2c..c27fd0b8024a8fa3310a62de34299fb621e2902f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -264,6 +264,7 @@ class TestImperativeResnet(unittest.TestCase): )] = np_array optimizer.minimize(avg_loss) + resnet.clear_gradients() dy_param_value = {} for param in fluid.default_main_program().global_block( diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index c13f03e86f3e375026b04a31d51ac1a5223360ef..e7bc1601a54c8615e0e787d74145aa4987b6cb88 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -58,7 +58,8 @@ class TestBook(unittest.TestCase): def test_simple_conv2d(self): program = Program() with program_guard(program, startup_program=Program()): - images = layers.data(name='pixel', shape=[3, 48, 48], dtype='int32') + images = layers.data( + name='pixel', shape=[3, 48, 48], dtype='float32') layers.conv2d(input=images, num_filters=3, filter_size=[4, 4]) print(str(program)) diff --git a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py index 9778bd694de4b21f3ff723846c77a8ad0dceb57b..8fc391a1ff2529460b038979c0c7d0a9d905a7e0 100644 --- a/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiclass_nms_op.py @@ -19,7 +19,7 @@ import copy from op_test import OpTest -def iou(box_a, box_b): +def iou(box_a, box_b, norm): """Apply intersection-over-union overlap between box_a and box_b """ xmin_a = min(box_a[0], box_a[2]) @@ -32,8 +32,10 @@ def iou(box_a, box_b): xmax_b = max(box_b[0], box_b[2]) ymax_b = max(box_b[1], box_b[3]) - area_a = (ymax_a - ymin_a) * (xmax_a - xmin_a) - area_b = (ymax_b - ymin_b) * (xmax_b - xmin_b) + area_a = (ymax_a - ymin_a + (norm == False)) * (xmax_a - xmin_a + + (norm == False)) + area_b = (ymax_b - ymin_b + (norm == False)) * (xmax_b - xmin_b + + (norm == False)) if area_a <= 0 and area_b <= 0: return 0.0 @@ -42,17 +44,21 @@ def iou(box_a, box_b): xb = min(xmax_a, xmax_b) yb = min(ymax_a, ymax_b) - inter_area = max(xb - xa, 0.0) * max(yb - ya, 0.0) - - box_a_area = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1]) - box_b_area = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1]) + inter_area = max(xb - xa + (norm == False), + 0.0) * max(yb - ya + (norm == False), 0.0) iou_ratio = inter_area / (area_a + area_b - inter_area) return iou_ratio -def nms(boxes, scores, score_threshold, nms_threshold, top_k=200, eta=1.0): +def nms(boxes, + scores, + score_threshold, + nms_threshold, + top_k=200, + normalized=True, + eta=1.0): """Apply non-maximum suppression at test time to avoid detecting too many overlapping bounding boxes for a given object. Args: @@ -87,7 +93,7 @@ def nms(boxes, scores, score_threshold, nms_threshold, top_k=200, eta=1.0): for k in range(len(selected_indices)): if keep: kept_idx = selected_indices[k] - overlap = iou(boxes[idx], boxes[kept_idx]) + overlap = iou(boxes[idx], boxes[kept_idx], normalized) keep = True if overlap <= adaptive_threshold else False else: break @@ -99,16 +105,24 @@ def nms(boxes, scores, score_threshold, nms_threshold, top_k=200, eta=1.0): def multiclass_nms(boxes, scores, background, score_threshold, nms_threshold, - nms_top_k, keep_top_k): - class_num = scores.shape[0] - priorbox_num = scores.shape[1] + nms_top_k, keep_top_k, normalized, shared): + if shared: + class_num = scores.shape[0] + priorbox_num = scores.shape[1] + else: + box_num = scores.shape[0] + class_num = scores.shape[1] selected_indices = {} num_det = 0 for c in range(class_num): if c == background: continue - indices = nms(boxes, scores[c], score_threshold, nms_threshold, - nms_top_k) + if shared: + indices = nms(boxes, scores[c], score_threshold, nms_threshold, + nms_top_k, normalized) + else: + indices = nms(boxes[:, c, :], scores[:, c], score_threshold, + nms_threshold, nms_top_k, normalized) selected_indices[c] = indices num_det += len(indices) @@ -116,7 +130,10 @@ def multiclass_nms(boxes, scores, background, score_threshold, nms_threshold, score_index = [] for c, indices in selected_indices.items(): for idx in indices: - score_index.append((scores[c][idx], c, idx)) + if shared: + score_index.append((scores[c][idx], c, idx)) + else: + score_index.append((scores[idx][c], c, idx)) sorted_score_index = sorted( score_index, key=lambda tup: tup[0], reverse=True) @@ -127,24 +144,75 @@ def multiclass_nms(boxes, scores, background, score_threshold, nms_threshold, selected_indices[c] = [] for s, c, idx in sorted_score_index: selected_indices[c].append(idx) + if not shared: + for labels in selected_indices: + selected_indices[labels].sort() num_det = keep_top_k return selected_indices, num_det -def batched_multiclass_nms(boxes, scores, background, score_threshold, - nms_threshold, nms_top_k, keep_top_k): +def lod_multiclass_nms(boxes, scores, background, score_threshold, + nms_threshold, nms_top_k, keep_top_k, box_lod, + normalized): + det_outs = [] + lod = [] + head = 0 + for n in range(len(box_lod[0])): + box = boxes[head:head + box_lod[0][n]] + score = scores[head:head + box_lod[0][n]] + head = head + box_lod[0][n] + nmsed_outs, nmsed_num = multiclass_nms( + box, + score, + background, + score_threshold, + nms_threshold, + nms_top_k, + keep_top_k, + normalized, + shared=False) + if nmsed_num == 0: + #lod.append(1) + continue + lod.append(nmsed_num) + for c, indices in nmsed_outs.items(): + for idx in indices: + xmin, ymin, xmax, ymax = box[idx, c, :] + det_outs.append([c, score[idx][c], xmin, ymin, xmax, ymax]) + if len(lod) == 0: + lod.append(1) + + return det_outs, lod + + +def batched_multiclass_nms(boxes, + scores, + background, + score_threshold, + nms_threshold, + nms_top_k, + keep_top_k, + normalized=True): batch_size = scores.shape[0] det_outs = [] lod = [] for n in range(batch_size): - nmsed_outs, nmsed_num = multiclass_nms(boxes[n], scores[n], background, - score_threshold, nms_threshold, - nms_top_k, keep_top_k) - lod.append(nmsed_num) - if nmsed_num == 0: continue + nmsed_outs, nmsed_num = multiclass_nms( + boxes[n], + scores[n], + background, + score_threshold, + nms_threshold, + nms_top_k, + keep_top_k, + normalized, + shared=True) + if nmsed_num == 0: + continue + lod.append(nmsed_num) tmp_det_out = [] for c, indices in nmsed_outs.items(): for idx in indices: @@ -154,7 +222,8 @@ def batched_multiclass_nms(boxes, scores, background, score_threshold, sorted_det_out = sorted( tmp_det_out, key=lambda tup: tup[0], reverse=False) det_outs.extend(sorted_det_out) - + if len(lod) == 0: + lod += [1] return det_outs, lod @@ -168,7 +237,6 @@ class TestMulticlassNMSOp(OpTest): M = 1200 C = 21 BOX_SIZE = 4 - background = 0 nms_threshold = 0.3 nms_top_k = 400 @@ -206,6 +274,7 @@ class TestMulticlassNMSOp(OpTest): 'keep_top_k': keep_top_k, 'score_threshold': score_threshold, 'nms_eta': 1.0, + 'normalized': True, } def test_check_output(self): @@ -219,13 +288,70 @@ class TestMulticlassNMSOpNoOutput(TestMulticlassNMSOp): self.score_threshold = 2.0 +class TestMulticlassNMSLoDInput(OpTest): + def set_argument(self): + self.score_threshold = 0.01 + + def setUp(self): + self.set_argument() + M = 1200 + C = 21 + BOX_SIZE = 4 + box_lod = [[1200]] + background = 0 + nms_threshold = 0.3 + nms_top_k = 400 + keep_top_k = 200 + score_threshold = self.score_threshold + normalized = False + + scores = np.random.random((M, C)).astype('float32') + + def softmax(x): + shiftx = x - np.max(x).clip(-64.) + exps = np.exp(shiftx) + return exps / np.sum(exps) + + scores = np.apply_along_axis(softmax, 1, scores) + + boxes = np.random.random((M, C, BOX_SIZE)).astype('float32') + boxes[:, :, 0] = boxes[:, :, 0] * 10 + boxes[:, :, 1] = boxes[:, :, 1] * 10 + boxes[:, :, 2] = boxes[:, :, 2] * 10 + 10 + boxes[:, :, 3] = boxes[:, :, 3] * 10 + 10 + + nmsed_outs, lod = lod_multiclass_nms( + boxes, scores, background, score_threshold, nms_threshold, + nms_top_k, keep_top_k, box_lod, normalized) + nmsed_outs = [-1] if not nmsed_outs else nmsed_outs + nmsed_outs = np.array(nmsed_outs).astype('float32') + self.op_type = 'multiclass_nms' + self.inputs = { + 'BBoxes': (boxes, box_lod), + 'Scores': (scores, box_lod), + } + self.outputs = {'Out': (nmsed_outs, [lod])} + self.attrs = { + 'background_label': 0, + 'nms_threshold': nms_threshold, + 'nms_top_k': nms_top_k, + 'keep_top_k': keep_top_k, + 'score_threshold': score_threshold, + 'nms_eta': 1.0, + 'normalized': normalized, + } + + def test_check_output(self): + self.check_output() + + class TestIOU(unittest.TestCase): def test_iou(self): box1 = np.array([4.0, 3.0, 7.0, 5.0]).astype('float32') box2 = np.array([3.0, 4.0, 6.0, 8.0]).astype('float32') expt_output = np.array([2.0 / 16.0]).astype('float32') - calc_output = np.array([iou(box1, box2)]).astype('float32') + calc_output = np.array([iou(box1, box2, True)]).astype('float32') self.assertTrue(np.allclose(calc_output, expt_output))