diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/CMakeLists.txt b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/CMakeLists.txt index 4f148869f84e42fbc6bdb29ba42f5a9b274b2397..ad86983d91219d4e71d32dc9a4a1632add362ecc 100755 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/CMakeLists.txt +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/CMakeLists.txt @@ -47,6 +47,8 @@ else () find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH) include_directories("${PADDLE_LIB}/paddle/include") link_directories("${PADDLE_LIB}/paddle/lib") + link_directories("${PADDLE_LIB}/third_party/install/paddle2onnx/lib") + link_directories("${PADDLE_LIB}/third_party/install/onnxruntime/lib") endif () include_directories(${OpenCV_INCLUDE_DIRS}) diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/README.md b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/README.md index ae168ae8faea0c2cbffe1342459bcfc50ad3e47c..397ac01cec59be85eed84f15ba0b634049a9a416 100644 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/README.md +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/README.md @@ -91,7 +91,7 @@ cd build cmake .. \ -DWITH_CONTRIB=OFF \ -DWITH_MKL=ON \ - -DWITH_MKLDNN=ON \ + -DWITH_MKLDNN=OFF \ -DWITH_TESTING=OFF \ -DCMAKE_BUILD_TYPE=Release \ -DWITH_INFERENCE_API_TEST=OFF \ @@ -205,9 +205,6 @@ make -j * gpu_id:使用的GPU卡号; * gpu_mem:显存; * cpu_math_library_num_threads:底层科学计算库所用线程的数量; - * use_mkldnn:是否使用MKLDNN加速; - * use_tensorrt: 是否使用tensorRT进行加速; - * use_fp16:是否使用半精度浮点数进行计算,该选项仅在use_tensorrt为true时有效; * cls_model_path:预测模型结构文件路径; * cls_params_path:预测模型参数文件路径; * resize_short_size:预处理时图像缩放大小; diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/include/cls.h b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/include/cls.h index f7a8711e7d48bf48c9f068de22003650e39c9bc5..618be51d9a55d30bd6525877294d5d11f82a6b7c 100644 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/include/cls.h +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/include/cls.h @@ -40,16 +40,12 @@ public: const std::string ¶ms_path, const bool &use_gpu, const int &gpu_id, const int &gpu_mem, const int &cpu_math_library_num_threads, - const bool &use_mkldnn, const bool &use_tensorrt, - const bool &use_fp16, const int &resize_short_size, + const int &resize_short_size, const int &crop_size) { this->use_gpu_ = use_gpu; this->gpu_id_ = gpu_id; this->gpu_mem_ = gpu_mem; this->cpu_math_library_num_threads_ = cpu_math_library_num_threads; - this->use_mkldnn_ = use_mkldnn; - this->use_tensorrt_ = use_tensorrt; - this->use_fp16_ = use_fp16; this->resize_short_size_ = resize_short_size; this->crop_size_ = crop_size; @@ -70,9 +66,6 @@ private: int gpu_id_ = 0; int gpu_mem_ = 4000; int cpu_math_library_num_threads_ = 4; - bool use_mkldnn_ = false; - bool use_tensorrt_ = false; - bool use_fp16_ = false; std::vector mean_ = {0.485f, 0.456f, 0.406f}; std::vector scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f}; diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/include/cls_config.h b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/include/cls_config.h index 231738b4b5ae21026a1b3d25919ccc187368a903..7b0fac0d85387d81b51a1b549e505b08c4dd9ec3 100644 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/include/cls_config.h +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/include/cls_config.h @@ -39,11 +39,6 @@ public: this->cpu_math_library_num_threads = 
stoi(config_map_["cpu_math_library_num_threads"]); - this->use_mkldnn = bool(stoi(config_map_["use_mkldnn"])); - - this->use_tensorrt = bool(stoi(config_map_["use_tensorrt"])); - this->use_fp16 = bool(stoi(config_map_["use_fp16"])); - this->cls_model_path.assign(config_map_["cls_model_path"]); this->cls_params_path.assign(config_map_["cls_params_path"]); @@ -61,11 +56,6 @@ public: int cpu_math_library_num_threads = 1; - bool use_mkldnn = false; - - bool use_tensorrt = false; - bool use_fp16 = false; - std::string cls_model_path; std::string cls_params_path; diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/src/cls.cpp b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/src/cls.cpp index febf4c70c2c104670ffe3df0284aa0598cd34015..1ba8102988550be82e98501f253a3899b9ebba3f 100644 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/src/cls.cpp +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/src/cls.cpp @@ -24,20 +24,8 @@ void Classifier::LoadModel(const std::string &model_path, if (this->use_gpu_) { config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); - if (this->use_tensorrt_) { - config.EnableTensorRtEngine( - 1 << 20, 1, 3, - this->use_fp16_ ? paddle_infer::Config::Precision::kHalf - : paddle_infer::Config::Precision::kFloat32, - false, false); - } } else { config.DisableGpu(); - if (this->use_mkldnn_) { - config.EnableMKLDNN(); - // cache 10 different shapes for mkldnn to avoid memory leak - config.SetMkldnnCacheCapacity(10); - } config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_); } diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/src/main.cpp b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/src/main.cpp index 919bcabe3e6ecd2e09338f05f9340c72f26eb35c..82a564e1777968f7e8229b06acb691ddaa2f824d 100644 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/src/main.cpp +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/src/main.cpp @@ -61,8 +61,7 @@ int main(int argc, char **argv) { Classifier classifier(config.cls_model_path, config.cls_params_path, config.use_gpu, config.gpu_id, config.gpu_mem, - config.cpu_math_library_num_threads, config.use_mkldnn, - config.use_tensorrt, config.use_fp16, + config.cpu_math_library_num_threads, config.resize_short_size, config.crop_size); double elapsed_time = 0.0; diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/tools/build.sh b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/tools/build.sh index b2ec278e67bfd628d5c429d1212526260d2a9b69..b60bd7bdd1c03e81cbdd154448fc8951cc0f390c 100755 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/tools/build.sh +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_cpp/tools/build.sh @@ -2,7 +2,6 @@ OPENCV_DIR=../opencv-3.4.7/opencv3/ LIB_DIR=../paddle_inference/ CUDA_LIB_DIR=/usr/local/cuda/lib64 CUDNN_LIB_DIR=/usr/lib64 -TENSORRT_DIR=/usr/local/TensorRT-7.2.3.4 BUILD_DIR=build rm -rf ${BUILD_DIR} diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_python/README.md b/tutorials/mobilenetv3_prod/Step6/deploy/inference_python/README.md index 2814a6ade10cb5812b8844867a0cef9d73d6736c..6f467bc158de46c973d83ed3148b982281cd798a 100644 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_python/README.md +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_python/README.md @@ -13,7 +13,7 @@ ## 1. 
简介 -Paddle Inference 是飞桨的原生推理库, 作用于服务器端和云端,提供高性能的推理能力。相比于直接基于预训练模型进行预测,Paddle Inference可使用MKLDNN、CUDNN、TensorRT进行预测加速,从而实现更优的推理性能。 +Paddle Inference 是飞桨的原生推理库, 作用于服务器端和云端,提供高性能的推理能力。 本文档主要基于Paddle Inference的mobilenet_v3_small模型推理。 diff --git a/tutorials/mobilenetv3_prod/Step6/deploy/inference_python/infer.py b/tutorials/mobilenetv3_prod/Step6/deploy/inference_python/infer.py index 24d58bd99e25e21a3eca8672a47e28d78ccb55a0..5bbcdd6046f66ba96ec3a5de319b0a351fd2f884 100644 --- a/tutorials/mobilenetv3_prod/Step6/deploy/inference_python/infer.py +++ b/tutorials/mobilenetv3_prod/Step6/deploy/inference_python/infer.py @@ -80,6 +80,8 @@ class InferenceEngine(object): config.enable_use_gpu(1000, 0) else: config.disable_gpu() + # The thread num should not be greater than the number of cores in the CPU. + config.set_cpu_math_library_num_threads(4) # enable memory optim config.enable_memory_optim() diff --git a/tutorials/mobilenetv3_prod/Step6/test_tipc/README.md b/tutorials/mobilenetv3_prod/Step6/test_tipc/README.md index 238c3802ce8c198ca37411761e005b2b5fac8e36..0a928deda27276a1dc176647d702b81b614eab68 100644 --- a/tutorials/mobilenetv3_prod/Step6/test_tipc/README.md +++ b/tutorials/mobilenetv3_prod/Step6/test_tipc/README.md @@ -16,6 +16,7 @@ - Slim训练部署:包括PACT在线量化、离线量化。 - 更多训练环境:包括Windows GPU/CPU、Linux NPU、Linux DCU等多种环境。 + | 算法论文 | 模型名称 | 模型类型 | 基础
训练预测 | 更多<br>训练方式 | 更多<br>部署方式 | Slim<br>训练部署 | 更多<br>训练环境 |
| :--- | :--- | :----: | :--------: | :----: | :----: | :----: | :----: |
| MobileNetV3 | mobilenet_v3_small | 分类 | 支持 | 混合精度 | PYTHON 服务化部署<br>Paddle2ONNX 部署| PACT量化<br>
离线量化 | Windows GPU/CPU | diff --git a/tutorials/mobilenetv3_prod/Step6/test_tipc/common_func.sh b/tutorials/mobilenetv3_prod/Step6/test_tipc/common_func.sh index 9fe74f0d7728f4cec7ce6ab855d577834854c3fa..8a3415430759a78c840614d6421a3750c6ef643a 100644 --- a/tutorials/mobilenetv3_prod/Step6/test_tipc/common_func.sh +++ b/tutorials/mobilenetv3_prod/Step6/test_tipc/common_func.sh @@ -65,10 +65,11 @@ function status_check(){ last_status=$1 # the exit code run_command=$2 run_log=$3 + model_name=$4 if [ $last_status -eq 0 ]; then - echo -e "\033[33m Run successfully with command - ${run_command}! \033[0m" | tee -a ${run_log} + echo -e "\033[33m Run successfully with command - ${model_name} - ${run_command}! \033[0m" | tee -a ${run_log} else - echo -e "\033[33m Run failed with command - ${run_command}! \033[0m" | tee -a ${run_log} + echo -e "\033[33m Run failed with command - ${model_name} - ${run_command}! \033[0m" | tee -a ${run_log} fi } diff --git a/tutorials/mobilenetv3_prod/Step6/test_tipc/configs/mobilenet_v3_small/inference_cpp.txt b/tutorials/mobilenetv3_prod/Step6/test_tipc/configs/mobilenet_v3_small/inference_cpp.txt index d7eafc9cf85e8b01c9342c2c3aa10c5bead2aa6f..cc4cc848f422520e7ae84128e9978ba1fc6f24d9 100755 --- a/tutorials/mobilenetv3_prod/Step6/test_tipc/configs/mobilenet_v3_small/inference_cpp.txt +++ b/tutorials/mobilenetv3_prod/Step6/test_tipc/configs/mobilenet_v3_small/inference_cpp.txt @@ -1,14 +1,12 @@ # model load config +model_name mobilenet_v3_small use_gpu 0 gpu_id 0 gpu_mem 4000 cpu_math_library_num_threads 10 -use_mkldnn 1 -use_tensorrt 0 -use_fp16 0 # cls config cls_model_path ./deploy/inference_cpp/mobilenet_v3_small_infer/inference.pdmodel cls_params_path ./deploy/inference_cpp/mobilenet_v3_small_infer/inference.pdiparams resize_short_size 256 -crop_size 224 +crop_size 224 \ No newline at end of file diff --git a/tutorials/mobilenetv3_prod/Step6/test_tipc/prepare.sh b/tutorials/mobilenetv3_prod/Step6/test_tipc/prepare.sh index 4ea8bb9725cae4120293757b9b9b462f6ec420ae..0dd45279b5b730e442b3599e8cda2eca57129a78 100644 --- a/tutorials/mobilenetv3_prod/Step6/test_tipc/prepare.sh +++ b/tutorials/mobilenetv3_prod/Step6/test_tipc/prepare.sh @@ -18,7 +18,7 @@ model_name=$(func_parser_value "${lines[1]}") trainer_list=$(func_parser_value "${lines[12]}") - +pip install -r requirements.txt if [ ${MODE} = "lite_train_lite_infer" ];then # prepare lite data tar -xf ./test_images/lite_data.tar @@ -54,6 +54,55 @@ elif [ ${MODE} = "serving_infer" ];then wget -nc -P ./inference https://paddle-model-ecology.bj.bcebos.com/model/mobilenetv3_reprod/mobilenet_v3_small_infer.tar --no-check-certificate cd ./inference && tar xf mobilenet_v3_small_infer.tar && cd ../ fi +elif [ ${MODE} = "cpp_infer" ];then + PADDLEInfer=$3 + # wget model + wget -nc -P ./deploy/inference_cpp/ https://paddle-model-ecology.bj.bcebos.com/model/mobilenetv3_reprod/mobilenet_v3_small_infer.tar --no-check-certificate + cd ./deploy/inference_cpp/ && tar xf mobilenet_v3_small_infer.tar + if [ "" = "$PADDLEInfer" ];then + wget -nc https://paddle-inference-lib.bj.bcebos.com/2.2.2/cxx_c/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda11.1_cudnn8.1.1_trt7.2.3.4/paddle_inference.tgz --no-check-certificate + else + wget -nc $PADDLEInfer --no-check-certificate + fi + tar zxf paddle_inference.tgz + if [ ! 
-d "paddle_inference" ]; then + ln -s paddle_inference_install_dir paddle_inference + fi + wget -nc https://paddleocr.bj.bcebos.com/libs/opencv/opencv-3.4.7.tar.gz --no-check-certificate + tar zxf opencv-3.4.7.tar.gz + # build opencv + cd opencv-3.4.7/ + root_path=$PWD + install_path=${root_path}/opencv3 + build_dir=${root_path}/build + + rm -rf ${build_dir} + mkdir ${build_dir} + cd ${build_dir} + + cmake .. \ + -DCMAKE_INSTALL_PREFIX=${install_path} \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=OFF \ + -DWITH_IPP=OFF \ + -DBUILD_IPP_IW=OFF \ + -DWITH_LAPACK=OFF \ + -DWITH_EIGEN=OFF \ + -DCMAKE_INSTALL_LIBDIR=lib64 \ + -DWITH_ZLIB=ON \ + -DBUILD_ZLIB=ON \ + -DWITH_JPEG=ON \ + -DBUILD_JPEG=ON \ + -DWITH_PNG=ON \ + -DBUILD_PNG=ON \ + -DWITH_TIFF=ON \ + -DBUILD_TIFF=ON + make -j + make install + cd ../../ + # build cpp + bash tools/build.sh + elif [ ${MODE} = "paddle2onnx_infer" ];then # get data tar -xf ./test_images/lite_data.tar diff --git a/tutorials/mobilenetv3_prod/Step6/test_tipc/test_inference_cpp.sh b/tutorials/mobilenetv3_prod/Step6/test_tipc/test_inference_cpp.sh index ad60905753b7faa60a185a63548a245aade6d841..0df3fbbdbb656c5cb7362838da642f963c7e4381 100644 --- a/tutorials/mobilenetv3_prod/Step6/test_tipc/test_inference_cpp.sh +++ b/tutorials/mobilenetv3_prod/Step6/test_tipc/test_inference_cpp.sh @@ -28,14 +28,9 @@ IFS=$'\n' lines=(${dataline}) # parser load config -use_gpu_key=$(func_parser_key_cpp "${lines[1]}") -use_gpu_value=$(func_parser_value_cpp "${lines[1]}") -use_mkldnn_key=$(func_parser_key_cpp "${lines[5]}") -use_mkldnn_value=$(func_parser_value_cpp "${lines[5]}") -use_tensorrt_key=$(func_parser_key_cpp "${lines[6]}") -use_tensorrt_value=$(func_parser_value_cpp "${lines[6]}") -use_fp16_key=$(func_parser_key_cpp "${lines[7]}") -use_fp16_value=$(func_parser_value_cpp "${lines[7]}") +model_name=$(func_parser_value_cpp "${lines[1]}") +use_gpu_key=$(func_parser_key_cpp "${lines[2]}") +use_gpu_value=$(func_parser_value_cpp "${lines[2]}") LOG_PATH="./log/infer_cpp" mkdir -p ${LOG_PATH} @@ -43,22 +38,10 @@ status_log="${LOG_PATH}/results_infer_cpp.log" function func_infer_cpp(){ # inference cpp - if test $use_gpu_value -gt 0; then - if test $use_tensorrt_value -gt 0; then - if test $use_fp16_value -gt 0; then - _save_log_path="${LOG_PATH}/infer_cpp_${use_gpu_key}_${use_tensorrt_key}_${use_fp16_key}.log" - else - _save_log_path="${LOG_PATH}/infer_cpp_${use_gpu_key}_${use_tensorrt_key}.log" - fi - else - _save_log_path="${LOG_PATH}/infer_cpp_${use_gpu_key}.log" - fi + if test $use_gpu_value -gt 0; then + _save_log_path="${LOG_PATH}/infer_cpp_use_cpu.log" else - if test $use_mkldnn_value -gt 0; then - _save_log_path="${LOG_PATH}/infer_cpp_use_cpu_${use_mkldnn_key}.log" - else - _save_log_path="${LOG_PATH}/infer_cpp_use_cpu.log" - fi + _save_log_path="${LOG_PATH}/infer_cpp_${use_gpu_key}.log" fi # run infer cpp inference_cpp_cmd="./deploy/inference_cpp/build/clas_system" @@ -66,7 +49,7 @@ function func_infer_cpp(){ infer_cpp_full_cmd="${inference_cpp_cmd} ${FILENAME} ${inference_cpp_img} > ${_save_log_path} 2>&1 " eval $infer_cpp_full_cmd last_status=${PIPESTATUS[0]} - status_check $last_status "${infer_cpp_full_cmd}" "${status_log}" + status_check $last_status "${infer_cpp_full_cmd}" "${status_log}" "${model_name}" } echo "################### run test cpp inference ###################" diff --git a/tutorials/mobilenetv3_prod/Step6/test_tipc/test_ptq_inference_python.sh b/tutorials/mobilenetv3_prod/Step6/test_tipc/test_ptq_inference_python.sh index 
3fd84c70fe8c4d075b0406f37454ebac430d3428..c17fdd72eb4366941a6d61b09eb248271f849f28 100644 --- a/tutorials/mobilenetv3_prod/Step6/test_tipc/test_ptq_inference_python.sh +++ b/tutorials/mobilenetv3_prod/Step6/test_tipc/test_ptq_inference_python.sh @@ -65,7 +65,7 @@ function func_inference(){ eval $command last_status=${PIPESTATUS[0]} eval "cat ${_save_log_path}" - status_check $last_status "${command}" "${status_log}" + status_check $last_status "${command}" "${status_log}" "${model_name}" done # gpu elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then @@ -80,7 +80,7 @@ function func_inference(){ eval $command last_status=${PIPESTATUS[0]} eval "cat ${_save_log_path}" - status_check $last_status "${command}" "${status_log}" + status_check $last_status "${command}" "${status_log}" "${model_name}" done else echo "Does not support hardware other than CPU and GPU Currently!" @@ -107,7 +107,7 @@ if [ ${MODE} = "whole_infer" ]; then echo $export_cmd # eval $export_cmd status_export=$? - status_check $status_export "${export_cmd}" "${status_log}" + status_check $status_export "${export_cmd}" "${status_log}" "${model_name}" save_infer_dir=${output_dir_value} #run inference diff --git a/tutorials/mobilenetv3_prod/Step6/test_tipc/test_train_inference_python.sh b/tutorials/mobilenetv3_prod/Step6/test_tipc/test_train_inference_python.sh index 9f0c83a2cc15082664cc5217d5799deb95597522..ef6b5ced493b17f15bb8f2c9ece8ba4c3ee82695 100644 --- a/tutorials/mobilenetv3_prod/Step6/test_tipc/test_train_inference_python.sh +++ b/tutorials/mobilenetv3_prod/Step6/test_tipc/test_train_inference_python.sh @@ -87,7 +87,7 @@ function func_inference(){ eval $command last_status=${PIPESTATUS[0]} eval "cat ${_save_log_path}" - status_check $last_status "${command}" "${status_log}" + status_check $last_status "${command}" "${status_log}" "${model_name}" done # gpu elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then @@ -101,7 +101,7 @@ function func_inference(){ eval $command last_status=${PIPESTATUS[0]} eval "cat ${_save_log_path}" - status_check $last_status "${command}" "${status_log}" + status_check $last_status "${command}" "${status_log}" "${model_name}" done else echo "Does not support hardware other than CPU and GPU Currently!" @@ -131,7 +131,7 @@ if [ ${MODE} = "whole_infer" ]; then echo $export_cmd eval $export_cmd status_export=$? - status_check $status_export "${export_cmd}" "${status_log}" + status_check $status_export "${export_cmd}" "${status_log}" "${model_name}" else save_infer_dir=${save_infer_dir} fi @@ -196,7 +196,7 @@ else fi # run train eval $cmd - status_check $? "${cmd}" "${status_log}" + status_check $? "${cmd}" "${status_log}" "${model_name}" # TODO ensure model name set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}") @@ -205,7 +205,7 @@ else eval ${env} eval_cmd="${python} ${eval_py} ${set_eval_pretrain}" eval $eval_cmd - status_check $? "${eval_cmd}" "${status_log}" + status_check $? "${eval_cmd}" "${status_log}" "${model_name}" fi # run export model if [ ${run_export} != "null" ]; then @@ -215,7 +215,7 @@ else set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}") export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key}" eval $export_cmd - status_check $? "${export_cmd}" "${status_log}" + status_check $? 
"${export_cmd}" "${status_log}" "${model_name}" #run inference eval $env diff --git a/tutorials/tipc/infer_cpp/infer_cpp.md b/tutorials/tipc/infer_cpp/infer_cpp.md index a0fe25043f135cb738929844769a4ce2ef5f442c..38d50416885f8ebcf55befe7e1a775e60c148867 100644 --- a/tutorials/tipc/infer_cpp/infer_cpp.md +++ b/tutorials/tipc/infer_cpp/infer_cpp.md @@ -71,20 +71,8 @@ void Classifier::LoadModel(const std::string &model_path, if (this->use_gpu_) { config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); - if (this->use_tensorrt_) { - config.EnableTensorRtEngine( - 1 << 20, 1, 3, - this->use_fp16_ ? paddle_infer::Config::Precision::kHalf - : paddle_infer::Config::Precision::kFloat32, - false, false); - } } else { config.DisableGpu(); - if (this->use_mkldnn_) { - config.EnableMKLDNN(); - // cache 10 different shapes for mkldnn to avoid memory leak - config.SetMkldnnCacheCapacity(10); - } config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_); } diff --git a/tutorials/tipc/infer_cpp/test_infer_cpp.md b/tutorials/tipc/infer_cpp/test_infer_cpp.md index e66544b768360e571d5ad95ae958d85a0b1cb8c9..54b00438700f19dac0120127537571ca200c5ffc 100644 --- a/tutorials/tipc/infer_cpp/test_infer_cpp.md +++ b/tutorials/tipc/infer_cpp/test_infer_cpp.md @@ -21,7 +21,7 @@ ## 1. 简介 -Paddle Inference 是飞桨的原生推理库, 作用于服务器端和云端,提供高性能的推理能力。相比于直接基于预训练模型进行预测,Paddle Inference可使用MKLDNN、CUDNN、TensorRT进行预测加速,从而实现更优的推理性能。 +Paddle Inference 是飞桨的原生推理库, 作用于服务器端和云端,提供高性能的推理能力。 更多关于Paddle Inference推理引擎的介绍,可以参考[Paddle Inference官网教程](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/inference_cn.html)。 本文档主要介绍飞桨模型在 Linux GPU/CPU 下基于C++预测引擎的推理过程开发。 @@ -39,15 +39,15 @@ run_scripts configs_path img_path ``` * `run_scripts`:最终编译好的可执行命令。 -* `configs_path`:设置模型路径、是否使用GPU、是否开启mkldnn、是否开启TensorRT等。 +* `configs_path`:设置模型路径、是否使用GPU等。 * `img_path`:待预测的图像路径。 ### 2.2 配置文件解析 -完整的`inference_cpp.txt`配置文件共有14行,包含两个方面的内容。 -* 运行环境参数配置:第1~8行 -* 模型参数配置:第10~14行 +完整的`inference_cpp.txt`配置文件共有12行,包含两个方面的内容。 +* 运行环境参数配置:第1~5行 +* 模型参数配置:第7~11行 具体内容见[inference_cpp.txt](../../mobilenetv3_prod/Step6/test_tipc/configs/mobilenet_v3_small/inference_cpp.txt) @@ -62,17 +62,15 @@ run_scripts configs_path img_path | 行号 | 参考内容 | 含义 | key是否需要修改 | value是否需要修改 | 修改内容 | |----|-------------------------------------|---------------|-----------|-------------|----------------------------------| -| 2 | use_gpu | 是否使用GPU | 否 | 是 | value根据是否使用GPU进行修改 | -| 3 | gpu_id | 使用的GPU卡号 | 否 | 是 | value修改为自己的GPU ID | -| 4 | gpu_mem | 显存 | 否 | 是 | value修改为自己的GPU 显存 | -| 5 | cpu_math_library_num_thread | 底层科学计算库所用线程的数量 | 否 | 是 | value修改为合适的线程数 | -| 6 | use_mkldnn | 是否使用MKLDNN加速 | 否 | 是 | value根据是否使用MKLDNN进行修改 | -| 7 | use_tensorrt | 是否使用tensorRT进行加速 | 否 | 是 | value根据是否使用tensorRT进行修改 | -| 8 | use_fp16 | 是否使用半精度浮点数进行计算,该选项仅在use_tensorrt为true时有效 | 否 | 是 | value根据在开启tensorRT时是否使用半精度进行修改| -| 11 | cls_model_path | 预测模型结构文件路径 | 否 | 是 | value修改为预测模型结构文件路径 | -| 12 | cls_params_path | 预测模型参数文件路径 | 否 | 是 | vvalue修改为预测模型参数文件路径 | -| 13 | resize_short_size | 预处理时图像缩放大小 | 否 | 是 | value修改为预处理时图像缩放大小 -| 14 | crop_size | 预处理时图像裁剪后的大小 | 否 | 是 | value修改为预处理时图像裁剪后的大小 +| 2 | model_name | 模型名称 | 否 | 是 | value根据模型名称修改 | +| 3 | use_gpu | 是否使用GPU | 否 | 是 | value根据是否使用GPU进行修改 | +| 4 | gpu_id | 使用的GPU卡号 | 否 | 是 | value修改为自己的GPU ID | +| 5 | gpu_mem | 显存 | 否 | 是 | value修改为自己的GPU 显存 | +| 6 | cpu_math_library_num_thread | 底层科学计算库所用线程的数量 | 否 | 是 | value修改为合适的线程数 | +| 9 | cls_model_path | 预测模型结构文件路径 | 否 | 是 | value修改为预测模型结构文件路径 | +| 10 | cls_params_path | 预测模型参数文件路径 | 否 | 是 | 
vvalue修改为预测模型参数文件路径 | +| 11 | resize_short_size | 预处理时图像缩放大小 | 否 | 是 | value修改为预处理时图像缩放大小 +| 12 | crop_size | 预处理时图像裁剪后的大小 | 否 | 是 | value修改为预处理时图像裁剪后的大小 @@ -104,172 +102,7 @@ run_scripts configs_path img_path ### 3.3 准备推理所需代码 -基于预测引擎的推理过程包含4个步骤:初始化预测引擎、预处理、推理、后处理。 - -#### 3.3.1 初始化预测引擎 - -**【基本内容】** -该部分主要根据配置文件对预测引擎进行初始化,包括设置模型结构和参数文件路径、是否使用GPU、是否开启MKLDNN、是否开启TensorRT等。 -**【实战】** -以mobilenet_v3_small模型为例,推理引擎初始化函数实现如下,其中模型结构和参数文件路径、是否使用GPU、是否开启MKLDNN等内容都是可以配置的。 -主要实现在[cls.cpp](../../mobilenetv3_prod/Step6/deploy/inference_cpp/src/cls.cpp) -```c++ -void Classifier::LoadModel(const std::string &model_path, - const std::string ¶ms_path) { - paddle_infer::Config config; - config.SetModel(model_path, params_path); - - if (this->use_gpu_) { - config.EnableUseGpu(this->gpu_mem_, this->gpu_id_); - if (this->use_tensorrt_) { - config.EnableTensorRtEngine( - 1 << 20, 1, 3, - this->use_fp16_ ? paddle_infer::Config::Precision::kHalf - : paddle_infer::Config::Precision::kFloat32, - false, false); - } - } else { - config.DisableGpu(); - if (this->use_mkldnn_) { - config.EnableMKLDNN(); - // cache 10 different shapes for mkldnn to avoid memory leak - config.SetMkldnnCacheCapacity(10); - } - config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_); - } - - config.SwitchUseFeedFetchOps(false); - // true for multiple input - config.SwitchSpecifyInputNames(true); - - config.SwitchIrOptim(true); - - config.EnableMemoryOptim(); - config.DisableGlogInfo(); - - this->predictor_ = CreatePredictor(config); -} - -``` -#### 3.3.2 预处理 -**【基本内容】** -该部分主要用来读取指定图像,对其进行数据变换,转化为符合模型推理所需要的输入格式, -**【实战】** -以mobilenet_v3_small模型为例,使用的数据预处理如下: - -* resize -* crop -* normalize -* RGB -> CHW - -主要实现在[preprocess_op.cpp](../../mobilenetv3_prod/Step6/deploy/inference_cpp/src/preprocess_op.cpp)中。 -```c++ -//Resize -class ResizeImg { -public: - virtual void Run(const cv::Mat &img, cv::Mat &resize_img, int max_size_len); -}; -//Crop -class CenterCropImg { -public: - virtual void Run(cv::Mat &im, const int crop_size = 224); -}; -//Norm -class Normalize { -public: - virtual void Run(cv::Mat *im, const std::vector &mean, - const std::vector &scale, const bool is_scale = true); -}; -// RGB -> CHW -class Permute { -public: - virtual void Run(const cv::Mat *im, float *data); -}; -``` -#### 3.3.3 推理 -**【基本内容】** -前向推理是主要步骤,会将预处理好的输入图像输出到预测引擎中,得到输出结果。 -**【实战】** -以mobilenet_v3_small模型为例,前向推理主要实现在[cls.cpp](../../mobilenetv3_prod/Step6/deploy/inference_cpp/src/cls.cpp)。 -```C++ - auto input_names = this->predictor_->GetInputNames(); - auto input_t = this->predictor_->GetInputHandle(input_names[0]); - input_t->Reshape({1, 3, resize_img.rows, resize_img.cols}); - auto start = std::chrono::system_clock::now(); - input_t->CopyFromCpu(input.data()); - this->predictor_->Run(); - - std::vector out_data; - auto output_names = this->predictor_->GetOutputNames(); - auto output_t = this->predictor_->GetOutputHandle(output_names[0]); - std::vector output_shape = output_t->shape(); - int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, - std::multiplies()); - out_data.resize(out_num); - output_t->CopyToCpu(out_data.data()); -``` - -#### 3.3.4 后处理 -**【基本内容】** -模型最后的输出可能是数组,一般并不是我们最后想要获取的结果,因此需要对模型的输出做后处理。 -**【实战】** -以mobilenet_v3_small模型为例,模型输出的是一个一维的数组,代表输入图片分类到每个类目的概率,为了得到有实际含义的输出,需要获取该数组中最大值的位置和大小,mobilenet_v3_small的后处理代码如下所示。 - -```c++ -int maxPosition = max_element(out_data.begin(), out_data.end()) - out_data.begin(); -int score = out_data[maxPosition]; -``` - - -### 3.4 编译得到可执行代码 -**【基本内容】** 
-在准备好相应的代码后需要开始准备编译,这里可以利用cmake来实现。 -**【实战】** -以mobilenet_v3_small模型为例,代码示例如:[CMakeLists.txt](../../mobilenetv3_prod/Step6/deploy/inference_cpp/CMakeLists.txt) -```bash -set(DEPS ${DEPS} ${OpenCV_LIBS}) -AUX_SOURCE_DIRECTORY(./src SRCS) -add_executable(${DEMO_NAME} ${SRCS}) -target_link_libraries(${DEMO_NAME} ${DEPS}) -``` -执行脚本: -```bash -OPENCV_DIR=../opencv-3.4.7/opencv3/ -LIB_DIR=../paddle_inference/ -CUDA_LIB_DIR=/usr/local/cuda/lib64 -CUDNN_LIB_DIR=/usr/lib64 -TENSORRT_DIR=/usr/local/TensorRT-7.2.3.4 - -BUILD_DIR=build -rm -rf ${BUILD_DIR} -mkdir ${BUILD_DIR} -cd ${BUILD_DIR} -cmake .. \ - -DPADDLE_LIB=${LIB_DIR} \ - -DWITH_MKL=ON \ - -DWITH_GPU=OFF \ - -DWITH_STATIC_LIB=OFF \ - -DUSE_TENSORRT=OFF \ - -DOPENCV_DIR=${OPENCV_DIR} \ - -DCUDNN_LIB=${CUDNN_LIB_DIR} \ - -DCUDA_LIB=${CUDA_LIB_DIR} \ - -make -j -``` - -上述命令中,Paddle C++预测库、opencv等其他依赖库的地址需要换成自己机器上的实际地址。 - -* `OPENCV_DIR`为opencv编译安装的地址(本例中为`opencv-3.4.7/opencv3`文件夹的路径); - -* `LIB_DIR`为下载的Paddle预测库(`paddle_inference`文件夹),或编译生成的Paddle预测库(`build/paddle_inference_install_dir`文件夹)的路径; - -* `CUDA_LIB_DIR`为cuda库文件地址,在docker中一般为`/usr/local/cuda/lib64`; - -* `CUDNN_LIB_DIR`为cudnn库文件地址,在docker中一般为`/usr/lib64`。 - -* `TENSORRT_DIR`是tensorrt库文件地址,在dokcer中一般为`/usr/local/TensorRT-7.2.3.4/`,TensorRT需要结合GPU使用。 - -在执行上述命令,编译完成之后,会在当前路径下生成`build`文件夹,其中生成一个名为`clas_system`的可执行文件。 +基于预测引擎的推理过程包含4个步骤:初始化预测引擎、预处理、推理、后处理。参考[文档](./infer_cpp.md)准备预测引擎推理代码并编译成功。 @@ -299,10 +132,11 @@ mobilenet_v3_small的测试开发配置文件可以参考:[inference_cpp.txt]( **【基本内容】** -基于修改完的配置,运行 +基于修改完的配置,运行方法如下 ```bash -bash test_tipc/test_inference_cpp.sh ${your_params_file} +bash test_tipc/prepare.sh ${your_params_file} cpp_infer +bash test_tipc/test_inference_cpp.sh ${your_params_file} ``` **【注意事项】** @@ -310,7 +144,7 @@ bash test_tipc/test_inference_cpp.sh ${your_params_file} 如果运行失败,会输出具体的报错命令,可以根据输出的报错命令排查下配置文件的问题并修改,示例报错如下所示。 ``` -Run failed with command - ./deploy/inference_cpp/build/clas_system test_tipc/configs/mobilenet_v3_small/inference_cpp.txt ./images/demo.jpg > ./log/infer_cpp/infer_cpp_use_cpu_use_mkldnn.log 2>&1 ! +Run failed with command - ./deploy/inference_cpp/build/clas_system test_tipc/configs/mobilenet_v3_small/inference_cpp.txt ./images/demo.jpg > ./log/infer_cpp/infer_cpp_use_cpu.log 2>&1 ! ``` **【实战】** @@ -318,16 +152,17 @@ Run failed with command - ./deploy/inference_cpp/build/clas_system test_tipc/con 以mobilenet_v3_small的`Linux GPU/CPU C++推理功能测试` 为例,命令如下所示。 ```bash +bash test_tipc/prepare.sh test_tipc/configs/mobilenet_v3_small/inference_cpp.txt cpp_infer bash test_tipc/test_inference_cpp.sh test_tipc/configs/mobilenet_v3_small/inference_cpp.txt ``` 输出结果如下,表示命令运行成功。 ```bash -Run successfully with command - ./deploy/inference_cpp/build/clas_system test_tipc/configs/mobilenet_v3_small/inference_cpp.txt ./images/demo.jpg > ./log/infer_cpp/infer_cpp_use_cpu_use_mkldnn.log 2>&1 ! +Run successfully with command - mobilenet_v3_small - ./deploy/inference_cpp/build/clas_system test_tipc/configs/mobilenet_v3_small/inference_cpp.txt ./images/demo.jpg > ./log/infer_cpp/infer_cpp_use_cpu.log 2>&1 ! 
``` -也可以在`./log/infer_cpp/infer_cpp_use_cpu_use_mkldnn.log`中查看详细的输出结果。 +也可以在`./log/infer_cpp/infer_cpp_use_cpu.log`中查看详细的输出结果。 **【核验】** diff --git a/tutorials/tipc/train_infer_python/infer_python.md b/tutorials/tipc/train_infer_python/infer_python.md index 7e2cba75594f0c8d781d2fb417c797e543c38618..73e324d3d2a46dc129d8dde6245f960bab0cae97 100644 --- a/tutorials/tipc/train_infer_python/infer_python.md +++ b/tutorials/tipc/train_infer_python/infer_python.md @@ -201,21 +201,10 @@ class InferenceEngine(object): config.enable_memory_optim() if args.use_gpu: config.enable_use_gpu(100, 0) - config.enable_tensorrt_engine(workspace_size=1 << 30, - max_batch_size=10, - min_subgraph_size=5, - precision_mode=PrecisionType.Float32, - use_static=False, - use_calib_mode=False) - config.set_trt_dynamic_shape_info( - min_input_shape={"input": [1, 3, 1, 1]}, - max_input_shape={"input": [10, 3, 1200, 1200]}, - optim_input_shape={"input": [1, 3, 224, 224]}) else: - # If not specific mkldnn, you can set the blas thread. + config.disable_gpu() # The thread num should not be greater than the number of cores in the CPU. config.set_cpu_math_library_num_threads(4) - config.enable_mkldnn() # creat predictor predictor = create_predictor(config) # get input and output tensor property diff --git a/tutorials/tipc/train_infer_python/test_train_infer_python.md b/tutorials/tipc/train_infer_python/test_train_infer_python.md index 0253e9f81fdfad16935f4ed4fe12bbd6c95c82eb..d4ba616bb04e3117e7bfab38b6c66856950ae456 100644 --- a/tutorials/tipc/train_infer_python/test_train_infer_python.md +++ b/tutorials/tipc/train_infer_python/test_train_infer_python.md @@ -214,15 +214,37 @@ python deploy/inference_python/infer.py --model-dir=./mobilenet_v3_small_infer/ -### 3.2 准备数据与环境 +### 3.2 准备数据与环境与规范训练日志 **【基本内容】** -1. 数据集:为方便快速验证训练/评估/推理过程,需要准备一个小数据集(训练集和验证集各8~16张图像即可,压缩后数据大小建议在`20M`以内),放在`lite_data`文件夹下。 +1. 数据集:为方便快速验证训练/评估/推理过程,需要准备一个小数据集(训练集和验证集各8~16张图像即可,压缩后数据大小建议在`20M`以内,确保基础训练推理总时间不超过十分钟),放在`lite_data`文件夹下。 相关文档可以参考[论文复现赛指南3.2章节](../../../docs/lwfx/ArticleReproduction_CV.md),代码可以参考`基于ImageNet准备小数据集的脚本`:[prepare.py](https://github.com/littletomatodonkey/AlexNet-Prod/blob/tipc/pipeline/Step2/prepare.py)。 -2. 环境:安装好PaddlePaddle即可进行基础训练推理测试开发 +2. 规范训练日志格式:训练日志中,除了打印loss、精度等信息,还需要有以下信息: + +- reader_cost:1个Step数据加载用时,单位:秒(sec)。 + 1). N个Step打印1条日志时,reader_cost为N个Step数据加载用时的平均值 + 2). 建议(但不强制)使用DataLoader,而非DataFeeder +- batch_cost:1个Step训练用时,单位:秒(sec)。batch_cost = reader_cost + model.forward()(训练)时间。 +- ips:单卡每秒处理的样本数,单位如:images/sec、sequences/sec、tokens/sec、words/sec、frames/sec等。 +- samples: samples代表上次打印到本次打印,新完成训练的样本数量。对于每个Step样本数可能不同的模型(大多为NLP模型),需要计算samples。 + + 最终打印期望格式如下: + ``` + ..., ... , loss: 0.12345, avg_reader_cost: 0.12345 sec, avg_batch_cost: 0.12345 sec, avg_samples: 100, avg_ips: 0.12345 images/sec + ``` + - avg_reader_cost、avg_batch_cost、avg_samples、avg_ips算法如下: + 假如,模型每N个Step打印1次日志,每Step reader用时为:`R1, R2,...Rn`,每Step训练用时:`T1, T2,...Tn`,每Step **单卡BatchSize** 为`S1, S2,...Sn`。 + `avg_reader_cost = sum(R1, R2,...Rn) / N` + `avg_batch_cost = avg_reader_cost + sum(T1, T2,...Tn) / N` + `avg_samples = sum(S1, S2,...Sn) / N` + `avg_ips = samples / batch_cost ` + ips 单位如:images/sec、sequences/sec、tokens/sec、words/sec、frames/sec等。 + + +3. 
环境:安装好PaddlePaddle即可进行基础训练推理测试开发 **【注意事项】** @@ -258,6 +280,9 @@ python deploy/inference_python/infer.py --model-dir=./mobilenet_v3_small_infer/ mobilenet_v3_small的测试开发配置文件可以参考:[train_infer_python.txt](../../mobilenetv3_prod/Step6/test_tipc/configs/mobilenet_v3_small/train_infer_python.txt)。 +为了确保基础训练、推理总时间不超过10分钟,需要在`train_infer_python.txt`中设置较小的`epoch`数或者迭代次数。以[train_infer_python.txt](../../mobilenetv3_prod/Step6/test_tipc/configs/mobilenet_v3_small/train_infer_python.txt)为例,设置`--epochs:lite_train_lite_infer=5`,数值5表示训练5个epoch,不同模型根据训练耗时调整该数值,确保运行耗时不超过10分钟。 + + ### 3.5 验证配置正确性
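
The training-log requirements added in section 3.2 of the test_train_infer_python.md hunk above give the formulas for `avg_reader_cost`, `avg_batch_cost`, `avg_samples` and `avg_ips` but leave the bookkeeping to each model repo. Below is a minimal, illustrative Python sketch of that bookkeeping; the `TrainLogMeter` class and its method names are hypothetical and not part of this repository — it only restates the formulas from that section, assuming `avg_ips = avg_samples / avg_batch_cost`.

```python
class TrainLogMeter:
    """Accumulate per-step timings and print a TIPC-style training log line.

    Hypothetical helper for illustration only; it implements the formulas
    described in section 3.2, averaged over the steps since the last print.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        self.reader_costs = []  # R1..Rn: data-loading time per step (sec)
        self.train_costs = []   # T1..Tn: forward/backward time per step (sec)
        self.samples = []       # S1..Sn: per-card batch size per step

    def update(self, reader_cost, train_cost, batch_size):
        self.reader_costs.append(reader_cost)
        self.train_costs.append(train_cost)
        self.samples.append(batch_size)

    def log(self, epoch, step, loss):
        n = len(self.reader_costs)
        if n == 0:
            return
        avg_reader_cost = sum(self.reader_costs) / n
        # batch_cost = reader_cost + model.forward() (training) time
        avg_batch_cost = avg_reader_cost + sum(self.train_costs) / n
        avg_samples = sum(self.samples) / n
        avg_ips = avg_samples / avg_batch_cost
        print("[epoch {}, iter: {}] loss: {:.5f}, "
              "avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, "
              "avg_samples: {:.1f}, avg_ips: {:.5f} images/sec".format(
                  epoch, step, loss, avg_reader_cost, avg_batch_cost,
                  avg_samples, avg_ips))
        self.reset()
```

In a training loop one would time the DataLoader fetch for `reader_cost`, time the forward/backward pass for `train_cost`, call `update()` every step, and call `log()` every N steps so the averages cover exactly the steps since the previous log line.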