diff --git a/deploy/pptracking/README.md b/deploy/pptracking/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eaba45c3d989d6ae818388f44d17896e12330687 --- /dev/null +++ b/deploy/pptracking/README.md @@ -0,0 +1,51 @@ +# 实时跟踪系统PP-Tracking + +PP-Tracking是基于飞桨深度学习框架的业界首个开源实时跟踪系统。针对实际业务的难点痛点,PP-Tracking内置行人车辆跟踪、跨镜头跟踪、多类别跟踪、小目标跟踪及流量计数等能力与产业应用,同时提供可视化开发界面。模型集成多目标跟踪,目标检测,ReID轻量级算法,进一步提升PP-Tracking在服务器端部署性能。同时支持python,C++部署,适配Linux,Nvidia Jetson多平台环境。 + +
+ [跟踪效果演示图]
+
+ 视频来源:VisDrone2021, BDD100K开源数据集
+ + +### 一、快速开始 + +PP-Tracking提供了简洁的可视化界面,无需开发即可实现多种跟踪功能,可以参考[PP-Tracking可视化界面使用文档]()快速上手体验 + +### 二、算法介绍 + +PP-Tracking集成了多目标跟踪,目标检测,ReID轻量级算法,提升跟踪系统实时性能。多目标跟踪算法基于FairMOT进行优化,实现了服务器端轻量级模型,同时基于不同应用场景提供了针对性的预训练模型。 + +模型训练评估方法请参考[多目标跟踪快速开始](../../configs/mot/README_cn.md#快速开始) + +PP-Tracking中提供的多场景预训练模型及导出模型列表如下: + +| 场景 | 数据集 | 精度(MOTA) | NX模型预测速度(FPS) | 配置文件 | 模型权重 | 预测部署模型 | +| :---------:|:--------------- | :-------: | :------: | :------: |:---: | :---: | +| 行人跟踪 | MOT17 | 65.3 | 23.9 | [配置文件](../../configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml) | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams) | [下载链接](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.tar) | +| 行人小目标跟踪 | VisDrone-pedestrian | 40.5 | 8.35 | [配置文件](../../configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.yml) | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.pdparams) | [下载链接](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.tar) | +| 车辆跟踪 | BDD100k-vehicle | 32.6 | 24.3 | [配置文件](../../configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.yml) | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.pdparams) | [下载链接](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.tar) | +| 车辆小目标跟踪 | VisDrone-vehicle | 39.8 | 22.8 | [配置文件](../../configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.yml) | [下载链接](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.pdparams) | [下载链接](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.tar) +| 多类别跟踪 | BDD100k | - | 12.5 | [配置文件]() | [下载链接]() | [下载链接](https://bj.bcebos.com/v1/paddledet/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100k_mcmot.tar) | +| 多类别小目标跟踪 | VisDrone | 20.4 | 6.74 | [配置文件](../../configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.yml) | [下载链接](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.pdparams) | [下载链接](https://bj.bcebos.com/v1/paddledet/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.tar) | + +**注:** + +1. 模型预测速度为TensorRT FP16速度,测试环境为CUDA 10.2,JETPACK 4.5.1,TensorRT 7.1 +2. 更多跟踪模型请参考[多目标跟踪模型库](../../configs/mot/README_cn.md#模型库) + +检测模型使用轻量级特色模型PP-PicoDet,具体请参考[PP-PicoDet文档](../../configs/picodet) + +ReID模型使用超轻量骨干网络模型PP-LCNet, 具体请参考[PP-LCNet模型介绍](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models/PP-LCNet.md) + +### 三、Python端预测部署 + +PP-Tracking 使用python预测部署教程请参考[PP-Tracking python部署文档](python/README.md) + +### 四、C++端预测部署 + +PP-Tracking 使用c++预测部署教程请参考[PP-Tracking c++部署文档](cpp/README.md) diff --git a/deploy/pptracking/cpp/CMakeLists.txt b/deploy/pptracking/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..3656cde94a914f3df903571254d9303a02fdac79 --- /dev/null +++ b/deploy/pptracking/cpp/CMakeLists.txt @@ -0,0 +1,242 @@ +cmake_minimum_required(VERSION 3.0) +project(PaddleObjectDetector CXX C) + +option(WITH_MKL "Compile demo with MKL/OpenBlas support,defaultuseMKL." ON) +option(WITH_GPU "Compile demo with GPU/CPU, default use CPU." ON) +option(WITH_TENSORRT "Compile demo with TensorRT." 
OFF) + +SET(PADDLE_DIR "" CACHE PATH "Location of libraries") +SET(PADDLE_LIB_NAME "" CACHE STRING "libpaddle_inference") +SET(OPENCV_DIR "" CACHE PATH "Location of libraries") +SET(CUDA_LIB "" CACHE PATH "Location of libraries") +SET(CUDNN_LIB "" CACHE PATH "Location of libraries") +SET(TENSORRT_INC_DIR "" CACHE PATH "Compile demo with TensorRT") +SET(TENSORRT_LIB_DIR "" CACHE PATH "Compile demo with TensorRT") + +include(cmake/yaml-cpp.cmake) + +include_directories("${CMAKE_SOURCE_DIR}/") +include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include") +link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib") + +set(SRCS src/main.cc src/preprocess_op.cc src/pipeline.cc src/jde_predictor.cc src/sde_predictor.cc src/tracker.cc src/trajectory.cc src/lapjv.cpp src/postprocess.cc) + +macro(safe_set_static_flag) + foreach(flag_var + CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE + CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) + if(${flag_var} MATCHES "/MD") + string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") + endif(${flag_var} MATCHES "/MD") + endforeach(flag_var) +endmacro() + +if (WITH_MKL) + ADD_DEFINITIONS(-DUSE_MKL) +endif() + +if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "") + message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_influence_dir") +endif() +message("PADDLE_DIR IS:" ${PADDLE_DIR}) + +if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "") + message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv") +endif() + +include_directories("${CMAKE_SOURCE_DIR}/") +include_directories("${PADDLE_DIR}/") +include_directories("${PADDLE_DIR}/third_party/install/protobuf/include") +include_directories("${PADDLE_DIR}/third_party/install/glog/include") +include_directories("${PADDLE_DIR}/third_party/install/gflags/include") +include_directories("${PADDLE_DIR}/third_party/install/xxhash/include") +if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include") + include_directories("${PADDLE_DIR}/third_party/install/snappy/include") +endif() +if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include") + include_directories("${PADDLE_DIR}/third_party/install/snappystream/include") +endif() +include_directories("${PADDLE_DIR}/third_party/boost") +include_directories("${PADDLE_DIR}/third_party/eigen3") + +if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") + link_directories("${PADDLE_DIR}/third_party/install/snappy/lib") +endif() +if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") + link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib") +endif() + +link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib") +link_directories("${PADDLE_DIR}/third_party/install/glog/lib") +link_directories("${PADDLE_DIR}/third_party/install/gflags/lib") +link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib") +link_directories("${PADDLE_DIR}/paddle/lib/") +link_directories("${CMAKE_CURRENT_BINARY_DIR}") + + + +if (WIN32) + include_directories("${PADDLE_DIR}/paddle/fluid/inference") + include_directories("${PADDLE_DIR}/paddle/include") + link_directories("${PADDLE_DIR}/paddle/fluid/inference") + find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH) + +else () + find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH) + include_directories("${PADDLE_DIR}/paddle/include") + link_directories("${PADDLE_DIR}/paddle/lib") +endif () +include_directories(${OpenCV_INCLUDE_DIRS}) + +if (WIN32) + 
add_definitions("/DGOOGLE_GLOG_DLL_DECL=") + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") +else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -o2 -fopenmp -std=c++11") + set(CMAKE_STATIC_LIBRARY_PREFIX "") +endif() + +# TODO let users define cuda lib path +if (WITH_GPU) + if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "") + message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64") + endif() + if (NOT WIN32) + if (NOT DEFINED CUDNN_LIB) + message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64") + endif() + endif(NOT WIN32) +endif() + + +if (NOT WIN32) + if (WITH_TENSORRT AND WITH_GPU) + include_directories("${TENSORRT_INC_DIR}/") + link_directories("${TENSORRT_LIB_DIR}/") + endif() +endif(NOT WIN32) + +if (NOT WIN32) + set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph") + if(EXISTS ${NGRAPH_PATH}) + include(GNUInstallDirs) + include_directories("${NGRAPH_PATH}/include") + link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}") + set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() +endif() + +if(WITH_MKL) + include_directories("${PADDLE_DIR}/third_party/install/mklml/include") + if (WIN32) + set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib + ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib) + else () + set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} + ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX}) + execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib) + endif () + set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn") + if(EXISTS ${MKLDNN_PATH}) + include_directories("${MKLDNN_PATH}/include") + if (WIN32) + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib) + else () + set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) + endif () + endif() +else() + set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX}) +endif() + + +if (WIN32) + if(EXISTS "${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(DEPS + ${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(DEPS + ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() +endif() + + +if (WIN32) + set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}) +else() + set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) +endif() + +message("PADDLE_LIB_NAME:" ${PADDLE_LIB_NAME}) +message("DEPS:" $DEPS) + +if (NOT WIN32) + set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags protobuf z xxhash yaml-cpp + ) + if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") + set(DEPS ${DEPS} snappystream) + endif() + if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") + set(DEPS ${DEPS} snappy) + endif() +else() + set(DEPS ${DEPS} + ${MATH_LIB} ${MKLDNN_LIB} + glog gflags_static libprotobuf xxhash libyaml-cppmt) + set(DEPS ${DEPS} libcmt shlwapi) + if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib") + set(DEPS ${DEPS} snappy) + endif() + if(EXISTS 
"${PADDLE_DIR}/third_party/install/snappystream/lib") + set(DEPS ${DEPS} snappystream) + endif() +endif(NOT WIN32) + +if(WITH_GPU) + if(NOT WIN32) + if (WITH_TENSORRT) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) + endif() + set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX}) + else() + set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} ) + set(DEPS ${DEPS} ${CUDNN_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() +endif() + +if (NOT WIN32) + set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread") + set(DEPS ${DEPS} ${EXTERNAL_LIB}) +endif() + +set(DEPS ${DEPS} ${OpenCV_LIBS}) +add_executable(main ${SRCS}) +ADD_DEPENDENCIES(main ext-yaml-cpp) +message("DEPS:" $DEPS) +target_link_libraries(main ${DEPS}) + +if (WIN32 AND WITH_MKL) + add_custom_command(TARGET main POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll + ) +endif() + +if (WIN32) + add_custom_command(TARGET main POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll + ) +endif() diff --git a/deploy/pptracking/cpp/README.md b/deploy/pptracking/cpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..62e80b6bd74d678ac658b6f914ef722e7bd575f6 --- /dev/null +++ b/deploy/pptracking/cpp/README.md @@ -0,0 +1,144 @@ +# C++端预测部署 + +在PaddlePaddle中预测引擎和训练引擎底层有着不同的优化方法, 预测引擎使用了AnalysisPredictor,专门针对推理进行了优化,该引擎可以对模型进行多项图优化,减少不必要的内存拷贝。如果用户在部署已训练模型的过程中对性能有较高的要求,我们提供了独立于PaddleDetection的预测脚本,方便用户直接集成部署。当前C++部署支持基于Fairmot的单镜头类别预测部署,并支持人流量统计、出入口计数功能。 + +主要包含三个步骤: + +- 准备环境 +- 导出预测模型 +- C++预测 + +## 一、准备环境 + +环境要求: + +- GCC 8.2 +- CUDA 10.1/10.2/11.1; CUDNN 7.6/8.1 +- CMake 3.0+ +- TensorRT 6/7 + +NVIDIA Jetson用户请参考[Jetson平台编译指南](../../cpp/Jetson_build.md#jetson环境搭建)完成JetPack安装 + +### 1. 下载代码 + +``` +git clone https://github.com/PaddlePaddle/PaddleDetection.git +# C++部署代码与其他目录代码独立 +cd deploy/pptracking/cpp +``` + +### 2. 下载或编译PaddlePaddle C++预测库 + +请根据环境选择适当的预测库进行下载,参考[C++预测库下载列表](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html) + +下载并解压后`./paddle_inference`目录包含内容为: + +``` +paddle_inference +├── paddle # paddle核心库和头文件 +| +├── third_party # 第三方依赖库和头文件 +| +└── version.txt # 版本和编译信息 +``` + +**注意:** 如果用户环境与官网提供环境不一致(如cuda 、cudnn、tensorrt版本不一致等),或对飞桨源代码有修改需求,或希望进行定制化构建,可参考[文档](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)自行源码编译预测库。 + +### 3. 
编译 + +编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下: + +``` +# 是否使用GPU(即是否使用 CUDA) +WITH_GPU=ON + +# 是否使用MKL or openblas,TX2需要设置为OFF +WITH_MKL=OFF + +# 是否集成 TensorRT(仅WITH_GPU=ON 有效) +WITH_TENSORRT=ON + +# TensorRT 的include路径 +TENSORRT_INC_DIR=/path/to/TensorRT/include + +# TensorRT 的lib路径 +TENSORRT_LIB_DIR=/path/to/TensorRT/lib + +# Paddle 预测库路径 +PADDLE_DIR=/path/to/paddle_inference/ + +# Paddle 预测库名称 +PADDLE_LIB_NAME=libpaddle_inference + +# CUDA 的 lib 路径 +CUDA_LIB=/path/to/cuda/lib + +# CUDNN 的 lib 路径 +CUDNN_LIB=/path/to/cudnn/lib + +# OPENCV路径 +OPENCV_DIR=/path/to/opencv +``` + +修改脚本设置好主要参数后,执行```build.sh```脚本: + +``` +sh ./scripts/build.sh +``` + +**注意:** + +1. `TX2`平台的`CUDA`、`CUDNN`需要通过`JetPack`安装。 +2. 已提供linux和tx2平台的opencv下载方式,其他环境请自行安装[opencv](https://opencv.org/) + +## 二、导出预测模型 + +将训练保存的权重导出为预测库需要的模型格式,使用PaddleDetection下的```tools/export_model.py```导出模型 + +``` +python tools/export_model.py -c configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams +``` + +预测模型会默认导出到```output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320```目录下,包括```infer_cfg.yml```, ```model.pdiparams```, ```model.pdiparams.info```, ```model.pdmodel``` + +导出模型也可以通过[预测模型列表]()直接下载使用 + +## 三、C++预测 + +完成以上步骤后,可以通过```build/main```进行预测,参数列表如下: + +| 参数 | 说明 | +| ---- | ---- | +| --track_model_dir | 导出的跟踪预测模型所在路径 | +| --video_file | 要预测的视频文件路径 | +| --device | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`| +| --gpu_id | 指定进行推理的GPU device id(默认值为0)| +| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)| +| --output_dir | 输出图片所在的文件夹, 默认为output | +| --use_mkldnn | CPU预测中是否开启MKLDNN加速 | +| --cpu_threads | 设置cpu线程数,默认为1 | +| --do_entrance_counting | 是否进行出入口流量统计,默认为否 | +| --save_result | 是否保存跟踪结果 | + +样例一: + +```shell +# 使用CPU测试视频 `test.mp4` , 模型和测试视频均移至`build`目录下 + +./main --track_model_dir=./fairmot_hrnetv2_w18_dlafpn_30e_576x320 --video_file=test.mp4 + +# 视频可视化预测结果默认保存在当前目录下output/test.mp4文件中 +``` + + +样例二: + +```shell +# 使用GPU测试视频 `test.mp4` , 模型和测试视频均移至`build`目录下,实现出入口计数功能,并保存跟踪结果 + +./main -video_file=test.mp4 -track_model_dir=./fairmot_dla34_30e_1088x608/ --device=gpu --do_entrance_counting=True --save_result=True + +# 视频可视化预测结果默认保存在当前目录下`output/test.mp4`中 +# 跟踪结果保存在`output/mot_output.txt`中 +# 计数结果保存在`output/flow_statistic.txt`中 +``` diff --git a/deploy/pptracking/cpp/cmake/yaml-cpp.cmake b/deploy/pptracking/cpp/cmake/yaml-cpp.cmake new file mode 100644 index 0000000000000000000000000000000000000000..7bc7f34d476d69d57336940bcf6c8c55311b8112 --- /dev/null +++ b/deploy/pptracking/cpp/cmake/yaml-cpp.cmake @@ -0,0 +1,30 @@ + +find_package(Git REQUIRED) + +include(ExternalProject) + +message("${CMAKE_BUILD_TYPE}") + +ExternalProject_Add( + ext-yaml-cpp + URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip + URL_MD5 9542d6de397d1fbd649ed468cb5850e6 + CMAKE_ARGS + -DYAML_CPP_BUILD_TESTS=OFF + -DYAML_CPP_BUILD_TOOLS=OFF + -DYAML_CPP_INSTALL=OFF + -DYAML_CPP_BUILD_CONTRIB=OFF + -DMSVC_SHARED_RT=OFF + -DBUILD_SHARED_LIBS=OFF + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib + -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib + PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp" + # Disable install step + INSTALL_COMMAND "" + LOG_DOWNLOAD ON + LOG_BUILD 1 +) diff --git 
a/deploy/pptracking/cpp/include/config_parser.h b/deploy/pptracking/cpp/include/config_parser.h new file mode 100644 index 0000000000000000000000000000000000000000..b801d62dbcd359c48c12e9e05f01bc3dc9fe508f --- /dev/null +++ b/deploy/pptracking/cpp/include/config_parser.h @@ -0,0 +1,137 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include + +#include "yaml-cpp/yaml.h" + +#ifdef _WIN32 +#define OS_PATH_SEP "\\" +#else +#define OS_PATH_SEP "/" +#endif + +namespace PaddleDetection { + +// Inference model configuration parser +class ConfigPaser { + public: + ConfigPaser() {} + + ~ConfigPaser() {} + + bool load_config(const std::string& model_dir, + const std::string& cfg = "infer_cfg.yml") { + // Load as a YAML::Node + YAML::Node config; + config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg); + + // Get runtime mode : fluid, trt_fp16, trt_fp32 + if (config["mode"].IsDefined()) { + mode_ = config["mode"].as(); + } else { + std::cerr << "Please set mode, " + << "support value : fluid/trt_fp16/trt_fp32." << std::endl; + return false; + } + + // Get model arch: FairMot or YOLO/Picodet/LCNet for DeepSort + if (config["arch"].IsDefined()) { + arch_ = config["arch"].as(); + } else { + std::cerr << "Please set model arch," + << "support value : FairMot, YOLO, PicoDet, LCNet etc" + << std::endl; + return false; + } + + // Get min_subgraph_size for tensorrt + if (config["min_subgraph_size"].IsDefined()) { + min_subgraph_size_ = config["min_subgraph_size"].as(); + } else { + std::cerr << "Please set min_subgraph_size." << std::endl; + return false; + } + // Get draw_threshold for visualization + if (config["draw_threshold"].IsDefined()) { + draw_threshold_ = config["draw_threshold"].as(); + } else { + std::cerr << "Please set draw_threshold." << std::endl; + return false; + } + // Get Preprocess for preprocessing + if (config["Preprocess"].IsDefined()) { + preprocess_info_ = config["Preprocess"]; + } else { + std::cerr << "Please set Preprocess." << std::endl; + return false; + } + // Get label_list for visualization + if (config["label_list"].IsDefined()) { + label_list_ = config["label_list"].as>(); + } else { + std::cerr << "Please set label_list." << std::endl; + return false; + } + + // Get use_dynamic_shape for TensorRT + if (config["use_dynamic_shape"].IsDefined()) { + use_dynamic_shape_ = config["use_dynamic_shape"].as(); + } else { + std::cerr << "Please set use_dynamic_shape." << std::endl; + return false; + } + + // Get conf_thresh for tracker + if (config["tracker"].IsDefined()) { + if (config["tracker"]["conf_thres"].IsDefined()) { + conf_thresh_ = config["tracker"]["conf_thres"].as(); + } else { + std::cerr << "Please set conf_thres in tracker." 
<< std::endl; + return false; + } + } + + // Get NMS for postprocess + if (config["NMS"].IsDefined()) { + nms_info_ = config["NMS"]; + } + // Get fpn_stride in PicoDet + if (config["fpn_stride"].IsDefined()) { + fpn_stride_.clear(); + for (auto item : config["fpn_stride"]) { + fpn_stride_.emplace_back(item.as()); + } + } + + return true; + } + std::string mode_; + float draw_threshold_; + std::string arch_; + int min_subgraph_size_; + YAML::Node preprocess_info_; + YAML::Node nms_info_; + std::vector label_list_; + std::vector fpn_stride_; + bool use_dynamic_shape_; + float conf_thresh_; +}; + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/include/jde_predictor.h b/deploy/pptracking/cpp/include/jde_predictor.h new file mode 100644 index 0000000000000000000000000000000000000000..53f1fb937194aeb7425fc8bcb26215dab31e36a0 --- /dev/null +++ b/deploy/pptracking/cpp/include/jde_predictor.h @@ -0,0 +1,97 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "paddle_inference_api.h" // NOLINT + +#include "include/config_parser.h" +#include "include/preprocess_op.h" +#include "include/utils.h" + +using namespace paddle_infer; // NOLINT + +namespace PaddleDetection { + +class JDEPredictor { + public: + explicit JDEPredictor(const std::string& device = "CPU", + const std::string& model_dir = "", + const double threshold = -1., + const std::string& run_mode = "fluid", + const int gpu_id = 0, + const bool use_mkldnn = false, + const int cpu_threads = 1, + bool trt_calib_mode = false, + const int min_box_area = 200) { + this->device_ = device; + this->gpu_id_ = gpu_id; + this->use_mkldnn_ = use_mkldnn; + this->cpu_math_library_num_threads_ = cpu_threads; + this->trt_calib_mode_ = trt_calib_mode; + this->min_box_area_ = min_box_area; + + config_.load_config(model_dir); + this->min_subgraph_size_ = config_.min_subgraph_size_; + preprocessor_.Init(config_.preprocess_info_); + LoadModel(model_dir, run_mode); + this->conf_thresh_ = config_.conf_thresh_; + } + + // Load Paddle inference model + void LoadModel(const std::string& model_dir, + const std::string& run_mode = "fluid"); + + // Run predictor + void Predict(const std::vector imgs, + const double threshold = 0.5, + MOTResult* result = nullptr, + std::vector* times = nullptr); + + private: + std::string device_ = "CPU"; + float threhold = 0.5; + int gpu_id_ = 0; + bool use_mkldnn_ = false; + int cpu_math_library_num_threads_ = 1; + int min_subgraph_size_ = 3; + bool trt_calib_mode_ = false; + + // Preprocess image and copy data to input buffer + void Preprocess(const cv::Mat& image_mat); + // Postprocess result + void Postprocess(const cv::Mat dets, const cv::Mat emb, MOTResult* result); + + std::shared_ptr predictor_; + Preprocessor preprocessor_; + ImageBlob inputs_; + std::vector bbox_data_; + std::vector emb_data_; + double threshold_; + ConfigPaser 
config_; + float min_box_area_; + float conf_thresh_; +}; + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/include/lapjv.h b/deploy/pptracking/cpp/include/lapjv.h new file mode 100644 index 0000000000000000000000000000000000000000..ffaa010c00525e9babac1bfe276189187dc840f8 --- /dev/null +++ b/deploy/pptracking/cpp/include/lapjv.h @@ -0,0 +1,64 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The code is based on: +// https://github.com/gatagat/lap/blob/master/lap/lapjv.h +// Ths copyright of gatagat/lap is as follows: +// MIT License + +#ifndef DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_ +#define DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_ +#define LARGE 1000000 + +#if !defined TRUE +#define TRUE 1 +#endif +#if !defined FALSE +#define FALSE 0 +#endif + +#define NEW(x, t, n) \ + if ((x = reinterpret_cast(malloc(sizeof(t) * (n)))) == 0) { \ + return -1; \ + } +#define FREE(x) \ + if (x != 0) { \ + free(x); \ + x = 0; \ + } +#define SWAP_INDICES(a, b) \ + { \ + int_t _temp_index = a; \ + a = b; \ + b = _temp_index; \ + } +#include + +namespace PaddleDetection { + +typedef signed int int_t; +typedef unsigned int uint_t; +typedef double cost_t; +typedef char boolean; +typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; + +int lapjv_internal(const cv::Mat &cost, + const bool extend_cost, + const float cost_limit, + int *x, + int *y); + +} // namespace PaddleDetection + +#endif // DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_ diff --git a/deploy/pptracking/cpp/include/pipeline.h b/deploy/pptracking/cpp/include/pipeline.h new file mode 100644 index 0000000000000000000000000000000000000000..f3e6799b132991907f01acbdfd6803578577182d --- /dev/null +++ b/deploy/pptracking/cpp/include/pipeline.h @@ -0,0 +1,142 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
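+
+// Usage sketch for the Pipeline class declared below. The argument values
+// (model directory, GPU/TensorRT settings, input video) are illustrative
+// placeholders only; see the flag table in the C++ deployment README for the
+// command-line equivalents.
+//
+//   PaddleDetection::Pipeline pipeline(
+//       "GPU",          // device
+//       0.5,            // threshold
+//       "output",       // output_dir
+//       "trt_fp16",     // run_mode
+//       0,              // gpu_id
+//       false,          // use_mkldnn
+//       1,              // cpu_threads
+//       false,          // trt_calib_mode
+//       true,           // do_entrance_counting
+//       true,           // save_result
+//       "pedestrian",   // scene
+//       false,          // tiny_obj
+//       false,          // is_mtmct
+//       10,             // secs_interval
+//       "./fairmot_hrnetv2_w18_dlafpn_30e_576x320");  // track_model_dir
+//   pipeline.SetInput("test.mp4");
+//   pipeline.Run();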
+ +#ifndef DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_ +#define DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#elif LINUX +#include +#include +#endif + +#include "include/jde_predictor.h" +#include "include/sde_predictor.h" + +namespace PaddleDetection { + +class Pipeline { + public: + explicit Pipeline(const std::string& device, + const double threshold, + const std::string& output_dir, + const std::string& run_mode = "fluid", + const int gpu_id = 0, + const bool use_mkldnn = false, + const int cpu_threads = 1, + const bool trt_calib_mode = false, + const bool do_entrance_counting = false, + const bool save_result = false, + const std::string& scene = "pedestrian", + const bool tiny_obj = false, + const bool is_mtmct = false, + const int secs_interval = 10, + const std::string track_model_dir = "", + const std::string det_model_dir = "", + const std::string reid_model_dir = "") { + std::vector input; + this->input_ = input; + this->device_ = device; + this->threshold_ = threshold; + this->output_dir_ = output_dir; + this->run_mode_ = run_mode; + this->gpu_id_ = gpu_id; + this->use_mkldnn_ = use_mkldnn; + this->cpu_threads_ = cpu_threads; + this->trt_calib_mode_ = trt_calib_mode; + this->do_entrance_counting_ = do_entrance_counting; + this->secs_interval_ = secs_interval_; + this->save_result_ = save_result; + SelectModel(scene, + tiny_obj, + is_mtmct, + track_model_dir, + det_model_dir, + reid_model_dir); + InitPredictor(); + } + + // Set input, it must execute before Run() + void SetInput(const std::string& input_video); + void ClearInput(); + + // Run pipeline in video + void Run(); + void PredictMOT(const std::string& video_path); + void PredictMTMCT(const std::vector video_inputs); + + // Run pipeline in stream + void RunMOTStream(const cv::Mat img, + const int frame_id, + const int video_fps, + const Rect entrance, + cv::Mat out_img, + std::vector* records, + std::set* count_set, + std::set* interval_count_set, + std::vector* in_count_list, + std::vector* out_count_list, + std::map>* prev_center, + std::vector* flow_records); + void RunMTMCTStream(const std::vector imgs, + std::vector* records); + + void PrintBenchmarkLog(const std::vector det_time, const int img_num); + + private: + // Select model according to scenes, it must execute before Run() + void SelectModel(const std::string& scene = "pedestrian", + const bool tiny_obj = false, + const bool is_mtmct = false, + const std::string track_model_dir = "", + const std::string det_model_dir = "", + const std::string reid_model_dir = ""); + void InitPredictor(); + + std::shared_ptr jde_sct_; + std::shared_ptr sde_sct_; + + std::vector input_; + std::vector stream_; + std::string device_; + double threshold_; + std::string output_dir_; + std::string track_model_dir_; + std::string det_model_dir_; + std::string reid_model_dir_; + std::string run_mode_ = "fluid"; + int gpu_id_ = 0; + bool use_mkldnn_ = false; + int cpu_threads_ = 1; + bool trt_calib_mode_ = false; + bool do_entrance_counting_ = false; + bool save_result_ = false; + int secs_interval_ = 10; +}; + +} // namespace PaddleDetection + +#endif // DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_ diff --git a/deploy/pptracking/cpp/include/postprocess.h b/deploy/pptracking/cpp/include/postprocess.h new file mode 100644 index 0000000000000000000000000000000000000000..41b10960351d69a0e3388adfc763c11f162a0bbd --- /dev/null +++ b/deploy/pptracking/cpp/include/postprocess.h @@ 
-0,0 +1,62 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "include/utils.h" + +namespace PaddleDetection { + +// Generate visualization color +cv::Scalar GetColor(int idx); + +// Visualize Tracking Results +cv::Mat VisualizeTrackResult(const cv::Mat& img, + const MOTResult& results, + const float fps, + const int frame_id); + +// Pedestrian/Vehicle Counting +void FlowStatistic(const MOTResult& results, + const int frame_id, + const int secs_interval, + const bool do_entrance_counting, + const int video_fps, + const Rect entrance, + std::set* id_set, + std::set* interval_id_set, + std::vector* in_id_list, + std::vector* out_id_list, + std::map>* prev_center, + std::vector* records); + +// Save Tracking Results +void SaveMOTResult(const MOTResult& results, + const int frame_id, + std::vector* records); + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/include/predictor.h b/deploy/pptracking/cpp/include/predictor.h new file mode 100644 index 0000000000000000000000000000000000000000..f4c4168725ae1e9b0575bd3af4aec662a7f35c5a --- /dev/null +++ b/deploy/pptracking/cpp/include/predictor.h @@ -0,0 +1,99 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
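+
+// Usage sketch for the Predictor wrapper declared below. Exactly one of
+// track_model_dir (JDE/FairMOT path) or det_model_dir plus reid_model_dir
+// (SDE/DeepSORT path) must be given; the directory, image and threshold used
+// here are placeholders.
+//
+//   PaddleDetection::Predictor predictor(
+//       "GPU", "./fairmot_hrnetv2_w18_dlafpn_30e_576x320");  // track_model_dir
+//   cv::Mat frame = cv::imread("frame.jpg");
+//   PaddleDetection::MOTResult result;
+//   std::vector<double> times(3, 0.0);  // preprocess / inference / postprocess (ms)
+//   predictor.Predict({frame}, 0.5, &result, &times);
+//   for (const auto& track : result) {
+//     // track.ids, track.score and track.rects describe one tracked target
+//   }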
+ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "paddle_inference_api.h" // NOLINT + +#include "include/config_parser.h" +#include "include/jde_predictor.h" +#include "include/preprocess_op.h" +#include "include/sde_predictor.h" + +using namespace paddle_infer; // NOLINT + +namespace PaddleDetection { + +class Predictor { + public: + explicit Predictor(const std::string& device = "CPU", + const std::string& track_model_dir = "", + const std::string& det_model_dir = "", + const std::string& reid_model_dir = "", + const double threshold = -1., + const std::string& run_mode = "fluid", + const int gpu_id = 0, + const bool use_mkldnn = false, + const int cpu_threads = 1, + bool trt_calib_mode = false, + const int min_box_area = 200) { + if (track_model_dir.empty() && det_model_dir.empty()) { + throw "Predictor must receive track_model or det_model!"; + } + + if (!track_model_dir.empty() && !det_model_dir.empty()) { + throw "Predictor only receive one of track_model or det_model!"; + } + + if (!track_model_dir.empty()) { + jde_sct_ = + std::make_shared(device, + track_model_dir, + threshold, + run_mode, + gpu_id, + use_mkldnn, + cpu_threads, + trt_calib_mode, + min_box_area); + use_jde_ = true; + } + if (!det_model_dir.empty()) { + sde_sct_ = std::make_shared(device, + det_model_dir, + reid_model_dir, + threshold, + run_mode, + gpu_id, + use_mkldnn, + cpu_threads, + trt_calib_mode, + min_box_area); + use_jde_ = false; + } + } + + // Run predictor + void Predict(const std::vector imgs, + const double threshold = 0.5, + MOTResult* result = nullptr, + std::vector* times = nullptr); + + private: + std::shared_ptr jde_sct_; + std::shared_ptr sde_sct_; + bool use_jde_ = true; +}; + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/include/preprocess_op.h b/deploy/pptracking/cpp/include/preprocess_op.h new file mode 100644 index 0000000000000000000000000000000000000000..b45388c91cde778b9b83e8f1c297878edf02573e --- /dev/null +++ b/deploy/pptracking/cpp/include/preprocess_op.h @@ -0,0 +1,171 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
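+
+// Usage sketch for the Preprocessor declared below. The preprocessing ops
+// (e.g. LetterBoxResize, NormalizeImage, Permute) are built from the
+// "Preprocess" node of the exported model's infer_cfg.yml; the paths here
+// are placeholders.
+//
+//   YAML::Node config = YAML::LoadFile("./model_dir/infer_cfg.yml");
+//   PaddleDetection::Preprocessor preprocessor;
+//   preprocessor.Init(config["Preprocess"]);
+//   cv::Mat im = cv::imread("frame.jpg");
+//   PaddleDetection::ImageBlob blob;
+//   preprocessor.Run(&im, &blob);
+//   // blob.im_data_ now holds the normalized CHW image buffer, while
+//   // blob.in_net_shape_ and blob.scale_factor_ carry the shape inputs
+//   // expected by the predictor.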
+ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace PaddleDetection { + +// Object for storing all preprocessed data +class ImageBlob { + public: + // image width and height + std::vector im_shape_; + // Buffer for image data after preprocessing + std::vector im_data_; + // in net data shape(after pad) + std::vector in_net_shape_; + // Evaluation image width and height + // std::vector eval_im_size_f_; + // Scale factor for image size to origin image size + std::vector scale_factor_; +}; + +// Abstraction of preprocessing opration class +class PreprocessOp { + public: + virtual void Init(const YAML::Node& item) = 0; + virtual void Run(cv::Mat* im, ImageBlob* data) = 0; +}; + +class InitInfo : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) {} + virtual void Run(cv::Mat* im, ImageBlob* data); +}; + +class NormalizeImage : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + mean_ = item["mean"].as>(); + scale_ = item["std"].as>(); + is_scale_ = item["is_scale"].as(); + } + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + // CHW or HWC + std::vector mean_; + std::vector scale_; + bool is_scale_; +}; + +class Permute : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) {} + virtual void Run(cv::Mat* im, ImageBlob* data); +}; + +class Resize : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + interp_ = item["interp"].as(); + keep_ratio_ = item["keep_ratio"].as(); + target_size_ = item["target_size"].as>(); + } + + // Compute best resize scale for x-dimension, y-dimension + std::pair GenerateScale(const cv::Mat& im); + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + int interp_; + bool keep_ratio_; + std::vector target_size_; + std::vector in_net_shape_; +}; + +class LetterBoxResize : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + target_size_ = item["target_size"].as>(); + } + + float GenerateScale(const cv::Mat& im); + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + std::vector target_size_; + std::vector in_net_shape_; +}; +// Models with FPN need input shape % stride == 0 +class PadStride : public PreprocessOp { + public: + virtual void Init(const YAML::Node& item) { + stride_ = item["stride"].as(); + } + + virtual void Run(cv::Mat* im, ImageBlob* data); + + private: + int stride_; +}; + +class Preprocessor { + public: + void Init(const YAML::Node& config_node) { + // initialize image info at first + ops_["InitInfo"] = std::make_shared(); + for (const auto& item : config_node) { + auto op_name = item["type"].as(); + + ops_[op_name] = CreateOp(op_name); + ops_[op_name]->Init(item); + } + } + + std::shared_ptr CreateOp(const std::string& name) { + if (name == "Resize") { + return std::make_shared(); + } else if (name == "LetterBoxResize") { + return std::make_shared(); + } else if (name == "Permute") { + return std::make_shared(); + } else if (name == "NormalizeImage") { + return std::make_shared(); + } else if (name == "PadStride") { + // use PadStride instead of PadBatch + return std::make_shared(); + } + std::cerr << "can not find function of OP: " << name + << " and return: nullptr" << std::endl; + return nullptr; + } + + void Run(cv::Mat* im, ImageBlob* data); + + public: + static const std::vector RUN_ORDER; + + private: + std::unordered_map> ops_; +}; + +} // namespace PaddleDetection diff --git 
a/deploy/pptracking/cpp/include/sde_predictor.h b/deploy/pptracking/cpp/include/sde_predictor.h new file mode 100644 index 0000000000000000000000000000000000000000..f05a8644dfa26e00df47aebc212f2ff3b5a098f7 --- /dev/null +++ b/deploy/pptracking/cpp/include/sde_predictor.h @@ -0,0 +1,106 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "paddle_inference_api.h" // NOLINT + +#include "include/config_parser.h" +#include "include/preprocess_op.h" +#include "include/utils.h" + +using namespace paddle_infer; // NOLINT + +namespace PaddleDetection { + +class SDEPredictor { + public: + explicit SDEPredictor(const std::string& device, + const std::string& det_model_dir = "", + const std::string& reid_model_dir = "", + const double threshold = -1., + const std::string& run_mode = "fluid", + const int gpu_id = 0, + const bool use_mkldnn = false, + const int cpu_threads = 1, + bool trt_calib_mode = false, + const int min_box_area = 200) { + this->device_ = device; + this->gpu_id_ = gpu_id; + this->use_mkldnn_ = use_mkldnn; + this->cpu_math_library_num_threads_ = cpu_threads; + this->trt_calib_mode_ = trt_calib_mode; + this->min_box_area_ = min_box_area; + + det_config_.load_config(det_model_dir); + this->min_subgraph_size_ = det_config_.min_subgraph_size_; + det_preprocessor_.Init(det_config_.preprocess_info_); + + reid_config_.load_config(reid_model_dir); + reid_preprocessor_.Init(reid_config_.preprocess_info_); + + LoadModel(det_model_dir, reid_model_dir, run_mode); + this->conf_thresh_ = det_config_.conf_thresh_; + } + + // Load Paddle inference model + void LoadModel(const std::string& det_model_dir, + const std::string& reid_model_dir, + const std::string& run_mode = "fluid"); + + // Run predictor + void Predict(const std::vector imgs, + const double threshold = 0.5, + MOTResult* result = nullptr, + std::vector* times = nullptr); + + private: + std::string device_ = "CPU"; + float threhold = 0.5; + int gpu_id_ = 0; + bool use_mkldnn_ = false; + int cpu_math_library_num_threads_ = 1; + int min_subgraph_size_ = 3; + bool trt_calib_mode_ = false; + + // Preprocess image and copy data to input buffer + void Preprocess(const cv::Mat& image_mat); + // Postprocess result + void Postprocess(const cv::Mat dets, const cv::Mat emb, MOTResult* result); + + std::shared_ptr det_predictor_; + std::shared_ptr reid_predictor_; + Preprocessor det_preprocessor_; + Preprocessor reid_preprocessor_; + ImageBlob inputs_; + std::vector bbox_data_; + std::vector emb_data_; + double threshold_; + ConfigPaser det_config_; + ConfigPaser reid_config_; + float min_box_area_ = 200; + float conf_thresh_; +}; + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/include/tracker.h b/deploy/pptracking/cpp/include/tracker.h new file mode 100644 index 0000000000000000000000000000000000000000..244530f140a3728b1f37d032c2d74693bd7e8f74 
--- /dev/null +++ b/deploy/pptracking/cpp/include/tracker.h @@ -0,0 +1,72 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.h +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#pragma once + +#include +#include + +#include +#include +#include +#include "include/trajectory.h" + +namespace PaddleDetection { + +typedef std::map Match; +typedef std::map::iterator MatchIterator; + +struct Track { + int id; + float score; + cv::Vec4f ltrb; +}; + +class JDETracker { + public: + static JDETracker *instance(void); + virtual bool update(const cv::Mat &dets, + const cv::Mat &emb, + std::vector *tracks); + + private: + JDETracker(void); + virtual ~JDETracker(void) {} + cv::Mat motion_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b); + void linear_assignment(const cv::Mat &cost, + float cost_limit, + Match *matches, + std::vector *mismatch_row, + std::vector *mismatch_col); + void remove_duplicate_trajectory(TrajectoryPool *a, + TrajectoryPool *b, + float iou_thresh = 0.15f); + + private: + static JDETracker *me; + int timestamp; + TrajectoryPool tracked_trajectories; + TrajectoryPool lost_trajectories; + TrajectoryPool removed_trajectories; + int max_lost_time; + float lambda; + float det_thresh; +}; + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/include/trajectory.h b/deploy/pptracking/cpp/include/trajectory.h new file mode 100644 index 0000000000000000000000000000000000000000..c21e8cac368a77983da2844794b7c778a97573a3 --- /dev/null +++ b/deploy/pptracking/cpp/include/trajectory.h @@ -0,0 +1,230 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
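+
+// Box parameterization used throughout this header (worked example with
+// illustrative numbers): a detection ltrb = [100, 50, 140, 150]
+// (left, top, right, bottom) maps via ltrb2xyah() to
+// xyah = [120, 100, 0.4, 100], i.e. [center_x, center_y, width/height, height].
+// The 4-D xyah vector is the Kalman measurement; the filter state is 8-D,
+// additionally tracking the velocity of each component (TKalmanFilter is
+// constructed as cv::KalmanFilter(8, 4) with a constant-velocity transition).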
+ +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.h +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#pragma once + +#include + +#include +#include +#include +#include "opencv2/video/tracking.hpp" + +namespace PaddleDetection { + +typedef enum { New = 0, Tracked = 1, Lost = 2, Removed = 3 } TrajectoryState; + +class Trajectory; +typedef std::vector TrajectoryPool; +typedef std::vector::iterator TrajectoryPoolIterator; +typedef std::vector TrajectoryPtrPool; +typedef std::vector::iterator TrajectoryPtrPoolIterator; + +class TKalmanFilter : public cv::KalmanFilter { + public: + TKalmanFilter(void); + virtual ~TKalmanFilter(void) {} + virtual void init(const cv::Mat &measurement); + virtual const cv::Mat &predict(); + virtual const cv::Mat &correct(const cv::Mat &measurement); + virtual void project(cv::Mat *mean, cv::Mat *covariance) const; + + private: + float std_weight_position; + float std_weight_velocity; +}; + +inline TKalmanFilter::TKalmanFilter(void) : cv::KalmanFilter(8, 4) { + cv::KalmanFilter::transitionMatrix = cv::Mat::eye(8, 8, CV_32F); + for (int i = 0; i < 4; ++i) + cv::KalmanFilter::transitionMatrix.at(i, i + 4) = 1; + cv::KalmanFilter::measurementMatrix = cv::Mat::eye(4, 8, CV_32F); + std_weight_position = 1 / 20.f; + std_weight_velocity = 1 / 160.f; +} + +class Trajectory : public TKalmanFilter { + public: + Trajectory(); + Trajectory(const cv::Vec4f <rb, float score, const cv::Mat &embedding); + Trajectory(const Trajectory &other); + Trajectory &operator=(const Trajectory &rhs); + virtual ~Trajectory(void) {} + + static int next_id(); + virtual const cv::Mat &predict(void); + virtual void update(Trajectory *traj, + int timestamp, + bool update_embedding = true); + virtual void activate(int timestamp); + virtual void reactivate(Trajectory *traj, int timestamp, bool newid = false); + virtual void mark_lost(void); + virtual void mark_removed(void); + + friend TrajectoryPool operator+(const TrajectoryPool &a, + const TrajectoryPool &b); + friend TrajectoryPool operator+(const TrajectoryPool &a, + const TrajectoryPtrPool &b); + friend TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT + const TrajectoryPtrPool &b); + friend TrajectoryPool operator-(const TrajectoryPool &a, + const TrajectoryPool &b); + friend TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT + const TrajectoryPool &b); + friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + TrajectoryPool *b); + friend TrajectoryPtrPool operator-(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + + friend cv::Mat embedding_distance(const TrajectoryPool &a, + const TrajectoryPool &b); + friend cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + + friend cv::Mat mahalanobis_distance(const TrajectoryPool &a, + const TrajectoryPool &b); + friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + + friend cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b); + friend cv::Mat iou_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b); + friend cv::Mat iou_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b); + + private: + void 
update_embedding(const cv::Mat &embedding); + + public: + TrajectoryState state; + cv::Vec4f ltrb; + cv::Mat smooth_embedding; + int id; + bool is_activated; + int timestamp; + int starttime; + float score; + + private: + static int count; + cv::Vec4f xyah; + cv::Mat current_embedding; + float eta; + int length; +}; + +inline cv::Vec4f ltrb2xyah(const cv::Vec4f <rb) { + cv::Vec4f xyah; + xyah[0] = (ltrb[0] + ltrb[2]) * 0.5f; + xyah[1] = (ltrb[1] + ltrb[3]) * 0.5f; + xyah[3] = ltrb[3] - ltrb[1]; + xyah[2] = (ltrb[2] - ltrb[0]) / xyah[3]; + return xyah; +} + +inline Trajectory::Trajectory() + : state(New), + ltrb(cv::Vec4f()), + smooth_embedding(cv::Mat()), + id(0), + is_activated(false), + timestamp(0), + starttime(0), + score(0), + eta(0.9), + length(0) {} + +inline Trajectory::Trajectory(const cv::Vec4f <rb_, + float score_, + const cv::Mat &embedding) + : state(New), + ltrb(ltrb_), + smooth_embedding(cv::Mat()), + id(0), + is_activated(false), + timestamp(0), + starttime(0), + score(score_), + eta(0.9), + length(0) { + xyah = ltrb2xyah(ltrb); + update_embedding(embedding); +} + +inline Trajectory::Trajectory(const Trajectory &other) + : state(other.state), + ltrb(other.ltrb), + id(other.id), + is_activated(other.is_activated), + timestamp(other.timestamp), + starttime(other.starttime), + xyah(other.xyah), + score(other.score), + eta(other.eta), + length(other.length) { + other.smooth_embedding.copyTo(smooth_embedding); + other.current_embedding.copyTo(current_embedding); + // copy state in KalmanFilter + + other.statePre.copyTo(cv::KalmanFilter::statePre); + other.statePost.copyTo(cv::KalmanFilter::statePost); + other.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre); + other.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost); +} + +inline Trajectory &Trajectory::operator=(const Trajectory &rhs) { + this->state = rhs.state; + this->ltrb = rhs.ltrb; + rhs.smooth_embedding.copyTo(this->smooth_embedding); + this->id = rhs.id; + this->is_activated = rhs.is_activated; + this->timestamp = rhs.timestamp; + this->starttime = rhs.starttime; + this->xyah = rhs.xyah; + this->score = rhs.score; + rhs.current_embedding.copyTo(this->current_embedding); + this->eta = rhs.eta; + this->length = rhs.length; + + // copy state in KalmanFilter + + rhs.statePre.copyTo(cv::KalmanFilter::statePre); + rhs.statePost.copyTo(cv::KalmanFilter::statePost); + rhs.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre); + rhs.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost); + + return *this; +} + +inline int Trajectory::next_id() { + ++count; + return count; +} + +inline void Trajectory::mark_lost(void) { state = Lost; } + +inline void Trajectory::mark_removed(void) { state = Removed; } + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/include/utils.h b/deploy/pptracking/cpp/include/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..9d94492a430be3e25ceb572ceb181a5dce755637 --- /dev/null +++ b/deploy/pptracking/cpp/include/utils.h @@ -0,0 +1,44 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "include/tracker.h" + +namespace PaddleDetection { + +struct Rect { + float left; + float top; + float right; + float bottom; +}; + +struct MOTTrack { + int ids; + float score; + Rect rects; + int class_id = -1; +}; + +typedef std::vector MOTResult; + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/scripts/build.sh b/deploy/pptracking/cpp/scripts/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..8b8d2cf7f970e774fd838aecefa08e258d569fe6 --- /dev/null +++ b/deploy/pptracking/cpp/scripts/build.sh @@ -0,0 +1,78 @@ +# 是否使用GPU(即是否使用 CUDA) +WITH_GPU=OFF + +# 是否使用MKL or openblas,TX2需要设置为OFF +WITH_MKL=ON + +# 是否集成 TensorRT(仅WITH_GPU=ON 有效) +WITH_TENSORRT=OFF + +# paddle 预测库lib名称,由于不同平台不同版本预测库lib名称不同,请查看所下载的预测库中`paddle_inference/lib/`文件夹下`lib`的名称 +PADDLE_LIB_NAME=libpaddle_inference + +# TensorRT 的include路径 +TENSORRT_INC_DIR=/path/to/tensorrt/include + +# TensorRT 的lib路径 +TENSORRT_LIB_DIR=/path/to/tensorrt/lib + +# Paddle 预测库路径 +PADDLE_DIR=/path/to/paddle_inference + +# CUDA 的 lib 路径 +CUDA_LIB=/path/to/cuda/lib + +# CUDNN 的 lib 路径 +CUDNN_LIB=/path/to/cudnn/lib + +MACHINE_TYPE=`uname -m` +echo "MACHINE_TYPE: "${MACHINE_TYPE} + + +if [ "$MACHINE_TYPE" = "x86_64" ] +then + echo "set OPENCV_DIR for x86_64" + # linux系统通过以下命令下载预编译的opencv + mkdir -p $(pwd)/deps && cd $(pwd)/deps + wget -c https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz + tar -xvf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz && cd .. + + # set OPENCV_DIR + OPENCV_DIR=$(pwd)/deps/opencv-3.4.16_gcc8.2_ffmpeg + +elif [ "$MACHINE_TYPE" = "aarch64" ] +then + echo "set OPENCV_DIR for aarch64" + # TX2平台通过以下命令下载预编译的opencv + mkdir -p $(pwd)/deps && cd $(pwd)/deps + wget -c https://bj.bcebos.com/v1/paddledet/data/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz + tar -xvf TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz && cd .. + + # set OPENCV_DIR + OPENCV_DIR=$(pwd)/deps/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0/ + +else + echo "Please set OPENCV_DIR manually" +fi + +echo "OPENCV_DIR: "$OPENCV_DIR + +# 以下无需改动 +rm -rf build +mkdir -p build +cd build +cmake .. \ + -DWITH_GPU=${WITH_GPU} \ + -DWITH_MKL=${WITH_MKL} \ + -DWITH_TENSORRT=${WITH_TENSORRT} \ + -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \ + -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \ + -DPADDLE_DIR=${PADDLE_DIR} \ + -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \ + -DCUDA_LIB=${CUDA_LIB} \ + -DCUDNN_LIB=${CUDNN_LIB} \ + -DOPENCV_DIR=${OPENCV_DIR} \ + -DPADDLE_LIB_NAME=${PADDLE_LIB_NAME} \ + +make +echo "make finished!" diff --git a/deploy/pptracking/cpp/src/jde_predictor.cc b/deploy/pptracking/cpp/src/jde_predictor.cc new file mode 100644 index 0000000000000000000000000000000000000000..2abad38b583d7bf8b76930f6b006db9e9f32d764 --- /dev/null +++ b/deploy/pptracking/cpp/src/jde_predictor.cc @@ -0,0 +1,234 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include +// for setprecision +#include +#include +#include "include/jde_predictor.h" + +using namespace paddle_infer; // NOLINT + +namespace PaddleDetection { + +// Load Model and create model predictor +void JDEPredictor::LoadModel(const std::string& model_dir, + const std::string& run_mode) { + paddle_infer::Config config; + std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel"; + std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams"; + config.SetModel(prog_file, params_file); + if (this->device_ == "GPU") { + config.EnableUseGpu(200, this->gpu_id_); + config.SwitchIrOptim(true); + // use tensorrt + if (run_mode != "fluid") { + auto precision = paddle_infer::Config::Precision::kFloat32; + if (run_mode == "trt_fp32") { + precision = paddle_infer::Config::Precision::kFloat32; + } else if (run_mode == "trt_fp16") { + precision = paddle_infer::Config::Precision::kHalf; + } else if (run_mode == "trt_int8") { + precision = paddle_infer::Config::Precision::kInt8; + } else { + printf( + "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'"); + } + // set tensorrt + config.EnableTensorRtEngine(1 << 30, + 1, + this->min_subgraph_size_, + precision, + false, + this->trt_calib_mode_); + } + } else if (this->device_ == "XPU") { + config.EnableXpu(10 * 1024 * 1024); + } else { + config.DisableGpu(); + if (this->use_mkldnn_) { + config.EnableMKLDNN(); + // cache 10 different shapes for mkldnn to avoid memory leak + config.SetMkldnnCacheCapacity(10); + } + config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_); + } + config.SwitchUseFeedFetchOps(false); + config.SwitchIrOptim(true); + config.DisableGlogInfo(); + // Memory optimization + config.EnableMemoryOptim(); + predictor_ = std::move(CreatePredictor(config)); +} + +void FilterDets(const float conf_thresh, + const cv::Mat dets, + std::vector* index) { + for (int i = 0; i < dets.rows; ++i) { + float score = *dets.ptr(i, 4); + if (score > conf_thresh) { + index->push_back(i); + } + } +} + +void JDEPredictor::Preprocess(const cv::Mat& ori_im) { + // Clone the image : keep the original mat for postprocess + cv::Mat im = ori_im.clone(); + preprocessor_.Run(&im, &inputs_); +} + +void JDEPredictor::Postprocess(const cv::Mat dets, + const cv::Mat emb, + MOTResult* result) { + result->clear(); + std::vector tracks; + std::vector valid; + FilterDets(conf_thresh_, dets, &valid); + cv::Mat new_dets, new_emb; + for (int i = 0; i < valid.size(); ++i) { + new_dets.push_back(dets.row(valid[i])); + new_emb.push_back(emb.row(valid[i])); + } + JDETracker::instance()->update(new_dets, new_emb, &tracks); + if (tracks.size() == 0) { + MOTTrack mot_track; + Rect ret = {*dets.ptr(0, 0), + *dets.ptr(0, 1), + *dets.ptr(0, 2), + *dets.ptr(0, 3)}; + mot_track.ids = 1; + mot_track.score = *dets.ptr(0, 4); + mot_track.rects = ret; + result->push_back(mot_track); + } else { + std::vector::iterator titer; + for (titer = tracks.begin(); titer != tracks.end(); ++titer) { + if (titer->score < threshold_) { + continue; + } else { + float w = titer->ltrb[2] - titer->ltrb[0]; + float h = titer->ltrb[3] - titer->ltrb[1]; + bool vertical = w / h > 1.6; + float area = w * h; + if (area > min_box_area_ && !vertical) { + MOTTrack mot_track; + Rect ret = { + titer->ltrb[0], titer->ltrb[1], titer->ltrb[2], titer->ltrb[3]}; + mot_track.rects = ret; + mot_track.score = titer->score; + mot_track.ids = titer->id; + 
result->push_back(mot_track); + } + } + } + } +} + +void JDEPredictor::Predict(const std::vector imgs, + const double threshold, + MOTResult* result, + std::vector* times) { + auto preprocess_start = std::chrono::steady_clock::now(); + int batch_size = imgs.size(); + + // in_data_batch + std::vector in_data_all; + std::vector im_shape_all(batch_size * 2); + std::vector scale_factor_all(batch_size * 2); + + // Preprocess image + for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) { + cv::Mat im = imgs.at(bs_idx); + Preprocess(im); + im_shape_all[bs_idx * 2] = inputs_.im_shape_[0]; + im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1]; + + scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0]; + scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1]; + + in_data_all.insert( + in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end()); + } + + // Prepare input tensor + auto input_names = predictor_->GetInputNames(); + for (const auto& tensor_name : input_names) { + auto in_tensor = predictor_->GetInputHandle(tensor_name); + if (tensor_name == "image") { + int rh = inputs_.in_net_shape_[0]; + int rw = inputs_.in_net_shape_[1]; + in_tensor->Reshape({batch_size, 3, rh, rw}); + in_tensor->CopyFromCpu(in_data_all.data()); + } else if (tensor_name == "im_shape") { + in_tensor->Reshape({batch_size, 2}); + in_tensor->CopyFromCpu(im_shape_all.data()); + } else if (tensor_name == "scale_factor") { + in_tensor->Reshape({batch_size, 2}); + in_tensor->CopyFromCpu(scale_factor_all.data()); + } + } + + auto preprocess_end = std::chrono::steady_clock::now(); + std::vector bbox_shape; + std::vector emb_shape; + + // Run predictor + auto inference_start = std::chrono::steady_clock::now(); + predictor_->Run(); + // Get output tensor + auto output_names = predictor_->GetOutputNames(); + auto bbox_tensor = predictor_->GetOutputHandle(output_names[0]); + bbox_shape = bbox_tensor->shape(); + auto emb_tensor = predictor_->GetOutputHandle(output_names[1]); + emb_shape = emb_tensor->shape(); + // Calculate bbox length + int bbox_size = 1; + for (int j = 0; j < bbox_shape.size(); ++j) { + bbox_size *= bbox_shape[j]; + } + // Calculate emb length + int emb_size = 1; + for (int j = 0; j < emb_shape.size(); ++j) { + emb_size *= emb_shape[j]; + } + + bbox_data_.resize(bbox_size); + bbox_tensor->CopyToCpu(bbox_data_.data()); + + emb_data_.resize(emb_size); + emb_tensor->CopyToCpu(emb_data_.data()); + auto inference_end = std::chrono::steady_clock::now(); + + // Postprocessing result + auto postprocess_start = std::chrono::steady_clock::now(); + result->clear(); + + cv::Mat dets(bbox_shape[0], 6, CV_32FC1, bbox_data_.data()); + cv::Mat emb(bbox_shape[0], emb_shape[1], CV_32FC1, emb_data_.data()); + + Postprocess(dets, emb, result); + + auto postprocess_end = std::chrono::steady_clock::now(); + + std::chrono::duration preprocess_diff = + preprocess_end - preprocess_start; + (*times)[0] += static_cast(preprocess_diff.count() * 1000); + std::chrono::duration inference_diff = inference_end - inference_start; + (*times)[1] += static_cast(inference_diff.count() * 1000); + std::chrono::duration postprocess_diff = + postprocess_end - postprocess_start; + (*times)[2] += static_cast(postprocess_diff.count() * 1000); +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/src/lapjv.cpp b/deploy/pptracking/cpp/src/lapjv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bb710e740e5291c5332ad91770e3649ec317ed20 --- /dev/null +++ b/deploy/pptracking/cpp/src/lapjv.cpp @@ -0,0 +1,409 @@ 
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The code is based on: +// https://github.com/gatagat/lap/blob/master/lap/lapjv.cpp +// Ths copyright of gatagat/lap is as follows: +// MIT License + +#include +#include +#include + +#include "include/lapjv.h" + +namespace PaddleDetection { + +/** Column-reduction and reduction transfer for a dense cost matrix. + */ +int _ccrrt_dense( + const int n, float *cost[], int *free_rows, int *x, int *y, float *v) { + int n_free_rows; + bool *unique; + + for (int i = 0; i < n; i++) { + x[i] = -1; + v[i] = LARGE; + y[i] = 0; + } + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + const float c = cost[i][j]; + if (c < v[j]) { + v[j] = c; + y[j] = i; + } + } + } + NEW(unique, bool, n); + memset(unique, TRUE, n); + { + int j = n; + do { + j--; + const int i = y[j]; + if (x[i] < 0) { + x[i] = j; + } else { + unique[i] = FALSE; + y[j] = -1; + } + } while (j > 0); + } + n_free_rows = 0; + for (int i = 0; i < n; i++) { + if (x[i] < 0) { + free_rows[n_free_rows++] = i; + } else if (unique[i]) { + const int j = x[i]; + float min = LARGE; + for (int j2 = 0; j2 < n; j2++) { + if (j2 == static_cast(j)) { + continue; + } + const float c = cost[i][j2] - v[j2]; + if (c < min) { + min = c; + } + } + v[j] -= min; + } + } + FREE(unique); + return n_free_rows; +} + +/** Augmenting row reduction for a dense cost matrix. + */ +int _carr_dense(const int n, + float *cost[], + const int n_free_rows, + int *free_rows, + int *x, + int *y, + float *v) { + int current = 0; + int new_free_rows = 0; + int rr_cnt = 0; + while (current < n_free_rows) { + int i0; + int j1, j2; + float v1, v2, v1_new; + bool v1_lowers; + + rr_cnt++; + const int free_i = free_rows[current++]; + j1 = 0; + v1 = cost[free_i][0] - v[0]; + j2 = -1; + v2 = LARGE; + for (int j = 1; j < n; j++) { + const float c = cost[free_i][j] - v[j]; + if (c < v2) { + if (c >= v1) { + v2 = c; + j2 = j; + } else { + v2 = v1; + v1 = c; + j2 = j1; + j1 = j; + } + } + } + i0 = y[j1]; + v1_new = v[j1] - (v2 - v1); + v1_lowers = v1_new < v[j1]; + if (rr_cnt < current * n) { + if (v1_lowers) { + v[j1] = v1_new; + } else if (i0 >= 0 && j2 >= 0) { + j1 = j2; + i0 = y[j2]; + } + if (i0 >= 0) { + if (v1_lowers) { + free_rows[--current] = i0; + } else { + free_rows[new_free_rows++] = i0; + } + } + } else { + if (i0 >= 0) { + free_rows[new_free_rows++] = i0; + } + } + x[free_i] = j1; + y[j1] = free_i; + } + return new_free_rows; +} + +/** Find columns with minimum d[j] and put them on the SCAN list. + */ +int _find_dense(const int n, int lo, float *d, int *cols, int *y) { + int hi = lo + 1; + float mind = d[cols[lo]]; + for (int k = hi; k < n; k++) { + int j = cols[k]; + if (d[j] <= mind) { + if (d[j] < mind) { + hi = lo; + mind = d[j]; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + return hi; +} + +// Scan all columns in TODO starting from arbitrary column in SCAN +// and try to decrease d of the TODO columns using the SCAN column. 
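+// Returns the index of an unassigned column that can terminate an augmenting
+// path as soon as one is found; otherwise returns -1 after updating *plo and
+// *phi to the new SCAN range.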
+int _scan_dense(const int n, + float *cost[], + int *plo, + int *phi, + float *d, + int *cols, + int *pred, + int *y, + float *v) { + int lo = *plo; + int hi = *phi; + float h, cred_ij; + + while (lo != hi) { + int j = cols[lo++]; + const int i = y[j]; + const float mind = d[j]; + h = cost[i][j] - v[j] - mind; + // For all columns in TODO + for (int k = hi; k < n; k++) { + j = cols[k]; + cred_ij = cost[i][j] - v[j] - h; + if (cred_ij < d[j]) { + d[j] = cred_ij; + pred[j] = i; + if (cred_ij == mind) { + if (y[j] < 0) { + return j; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + } + } + *plo = lo; + *phi = hi; + return -1; +} + +/** Single iteration of modified Dijkstra shortest path algorithm as explained + * in the JV paper. + * + * This is a dense matrix version. + * + * \return The closest free column index. + */ +int find_path_dense(const int n, + float *cost[], + const int start_i, + int *y, + float *v, + int *pred) { + int lo = 0, hi = 0; + int final_j = -1; + int n_ready = 0; + int *cols; + float *d; + + NEW(cols, int, n); + NEW(d, float, n); + + for (int i = 0; i < n; i++) { + cols[i] = i; + pred[i] = start_i; + d[i] = cost[start_i][i] - v[i]; + } + while (final_j == -1) { + // No columns left on the SCAN list. + if (lo == hi) { + n_ready = lo; + hi = _find_dense(n, lo, d, cols, y); + for (int k = lo; k < hi; k++) { + const int j = cols[k]; + if (y[j] < 0) { + final_j = j; + } + } + } + if (final_j == -1) { + final_j = _scan_dense(n, cost, &lo, &hi, d, cols, pred, y, v); + } + } + + { + const float mind = d[cols[lo]]; + for (int k = 0; k < n_ready; k++) { + const int j = cols[k]; + v[j] += d[j] - mind; + } + } + + FREE(cols); + FREE(d); + + return final_j; +} + +/** Augment for a dense cost matrix. + */ +int _ca_dense(const int n, + float *cost[], + const int n_free_rows, + int *free_rows, + int *x, + int *y, + float *v) { + int *pred; + + NEW(pred, int, n); + + for (int *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) { + int i = -1, j; + int k = 0; + + j = find_path_dense(n, cost, *pfree_i, y, v, pred); + while (i != *pfree_i) { + i = pred[j]; + y[j] = i; + SWAP_INDICES(j, x[i]); + k++; + } + } + FREE(pred); + return 0; +} + +/** Solve dense sparse LAP. + */ +int lapjv_internal(const cv::Mat &cost, + const bool extend_cost, + const float cost_limit, + int *x, + int *y) { + int n_rows = cost.rows; + int n_cols = cost.cols; + int n; + if (n_rows == n_cols) { + n = n_rows; + } else if (!extend_cost) { + throw std::invalid_argument( + "Square cost array expected. 
If cost is intentionally non-square, pass " + "extend_cost=True."); + } + + // Get extend cost + if (extend_cost || cost_limit < LARGE) { + n = n_rows + n_cols; + } + cv::Mat cost_expand(n, n, CV_32F); + float expand_value; + if (cost_limit < LARGE) { + expand_value = cost_limit / 2; + } else { + double max_v; + minMaxLoc(cost, nullptr, &max_v); + expand_value = static_cast(max_v) + 1.; + } + + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cost_expand.at(i, j) = expand_value; + if (i >= n_rows && j >= n_cols) { + cost_expand.at(i, j) = 0; + } else if (i < n_rows && j < n_cols) { + cost_expand.at(i, j) = cost.at(i, j); + } + } + } + + // Convert Mat to pointer array + float **cost_ptr; + NEW(cost_ptr, float *, n); + for (int i = 0; i < n; ++i) { + NEW(cost_ptr[i], float, n); + } + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + cost_ptr[i][j] = cost_expand.at(i, j); + } + } + + int ret; + int *free_rows; + float *v; + int *x_c; + int *y_c; + + NEW(free_rows, int, n); + NEW(v, float, n); + NEW(x_c, int, n); + NEW(y_c, int, n); + + ret = _ccrrt_dense(n, cost_ptr, free_rows, x_c, y_c, v); + int i = 0; + while (ret > 0 && i < 2) { + ret = _carr_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v); + i++; + } + if (ret > 0) { + ret = _ca_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v); + } + FREE(v); + FREE(free_rows); + for (int i = 0; i < n; ++i) { + FREE(cost_ptr[i]); + } + FREE(cost_ptr); + if (ret != 0) { + if (ret == -1) { + throw "Out of memory."; + } + throw "Unknown error (lapjv_internal)"; + } + // Get output of x, y, opt + for (int i = 0; i < n; ++i) { + if (i < n_rows) { + x[i] = x_c[i]; + if (x[i] >= n_cols) { + x[i] = -1; + } + } + if (i < n_cols) { + y[i] = y_c[i]; + if (y[i] >= n_rows) { + y[i] = -1; + } + } + } + + FREE(x_c); + FREE(y_c); + return ret; +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/src/main.cc b/deploy/pptracking/cpp/src/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..9861eecb91d334feecfd0bc88f4e9dc856ca152b --- /dev/null +++ b/deploy/pptracking/cpp/src/main.cc @@ -0,0 +1,172 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#else +#include +#include +#endif + +#include +#include "include/pipeline.h" + +DEFINE_string(video_file, "", "Path of input video."); +DEFINE_string(video_other_file, + "", + "Path of other input video used for MTMCT."); +DEFINE_string(device, + "CPU", + "Choose the device you want to run, it can be: CPU/GPU/XPU, " + "default is CPU."); +DEFINE_double(threshold, 0.5, "Threshold of score."); +DEFINE_string(output_dir, "output", "Directory of output visualization files."); +DEFINE_string(run_mode, + "fluid", + "Mode of running(fluid/trt_fp32/trt_fp16/trt_int8)"); +DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); +DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU"); +DEFINE_int32(cpu_threads, 1, "Num of threads with CPU"); +DEFINE_bool(trt_calib_mode, + false, + "If the model is produced by TRT offline quantitative calibration, " + "trt_calib_mode need to set True"); +DEFINE_bool(tiny_obj, false, "Whether tracking tiny object"); +DEFINE_bool(do_entrance_counting, + false, + "Whether counting the numbers of identifiers entering " + "or getting out from the entrance."); +DEFINE_int32(secs_interval, 10, "The seconds interval to count after tracking"); +DEFINE_bool(save_result, false, "Whether saving result after tracking"); +DEFINE_string( + scene, + "", + "scene of tracking system, it can be : pedestrian/vehicle/multiclass"); +DEFINE_bool(is_mtmct, false, "Whether use multi-target multi-camera tracking"); +DEFINE_string(track_model_dir, "", "Path of tracking model"); +DEFINE_string(det_model_dir, "", "Path of detection model"); +DEFINE_string(reid_model_dir, "", "Path of reid model"); + +static std::string DirName(const std::string& filepath) { + auto pos = filepath.rfind(OS_PATH_SEP); + if (pos == std::string::npos) { + return ""; + } + return filepath.substr(0, pos); +} + +static bool PathExists(const std::string& path) { +#ifdef _WIN32 + struct _stat buffer; + return (_stat(path.c_str(), &buffer) == 0); +#else + struct stat buffer; + return (stat(path.c_str(), &buffer) == 0); +#endif // !_WIN32 +} + +static void MkDir(const std::string& path) { + if (PathExists(path)) return; + int ret = 0; +#ifdef _WIN32 + ret = _mkdir(path.c_str()); +#else + ret = mkdir(path.c_str(), 0755); +#endif // !_WIN32 + if (ret != 0) { + std::string path_error(path); + path_error += " mkdir failed!"; + throw std::runtime_error(path_error); + } +} + +static void MkDirs(const std::string& path) { + if (path.empty()) return; + if (PathExists(path)) return; + + MkDirs(DirName(path)); + MkDir(path); +} + +int main(int argc, char** argv) { + // Parsing command-line + google::ParseCommandLineFlags(&argc, &argv, true); + bool has_model_dir = + !(FLAGS_track_model_dir.empty() && FLAGS_det_model_dir.empty() && + FLAGS_reid_model_dir.empty()); + if (FLAGS_video_file.empty() || (FLAGS_scene.empty() && !has_model_dir)) { + LOG(ERROR) << "Usage: \n" + << "1. ./main -video_file=/PATH/TO/INPUT/IMAGE/ " + << "-scene=pedestrian/vehicle/multiclass\n" + << "2. 
./main -video_file=/PATH/TO/INPUT/IMAGE/ " + << "-track_model_dir=/PATH/TO/MODEL_DIR" << std::endl; + + return -1; + } + if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" || + FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) { + LOG(ERROR) + << "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'."; + return -1; + } + transform(FLAGS_device.begin(), + FLAGS_device.end(), + FLAGS_device.begin(), + ::toupper); + if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" || + FLAGS_device == "XPU")) { + LOG(ERROR) << "device should be 'CPU', 'GPU' or 'XPU'."; + return -1; + } + + if (!PathExists(FLAGS_output_dir)) { + MkDirs(FLAGS_output_dir); + } + + PaddleDetection::Pipeline pipeline(FLAGS_device, + FLAGS_threshold, + FLAGS_output_dir, + FLAGS_run_mode, + FLAGS_gpu_id, + FLAGS_use_mkldnn, + FLAGS_cpu_threads, + FLAGS_trt_calib_mode, + FLAGS_do_entrance_counting, + FLAGS_save_result, + FLAGS_scene, + FLAGS_tiny_obj, + FLAGS_is_mtmct, + FLAGS_secs_interval, + FLAGS_track_model_dir, + FLAGS_det_model_dir, + FLAGS_reid_model_dir); + + pipeline.SetInput(FLAGS_video_file); + if (!FLAGS_video_other_file.empty()) { + pipeline.SetInput(FLAGS_video_other_file); + } + pipeline.Run(); + return 0; +} diff --git a/deploy/pptracking/cpp/src/pipeline.cc b/deploy/pptracking/cpp/src/pipeline.cc new file mode 100644 index 0000000000000000000000000000000000000000..9606f65a658d4478880b7a856d4744e7d9124bbe --- /dev/null +++ b/deploy/pptracking/cpp/src/pipeline.cc @@ -0,0 +1,368 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
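+
+// Pipeline ties the tracking components together: SetInput() registers one or
+// two input videos, SelectModel()/InitPredictor() pick and load either the JDE
+// (FairMOT-style single model) predictor or the SDE (separate detector + ReID)
+// predictor, and Run() dispatches to PredictMOT() for one camera or
+// PredictMTMCT() for two. A minimal single-camera usage sketch, mirroring
+// main.cc (constructor arguments omitted for brevity):
+//
+//   PaddleDetection::Pipeline pipeline(/* device, threshold, output_dir, ... */);
+//   pipeline.SetInput("input.mp4");
+//   pipeline.Run();  // writes <output_dir>/mot_output.mp4 (+ txt if save_result)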
+ +#include +// for setprecision +#include +#include +#include +#include + +#include "include/pipeline.h" +#include "include/postprocess.h" +#include "include/predictor.h" + +namespace PaddleDetection { + +void Pipeline::SetInput(const std::string& input_video) { + input_.push_back(input_video); +} + +void Pipeline::ClearInput() { + input_.clear(); + stream_.clear(); +} + +void Pipeline::SelectModel(const std::string& scene, + const bool tiny_obj, + const bool is_mtmct, + const std::string track_model_dir, + const std::string det_model_dir, + const std::string reid_model_dir) { + // model_dir has higher priority + if (!track_model_dir.empty()) { + track_model_dir_ = track_model_dir; + return; + } + if (!det_model_dir.empty() && !reid_model_dir.empty()) { + det_model_dir_ = det_model_dir; + reid_model_dir_ = reid_model_dir; + return; + } + + // Single camera model, based on FairMot + if (scene == "pedestrian") { + if (tiny_obj) { + track_model_dir_ = "../pedestrian_track_tiny"; + } else { + track_model_dir_ = "../pedestrian_track"; + } + } else if (scene != "vehicle") { + if (tiny_obj) { + track_model_dir_ = "../vehicle_track_tiny"; + } else { + track_model_dir_ = "../vehicle_track"; + } + } else if (scene == "multiclass") { + if (tiny_obj) { + track_model_dir_ = "../multiclass_track_tiny"; + } else { + track_model_dir_ = "../multiclass_track"; + } + } + + // Multi-camera model, based on PicoDet & LCNet + if (is_mtmct && scene == "pedestrian") { + det_model_dir_ = "../pedestrian_det"; + reid_model_dir_ = "../pedestrian_reid"; + } else if (is_mtmct && scene == "vehicle") { + det_model_dir_ = "../vehicle_det"; + reid_model_dir_ = "../vehicle_reid"; + } else if (is_mtmct && scene == "multiclass") { + throw "Multi-camera tracking is not supported in multiclass scene now."; + } +} + +void Pipeline::InitPredictor() { + if (track_model_dir_.empty() && det_model_dir_.empty()) { + throw "Predictor must receive track_model or det_model!"; + } + + if (!track_model_dir_.empty()) { + jde_sct_ = std::make_shared(device_, + track_model_dir_, + threshold_, + run_mode_, + gpu_id_, + use_mkldnn_, + cpu_threads_, + trt_calib_mode_); + } + if (!det_model_dir_.empty()) { + sde_sct_ = std::make_shared(device_, + det_model_dir_, + reid_model_dir_, + threshold_, + run_mode_, + gpu_id_, + use_mkldnn_, + cpu_threads_, + trt_calib_mode_); + } +} + +void Pipeline::Run() { + if (track_model_dir_.empty() && det_model_dir_.empty()) { + LOG(ERROR) << "Pipeline must use SelectModel before Run"; + return; + } + if (input_.size() == 0) { + LOG(ERROR) << "Pipeline must use SetInput before Run"; + return; + } + + if (!track_model_dir_.empty()) { + // single camera + if (input_.size() > 1) { + throw "Single camera tracking except single video, but received %d", + input_.size(); + } + PredictMOT(input_[0]); + } else { + // multi cameras + if (input_.size() != 2) { + throw "Multi camera tracking except two videos, but received %d", + input_.size(); + } + PredictMTMCT(input_); + } +} + +void Pipeline::PredictMOT(const std::string& video_path) { + // Open video + cv::VideoCapture capture; + capture.open(video_path.c_str()); + if (!capture.isOpened()) { + printf("can not open video : %s\n", video_path.c_str()); + return; + } + + // Get Video info : resolution, fps + int video_width = static_cast(capture.get(CV_CAP_PROP_FRAME_WIDTH)); + int video_height = static_cast(capture.get(CV_CAP_PROP_FRAME_HEIGHT)); + int video_fps = static_cast(capture.get(CV_CAP_PROP_FPS)); + + LOG(INFO) << "----------------------- Input info 
-----------------------"; + LOG(INFO) << "video_width: " << video_width; + LOG(INFO) << "video_height: " << video_height; + LOG(INFO) << "input fps: " << video_fps; + + // Create VideoWriter for output + cv::VideoWriter video_out; + std::string video_out_path = output_dir_ + OS_PATH_SEP + "mot_output.mp4"; + int fcc = cv::VideoWriter::fourcc('m', 'p', '4', 'v'); + video_out.open(video_out_path.c_str(), + fcc, // 0x00000021, + video_fps, + cv::Size(video_width, video_height), + true); + if (!video_out.isOpened()) { + printf("create video writer failed!\n"); + return; + } + + PaddleDetection::MOTResult result; + std::vector det_times(3); + std::set id_set; + std::set interval_id_set; + std::vector in_id_list; + std::vector out_id_list; + std::map> prev_center; + Rect entrance = {0, + static_cast(video_height) / 2, + static_cast(video_width), + static_cast(video_height) / 2}; + double times; + double total_time; + // Capture all frames and do inference + cv::Mat frame; + int frame_id = 0; + + std::vector records; + std::vector flow_records; + records.push_back("result format: frame_id, track_id, x1, y1, w, h\n"); + + LOG(INFO) << "------------------- Predict info ------------------------"; + while (capture.read(frame)) { + if (frame.empty()) { + break; + } + std::vector imgs; + imgs.push_back(frame); + jde_sct_->Predict(imgs, threshold_, &result, &det_times); + frame_id += 1; + total_time = std::accumulate(det_times.begin(), det_times.end(), 0.); + times = total_time / frame_id; + + LOG(INFO) << "frame_id: " << frame_id + << " predict time(s): " << total_time / 1000; + + cv::Mat out_img = PaddleDetection::VisualizeTrackResult( + frame, result, 1000. / times, frame_id); + + // TODO(qianhui): the entrance line can be set by users + PaddleDetection::FlowStatistic(result, + frame_id, + secs_interval_, + do_entrance_counting_, + video_fps, + entrance, + &id_set, + &interval_id_set, + &in_id_list, + &out_id_list, + &prev_center, + &flow_records); + + if (save_result_) { + PaddleDetection::SaveMOTResult(result, frame_id, &records); + } + + // Draw the entrance line + if (do_entrance_counting_) { + float line_thickness = std::max(1, static_cast(video_width / 500.)); + cv::Point pt1 = cv::Point(entrance.left, entrance.top); + cv::Point pt2 = cv::Point(entrance.right, entrance.bottom); + cv::line(out_img, pt1, pt2, cv::Scalar(0, 255, 255), line_thickness); + } + video_out.write(out_img); + } + capture.release(); + video_out.release(); + PrintBenchmarkLog(det_times, frame_id); + LOG(INFO) << "-------------------- Final Output info -------------------"; + LOG(INFO) << "Total frame: " << frame_id; + LOG(INFO) << "Visualized output saved as " << video_out_path.c_str(); + if (save_result_) { + FILE* fp; + + std::string result_output_path = + output_dir_ + OS_PATH_SEP + "mot_output.txt"; + if ((fp = fopen(result_output_path.c_str(), "w+")) == NULL) { + printf("Open %s error.\n", result_output_path.c_str()); + return; + } + for (int l; l < records.size(); ++l) { + fprintf(fp, records[l].c_str()); + } + + fclose(fp); + LOG(INFO) << "txt result output saved as " << result_output_path.c_str(); + + result_output_path = output_dir_ + OS_PATH_SEP + "flow_statistic.txt"; + if ((fp = fopen(result_output_path.c_str(), "w+")) == NULL) { + printf("Open %s error.\n", result_output_path); + return; + } + for (int l; l < flow_records.size(); ++l) { + fprintf(fp, flow_records[l].c_str()); + } + fclose(fp); + LOG(INFO) << "txt flow statistic saved as " << result_output_path.c_str(); + } +} + +void 
Pipeline::PredictMTMCT(const std::vector video_path) { + throw "Not Implement!"; +} + +void Pipeline::RunMOTStream(const cv::Mat img, + const int frame_id, + const int video_fps, + const Rect entrance, + cv::Mat out_img, + std::vector* records, + std::set* id_set, + std::set* interval_id_set, + std::vector* in_id_list, + std::vector* out_id_list, + std::map>* prev_center, + std::vector* flow_records) { + PaddleDetection::MOTResult result; + std::vector det_times(3); + double times; + double total_time; + + LOG(INFO) << "------------------- Predict info ------------------------"; + std::vector imgs; + imgs.push_back(img); + jde_sct_->Predict(imgs, threshold_, &result, &det_times); + total_time = std::accumulate(det_times.begin(), det_times.end(), 0.); + times = total_time / frame_id; + + LOG(INFO) << "frame_id: " << frame_id + << " predict time(s): " << total_time / 1000; + + out_img = PaddleDetection::VisualizeTrackResult( + img, result, 1000. / times, frame_id); + + // Count total number + // Count in & out number + PaddleDetection::FlowStatistic(result, + frame_id, + secs_interval_, + do_entrance_counting_, + video_fps, + entrance, + id_set, + interval_id_set, + in_id_list, + out_id_list, + prev_center, + flow_records); + + PrintBenchmarkLog(det_times, frame_id); + if (save_result_) { + PaddleDetection::SaveMOTResult(result, frame_id, records); + } +} + +void Pipeline::RunMTMCTStream(const std::vector imgs, + std::vector* records) { + throw "Not Implement!"; +} + +void Pipeline::PrintBenchmarkLog(const std::vector det_time, + const int img_num) { + LOG(INFO) << "----------------------- Config info -----------------------"; + LOG(INFO) << "runtime_device: " << device_; + LOG(INFO) << "ir_optim: " + << "True"; + LOG(INFO) << "enable_memory_optim: " + << "True"; + int has_trt = run_mode_.find("trt"); + if (has_trt >= 0) { + LOG(INFO) << "enable_tensorrt: " + << "True"; + std::string precision = run_mode_.substr(4, 8); + LOG(INFO) << "precision: " << precision; + } else { + LOG(INFO) << "enable_tensorrt: " + << "False"; + LOG(INFO) << "precision: " + << "fp32"; + } + LOG(INFO) << "enable_mkldnn: " << (use_mkldnn_ ? "True" : "False"); + LOG(INFO) << "cpu_math_library_num_threads: " << cpu_threads_; + LOG(INFO) << "----------------------- Perf info ------------------------"; + LOG(INFO) << "Total number of predicted data: " << img_num + << " and total time spent(s): " + << std::accumulate(det_time.begin(), det_time.end(), 0.) / 1000; + int num = std::max(1, img_num); + LOG(INFO) << "preproce_time(ms): " << det_time[0] / num + << ", inference_time(ms): " << det_time[1] / num + << ", postprocess_time(ms): " << det_time[2] / num; +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/src/postprocess.cc b/deploy/pptracking/cpp/src/postprocess.cc new file mode 100644 index 0000000000000000000000000000000000000000..39a752aeb179bdf44da8c1b05376dab4618ad73a --- /dev/null +++ b/deploy/pptracking/cpp/src/postprocess.cc @@ -0,0 +1,207 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +// for setprecision +#include +#include +#include +#include "include/postprocess.h" + +namespace PaddleDetection { + +cv::Scalar GetColor(int idx) { + idx = idx * 3; + cv::Scalar color = + cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255); + return color; +} + +cv::Mat VisualizeTrackResult(const cv::Mat& img, + const MOTResult& results, + const float fps, + const int frame_id) { + cv::Mat vis_img = img.clone(); + int im_h = img.rows; + int im_w = img.cols; + float text_scale = std::max(1, static_cast(im_w / 1600.)); + float text_thickness = 2.; + float line_thickness = std::max(1, static_cast(im_w / 500.)); + + std::ostringstream oss; + oss << std::setiosflags(std::ios::fixed) << std::setprecision(4); + oss << "frame: " << frame_id << " "; + oss << "fps: " << fps << " "; + oss << "num: " << results.size(); + std::string text = oss.str(); + + cv::Point origin; + origin.x = 0; + origin.y = static_cast(15 * text_scale); + cv::putText(vis_img, + text, + origin, + cv::FONT_HERSHEY_PLAIN, + text_scale, + (0, 0, 255), + 2); + + for (int i = 0; i < results.size(); ++i) { + const int obj_id = results[i].ids; + const float score = results[i].score; + + cv::Scalar color = GetColor(obj_id); + + cv::Point pt1 = cv::Point(results[i].rects.left, results[i].rects.top); + cv::Point pt2 = cv::Point(results[i].rects.right, results[i].rects.bottom); + cv::Point id_pt = + cv::Point(results[i].rects.left, results[i].rects.top + 10); + cv::Point score_pt = + cv::Point(results[i].rects.left, results[i].rects.top - 10); + cv::rectangle(vis_img, pt1, pt2, color, line_thickness); + + std::ostringstream idoss; + idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4); + idoss << obj_id; + std::string id_text = idoss.str(); + + cv::putText(vis_img, + id_text, + id_pt, + cv::FONT_HERSHEY_PLAIN, + text_scale, + cv::Scalar(0, 255, 255), + text_thickness); + + std::ostringstream soss; + soss << std::setiosflags(std::ios::fixed) << std::setprecision(2); + soss << score; + std::string score_text = soss.str(); + + cv::putText(vis_img, + score_text, + score_pt, + cv::FONT_HERSHEY_PLAIN, + text_scale, + cv::Scalar(0, 255, 255), + text_thickness); + } + return vis_img; +} + +void FlowStatistic(const MOTResult& results, + const int frame_id, + const int secs_interval, + const bool do_entrance_counting, + const int video_fps, + const Rect entrance, + std::set* id_set, + std::set* interval_id_set, + std::vector* in_id_list, + std::vector* out_id_list, + std::map>* prev_center, + std::vector* records) { + if (frame_id == 0) interval_id_set->clear(); + + if (do_entrance_counting) { + // Count in and out number: + // Use horizontal center line as the entrance just for simplification. + // If a person located in the above the horizontal center line + // at the previous frame and is in the below the line at the current frame, + // the in number is increased by one. + // If a person was in the below the horizontal center line + // at the previous frame and locates in the below the line at the current + // frame, + // the out number is increased by one. + // TODO(qianhui): if the entrance is not the horizontal center line, + // the counting method should be optimized. 
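+    // Example (illustrative numbers): with entrance_y = 540, a track whose
+    // center moves from y = 530 in the previous frame to y = 550 in the
+    // current frame crosses the line downwards and is counted as "in"; the
+    // reverse movement is counted as "out". A track seen for the first time
+    // only has its center recorded in prev_center.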
+ + float entrance_y = entrance.top; + for (const auto& result : results) { + float center_x = (result.rects.left + result.rects.right) / 2; + float center_y = (result.rects.top + result.rects.bottom) / 2; + int ids = result.ids; + std::map>::iterator iter; + iter = prev_center->find(ids); + if (iter != prev_center->end()) { + if (iter->second[1] <= entrance_y && center_y > entrance_y) { + in_id_list->push_back(ids); + } + if (iter->second[1] >= entrance_y && center_y < entrance_y) { + out_id_list->push_back(ids); + } + (*prev_center)[ids][0] = center_x; + (*prev_center)[ids][1] = center_y; + } else { + prev_center->insert( + std::pair>(ids, {center_x, center_y})); + } + } + } + + // Count totol number, number at a manual-setting interval + for (const auto& result : results) { + id_set->insert(result.ids); + interval_id_set->insert(result.ids); + } + + std::ostringstream os; + os << "Frame id: " << frame_id << ", Total count: " << id_set->size(); + if (do_entrance_counting) { + os << ", In count: " << in_id_list->size() + << ", Out count: " << out_id_list->size(); + } + + // Reset counting at the interval beginning + int curr_interval_count = -1; + if (frame_id % video_fps == 0 && frame_id / video_fps % secs_interval == 0) { + curr_interval_count = interval_id_set->size(); + os << ", Count during " << secs_interval + << " secs: " << curr_interval_count; + interval_id_set->clear(); + } + os << "\n"; + std::string record = os.str(); + records->push_back(record); + LOG(INFO) << record; +} + +void SaveMOTResult(const MOTResult& results, + const int frame_id, + std::vector* records) { + // result format: frame_id, track_id, x1, y1, w, h + std::string record; + for (int i = 0; i < results.size(); ++i) { + MOTTrack mot_track = results[i]; + int ids = mot_track.ids; + float score = mot_track.score; + Rect rects = mot_track.rects; + float x1 = rects.left; + float y1 = rects.top; + float x2 = rects.right; + float y2 = rects.bottom; + float w = x2 - x1; + float h = y2 - y1; + if (w == 0 || h == 0) { + continue; + } + std::ostringstream os; + os << frame_id << " " << ids << "" << x1 << " " << y1 << " " << w << " " + << h << "\n"; + record = os.str(); + records->push_back(record); + } +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/src/predictor.cc b/deploy/pptracking/cpp/src/predictor.cc new file mode 100644 index 0000000000000000000000000000000000000000..ea479f3ab049143147a938bb575f8995dee55c95 --- /dev/null +++ b/deploy/pptracking/cpp/src/predictor.cc @@ -0,0 +1,35 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
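+
+// Predictor is a thin dispatcher: depending on how it was constructed it owns
+// either a JDEPredictor (one FairMOT-style model producing boxes and embeddings
+// in a single pass) or an SDEPredictor (separate detection and ReID models,
+// still a stub in this patch), and Predict() simply forwards the images,
+// threshold and timing buffer to the selected backend.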
+#include +// for setprecision +#include +#include +#include "include/predictor.h" + +using namespace paddle_infer; // NOLINT + +namespace PaddleDetection { + +void Predictor::Predict(const std::vector imgs, + const double threshold, + MOTResult* result, + std::vector* times) { + if (use_jde_) { + jde_sct_->Predict(imgs, threshold, result, times); + } else { + sde_sct_->Predict(imgs, threshold, result, times); + } +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/src/preprocess_op.cc b/deploy/pptracking/cpp/src/preprocess_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..3158ad67b60b473e0f38d609b55accf35a37a6a8 --- /dev/null +++ b/deploy/pptracking/cpp/src/preprocess_op.cc @@ -0,0 +1,187 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "include/preprocess_op.h" + +namespace PaddleDetection { + +void InitInfo::Run(cv::Mat* im, ImageBlob* data) { + data->im_shape_ = {static_cast(im->rows), + static_cast(im->cols)}; + data->scale_factor_ = {1., 1.}; + data->in_net_shape_ = {static_cast(im->rows), + static_cast(im->cols)}; +} + +void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) { + double e = 1.0; + if (is_scale_) { + e /= 255.0; + } + (*im).convertTo(*im, CV_32FC3, e); + for (int h = 0; h < im->rows; h++) { + for (int w = 0; w < im->cols; w++) { + im->at(h, w)[0] = + (im->at(h, w)[0] - mean_[0]) / scale_[0]; + im->at(h, w)[1] = + (im->at(h, w)[1] - mean_[1]) / scale_[1]; + im->at(h, w)[2] = + (im->at(h, w)[2] - mean_[2]) / scale_[2]; + } + } +} + +void Permute::Run(cv::Mat* im, ImageBlob* data) { + (*im).convertTo(*im, CV_32FC3); + int rh = im->rows; + int rw = im->cols; + int rc = im->channels(); + (data->im_data_).resize(rc * rh * rw); + float* base = (data->im_data_).data(); + for (int i = 0; i < rc; ++i) { + cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i); + } +} + +void Resize::Run(cv::Mat* im, ImageBlob* data) { + auto resize_scale = GenerateScale(*im); + data->im_shape_ = {static_cast(im->cols * resize_scale.first), + static_cast(im->rows * resize_scale.second)}; + data->in_net_shape_ = {static_cast(im->cols * resize_scale.first), + static_cast(im->rows * resize_scale.second)}; + cv::resize( + *im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_); + data->im_shape_ = { + static_cast(im->rows), static_cast(im->cols), + }; + data->scale_factor_ = { + resize_scale.second, resize_scale.first, + }; +} + +std::pair Resize::GenerateScale(const cv::Mat& im) { + std::pair resize_scale; + int origin_w = im.cols; + int origin_h = im.rows; + + if (keep_ratio_) { + int im_size_max = std::max(origin_w, origin_h); + int im_size_min = std::min(origin_w, origin_h); + int target_size_max = + *std::max_element(target_size_.begin(), target_size_.end()); + int target_size_min = + *std::min_element(target_size_.begin(), target_size_.end()); + float scale_min = + static_cast(target_size_min) / 
static_cast(im_size_min); + float scale_max = + static_cast(target_size_max) / static_cast(im_size_max); + float scale_ratio = std::min(scale_min, scale_max); + resize_scale = {scale_ratio, scale_ratio}; + } else { + resize_scale.first = + static_cast(target_size_[1]) / static_cast(origin_w); + resize_scale.second = + static_cast(target_size_[0]) / static_cast(origin_h); + } + return resize_scale; +} + +void LetterBoxResize::Run(cv::Mat* im, ImageBlob* data) { + float resize_scale = GenerateScale(*im); + int new_shape_w = std::round(im->cols * resize_scale); + int new_shape_h = std::round(im->rows * resize_scale); + data->im_shape_ = {static_cast(new_shape_h), + static_cast(new_shape_w)}; + float padw = (target_size_[1] - new_shape_w) / 2.; + float padh = (target_size_[0] - new_shape_h) / 2.; + + int top = std::round(padh - 0.1); + int bottom = std::round(padh + 0.1); + int left = std::round(padw - 0.1); + int right = std::round(padw + 0.1); + + cv::resize( + *im, *im, cv::Size(new_shape_w, new_shape_h), 0, 0, cv::INTER_AREA); + + data->in_net_shape_ = { + static_cast(im->rows), static_cast(im->cols), + }; + cv::copyMakeBorder(*im, + *im, + top, + bottom, + left, + right, + cv::BORDER_CONSTANT, + cv::Scalar(127.5)); + + data->in_net_shape_ = { + static_cast(im->rows), static_cast(im->cols), + }; + + data->scale_factor_ = { + resize_scale, resize_scale, + }; +} + +float LetterBoxResize::GenerateScale(const cv::Mat& im) { + int origin_w = im.cols; + int origin_h = im.rows; + + int target_h = target_size_[0]; + int target_w = target_size_[1]; + + float ratio_h = static_cast(target_h) / static_cast(origin_h); + float ratio_w = static_cast(target_w) / static_cast(origin_w); + float resize_scale = std::min(ratio_h, ratio_w); + return resize_scale; +} + +void PadStride::Run(cv::Mat* im, ImageBlob* data) { + if (stride_ <= 0) { + return; + } + int rc = im->channels(); + int rh = im->rows; + int rw = im->cols; + int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_; + int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_; + cv::copyMakeBorder( + *im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0)); + data->in_net_shape_ = { + static_cast(im->rows), static_cast(im->cols), + }; +} + +// Preprocessor op running order +const std::vector Preprocessor::RUN_ORDER = {"InitInfo", + "Resize", + "LetterBoxResize", + "NormalizeImage", + "PadStride", + "Permute"}; + +void Preprocessor::Run(cv::Mat* im, ImageBlob* data) { + for (const auto& name : RUN_ORDER) { + if (ops_.find(name) != ops_.end()) { + ops_[name]->Run(im, data); + } + } +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/src/sde_predictor.cc b/deploy/pptracking/cpp/src/sde_predictor.cc new file mode 100644 index 0000000000000000000000000000000000000000..e469e8ddc5a154e2ba9b97560b6434427f5e7df1 --- /dev/null +++ b/deploy/pptracking/cpp/src/sde_predictor.cc @@ -0,0 +1,46 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include +// for setprecision +#include +#include +#include "include/sde_predictor.h" + +using namespace paddle_infer; // NOLINT + +namespace PaddleDetection { + +// Load Model and create model predictor +void SDEPredictor::LoadModel(const std::string& det_model_dir, + const std::string& reid_model_dir, + const std::string& run_mode) { + throw "Not Implement"; +} + +void SDEPredictor::Preprocess(const cv::Mat& ori_im) { throw "Not Implement"; } + +void SDEPredictor::Postprocess(const cv::Mat dets, + const cv::Mat emb, + MOTResult* result) { + throw "Not Implement"; +} + +void SDEPredictor::Predict(const std::vector imgs, + const double threshold, + MOTResult* result, + std::vector* times) { + throw "Not Implement"; +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/src/tracker.cc b/deploy/pptracking/cpp/src/tracker.cc new file mode 100644 index 0000000000000000000000000000000000000000..6351a2dd46354e676006155852ca166e3e3532cc --- /dev/null +++ b/deploy/pptracking/cpp/src/tracker.cc @@ -0,0 +1,304 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.cpp +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#include +#include +#include +#include + +#include "include/lapjv.h" +#include "include/tracker.h" + +#define mat2vec4f(m) \ + cv::Vec4f(*m.ptr(0, 0), \ + *m.ptr(0, 1), \ + *m.ptr(0, 2), \ + *m.ptr(0, 3)) + +namespace PaddleDetection { + +static std::map chi2inv95 = {{1, 3.841459f}, + {2, 5.991465f}, + {3, 7.814728f}, + {4, 9.487729f}, + {5, 11.070498f}, + {6, 12.591587f}, + {7, 14.067140f}, + {8, 15.507313f}, + {9, 16.918978f}}; + +JDETracker *JDETracker::me = new JDETracker; + +JDETracker *JDETracker::instance(void) { return me; } + +JDETracker::JDETracker(void) + : timestamp(0), max_lost_time(30), lambda(0.98f), det_thresh(0.3f) {} + +bool JDETracker::update(const cv::Mat &dets, + const cv::Mat &emb, + std::vector *tracks) { + ++timestamp; + TrajectoryPool candidates(dets.rows); + for (int i = 0; i < dets.rows; ++i) { + float score = *dets.ptr(i, 4); + const cv::Mat <rb_ = dets(cv::Rect(0, i, 4, 1)); + cv::Vec4f ltrb = mat2vec4f(ltrb_); + const cv::Mat &embedding = emb(cv::Rect(0, i, emb.cols, 1)); + candidates[i] = Trajectory(ltrb, score, embedding); + } + + TrajectoryPtrPool tracked_trajectories; + TrajectoryPtrPool unconfirmed_trajectories; + for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) { + if (this->tracked_trajectories[i].is_activated) + tracked_trajectories.push_back(&this->tracked_trajectories[i]); + else + unconfirmed_trajectories.push_back(&this->tracked_trajectories[i]); + } + + TrajectoryPtrPool trajectory_pool = + tracked_trajectories + &(this->lost_trajectories); + + for (size_t i = 0; i < trajectory_pool.size(); ++i) + 
trajectory_pool[i]->predict(); + + Match matches; + std::vector mismatch_row; + std::vector mismatch_col; + + cv::Mat cost = motion_distance(trajectory_pool, candidates); + linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col); + + MatchIterator miter; + TrajectoryPtrPool activated_trajectories; + TrajectoryPtrPool retrieved_trajectories; + + for (miter = matches.begin(); miter != matches.end(); miter++) { + Trajectory *pt = trajectory_pool[miter->first]; + Trajectory &ct = candidates[miter->second]; + if (pt->state == Tracked) { + pt->update(&ct, timestamp); + activated_trajectories.push_back(pt); + } else { + pt->reactivate(&ct, timestamp); + retrieved_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool next_candidates(mismatch_col.size()); + for (size_t i = 0; i < mismatch_col.size(); ++i) + next_candidates[i] = &candidates[mismatch_col[i]]; + + TrajectoryPtrPool next_trajectory_pool; + for (size_t i = 0; i < mismatch_row.size(); ++i) { + int j = mismatch_row[i]; + if (trajectory_pool[j]->state == Tracked) + next_trajectory_pool.push_back(trajectory_pool[j]); + } + + cost = iou_distance(next_trajectory_pool, next_candidates); + linear_assignment(cost, 0.5f, &matches, &mismatch_row, &mismatch_col); + + for (miter = matches.begin(); miter != matches.end(); miter++) { + Trajectory *pt = next_trajectory_pool[miter->first]; + Trajectory *ct = next_candidates[miter->second]; + if (pt->state == Tracked) { + pt->update(ct, timestamp); + activated_trajectories.push_back(pt); + } else { + pt->reactivate(ct, timestamp); + retrieved_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool lost_trajectories; + for (size_t i = 0; i < mismatch_row.size(); ++i) { + Trajectory *pt = next_trajectory_pool[mismatch_row[i]]; + if (pt->state != Lost) { + pt->mark_lost(); + lost_trajectories.push_back(pt); + } + } + + TrajectoryPtrPool nnext_candidates(mismatch_col.size()); + for (size_t i = 0; i < mismatch_col.size(); ++i) + nnext_candidates[i] = next_candidates[mismatch_col[i]]; + cost = iou_distance(unconfirmed_trajectories, nnext_candidates); + linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col); + + for (miter = matches.begin(); miter != matches.end(); miter++) { + unconfirmed_trajectories[miter->first]->update( + nnext_candidates[miter->second], timestamp); + activated_trajectories.push_back(unconfirmed_trajectories[miter->first]); + } + + TrajectoryPtrPool removed_trajectories; + + for (size_t i = 0; i < mismatch_row.size(); ++i) { + unconfirmed_trajectories[mismatch_row[i]]->mark_removed(); + removed_trajectories.push_back(unconfirmed_trajectories[mismatch_row[i]]); + } + + for (size_t i = 0; i < mismatch_col.size(); ++i) { + if (nnext_candidates[mismatch_col[i]]->score < det_thresh) continue; + nnext_candidates[mismatch_col[i]]->activate(timestamp); + activated_trajectories.push_back(nnext_candidates[mismatch_col[i]]); + } + + for (size_t i = 0; i < this->lost_trajectories.size(); ++i) { + Trajectory < = this->lost_trajectories[i]; + if (timestamp - lt.timestamp > max_lost_time) { + lt.mark_removed(); + removed_trajectories.push_back(<); + } + } + + TrajectoryPoolIterator piter; + for (piter = this->tracked_trajectories.begin(); + piter != this->tracked_trajectories.end();) { + if (piter->state != Tracked) + piter = this->tracked_trajectories.erase(piter); + else + ++piter; + } + + this->tracked_trajectories += activated_trajectories; + this->tracked_trajectories += retrieved_trajectories; + + this->lost_trajectories -= this->tracked_trajectories; + 
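+  // Set-style pool bookkeeping (operators defined in trajectory.cc): the lost
+  // pool has just dropped every trajectory that is tracked again; below it
+  // absorbs the tracks lost this frame, sheds those already marked removed,
+  // and remove_duplicate_trajectory() resolves IoU overlaps between tracked
+  // and lost tracks by keeping the longer-lived trajectory.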
this->lost_trajectories += lost_trajectories; + this->lost_trajectories -= this->removed_trajectories; + this->removed_trajectories += removed_trajectories; + remove_duplicate_trajectory(&this->tracked_trajectories, + &this->lost_trajectories); + + tracks->clear(); + for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) { + if (this->tracked_trajectories[i].is_activated) { + Track track = {.id = this->tracked_trajectories[i].id, + .score = this->tracked_trajectories[i].score, + .ltrb = this->tracked_trajectories[i].ltrb}; + tracks->push_back(track); + } + } + return 0; +} + +cv::Mat JDETracker::motion_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + if (0 == a.size() || 0 == b.size()) + return cv::Mat(a.size(), b.size(), CV_32F); + + cv::Mat edists = embedding_distance(a, b); + cv::Mat mdists = mahalanobis_distance(a, b); + cv::Mat fdists = lambda * edists + (1 - lambda) * mdists; + + const float gate_thresh = chi2inv95[4]; + for (int i = 0; i < fdists.rows; ++i) { + for (int j = 0; j < fdists.cols; ++j) { + if (*mdists.ptr(i, j) > gate_thresh) + *fdists.ptr(i, j) = FLT_MAX; + } + } + + return fdists; +} + +void JDETracker::linear_assignment(const cv::Mat &cost, + float cost_limit, + Match *matches, + std::vector *mismatch_row, + std::vector *mismatch_col) { + matches->clear(); + mismatch_row->clear(); + mismatch_col->clear(); + if (cost.empty()) { + for (int i = 0; i < cost.rows; ++i) mismatch_row->push_back(i); + for (int i = 0; i < cost.cols; ++i) mismatch_col->push_back(i); + return; + } + + float opt = 0; + cv::Mat x(cost.rows, 1, CV_32S); + cv::Mat y(cost.cols, 1, CV_32S); + + lapjv_internal(cost, + true, + cost_limit, + reinterpret_cast(x.data), + reinterpret_cast(y.data)); + + for (int i = 0; i < x.rows; ++i) { + int j = *x.ptr(i); + if (j >= 0) + matches->insert({i, j}); + else + mismatch_row->push_back(i); + } + + for (int i = 0; i < y.rows; ++i) { + int j = *y.ptr(i); + if (j < 0) mismatch_col->push_back(i); + } + + return; +} + +void JDETracker::remove_duplicate_trajectory(TrajectoryPool *a, + TrajectoryPool *b, + float iou_thresh) { + if (a->size() == 0 || b->size() == 0) return; + + cv::Mat dist = iou_distance(*a, *b); + cv::Mat mask = dist < iou_thresh; + std::vector idx; + cv::findNonZero(mask, idx); + + std::vector da; + std::vector db; + for (size_t i = 0; i < idx.size(); ++i) { + int ta = (*a)[idx[i].y].timestamp - (*a)[idx[i].y].starttime; + int tb = (*b)[idx[i].x].timestamp - (*b)[idx[i].x].starttime; + if (ta > tb) + db.push_back(idx[i].x); + else + da.push_back(idx[i].y); + } + + int id = 0; + TrajectoryPoolIterator piter; + for (piter = a->begin(); piter != a->end();) { + std::vector::iterator iter = find(da.begin(), da.end(), id++); + if (iter != da.end()) + piter = a->erase(piter); + else + ++piter; + } + + id = 0; + for (piter = b->begin(); piter != b->end();) { + std::vector::iterator iter = find(db.begin(), db.end(), id++); + if (iter != db.end()) + piter = b->erase(piter); + else + ++piter; + } +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/cpp/src/trajectory.cc b/deploy/pptracking/cpp/src/trajectory.cc new file mode 100644 index 0000000000000000000000000000000000000000..0ff2e1a5fc7088eec94f052d933d22589d2a81c0 --- /dev/null +++ b/deploy/pptracking/cpp/src/trajectory.cc @@ -0,0 +1,517 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The code is based on: +// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.cpp +// Ths copyright of CnybTseng/JDE is as follows: +// MIT License + +#include "include/trajectory.h" +#include + +namespace PaddleDetection { + +void TKalmanFilter::init(const cv::Mat &measurement) { + measurement.copyTo(statePost(cv::Rect(0, 0, 1, 4))); + statePost(cv::Rect(0, 4, 1, 4)).setTo(0); + statePost.copyTo(statePre); + + float varpos = 2 * std_weight_position * (*measurement.ptr(3)); + varpos *= varpos; + float varvel = 10 * std_weight_velocity * (*measurement.ptr(3)); + varvel *= varvel; + + errorCovPost.setTo(0); + *errorCovPost.ptr(0, 0) = varpos; + *errorCovPost.ptr(1, 1) = varpos; + *errorCovPost.ptr(2, 2) = 1e-4f; + *errorCovPost.ptr(3, 3) = varpos; + *errorCovPost.ptr(4, 4) = varvel; + *errorCovPost.ptr(5, 5) = varvel; + *errorCovPost.ptr(6, 6) = 1e-10f; + *errorCovPost.ptr(7, 7) = varvel; + errorCovPost.copyTo(errorCovPre); +} + +const cv::Mat &TKalmanFilter::predict() { + float varpos = std_weight_position * (*statePre.ptr(3)); + varpos *= varpos; + float varvel = std_weight_velocity * (*statePre.ptr(3)); + varvel *= varvel; + + processNoiseCov.setTo(0); + *processNoiseCov.ptr(0, 0) = varpos; + *processNoiseCov.ptr(1, 1) = varpos; + *processNoiseCov.ptr(2, 2) = 1e-4f; + *processNoiseCov.ptr(3, 3) = varpos; + *processNoiseCov.ptr(4, 4) = varvel; + *processNoiseCov.ptr(5, 5) = varvel; + *processNoiseCov.ptr(6, 6) = 1e-10f; + *processNoiseCov.ptr(7, 7) = varvel; + + return cv::KalmanFilter::predict(); +} + +const cv::Mat &TKalmanFilter::correct(const cv::Mat &measurement) { + float varpos = std_weight_position * (*measurement.ptr(3)); + varpos *= varpos; + + measurementNoiseCov.setTo(0); + *measurementNoiseCov.ptr(0, 0) = varpos; + *measurementNoiseCov.ptr(1, 1) = varpos; + *measurementNoiseCov.ptr(2, 2) = 1e-2f; + *measurementNoiseCov.ptr(3, 3) = varpos; + + return cv::KalmanFilter::correct(measurement); +} + +void TKalmanFilter::project(cv::Mat *mean, cv::Mat *covariance) const { + float varpos = std_weight_position * (*statePost.ptr(3)); + varpos *= varpos; + + cv::Mat measurementNoiseCov_ = cv::Mat::eye(4, 4, CV_32F); + *measurementNoiseCov_.ptr(0, 0) = varpos; + *measurementNoiseCov_.ptr(1, 1) = varpos; + *measurementNoiseCov_.ptr(2, 2) = 1e-2f; + *measurementNoiseCov_.ptr(3, 3) = varpos; + + *mean = measurementMatrix * statePost; + cv::Mat temp = measurementMatrix * errorCovPost; + gemm(temp, + measurementMatrix, + 1, + measurementNoiseCov_, + 1, + *covariance, + cv::GEMM_2_T); +} + +int Trajectory::count = 0; + +const cv::Mat &Trajectory::predict(void) { + if (state != Tracked) *cv::KalmanFilter::statePost.ptr(7) = 0; + return TKalmanFilter::predict(); +} + +void Trajectory::update(Trajectory *traj, + int timestamp_, + bool update_embedding_) { + timestamp = timestamp_; + ++length; + ltrb = traj->ltrb; + xyah = traj->xyah; + TKalmanFilter::correct(cv::Mat(traj->xyah)); + state = Tracked; + is_activated = true; + score = traj->score; + if (update_embedding_) update_embedding(traj->current_embedding); +} + +void Trajectory::activate(int 
timestamp_) { + id = next_id(); + TKalmanFilter::init(cv::Mat(xyah)); + length = 0; + state = Tracked; + if (timestamp_ == 1) { + is_activated = true; + } + timestamp = timestamp_; + starttime = timestamp_; +} + +void Trajectory::reactivate(Trajectory *traj, int timestamp_, bool newid) { + TKalmanFilter::correct(cv::Mat(traj->xyah)); + update_embedding(traj->current_embedding); + length = 0; + state = Tracked; + is_activated = true; + timestamp = timestamp_; + if (newid) id = next_id(); +} + +void Trajectory::update_embedding(const cv::Mat &embedding) { + current_embedding = embedding / cv::norm(embedding); + if (smooth_embedding.empty()) { + smooth_embedding = current_embedding; + } else { + smooth_embedding = eta * smooth_embedding + (1 - eta) * current_embedding; + } + smooth_embedding = smooth_embedding / cv::norm(smooth_embedding); +} + +TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPool &b) { + TrajectoryPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i].id); + if (iter == ids.end()) { + sum.push_back(b[i]); + ids.push_back(b[i].id); + } + } + + return sum; +} + +TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPtrPool &b) { + TrajectoryPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + sum.push_back(*b[i]); + ids.push_back(b[i]->id); + } + } + + return sum; +} + +TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT + const TrajectoryPtrPool &b) { + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id; + + for (size_t i = 0; i < b.size(); ++i) { + if (b[i]->smooth_embedding.empty()) continue; + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + a.push_back(*b[i]); + ids.push_back(b[i]->id); + } + } + + return a; +} + +TrajectoryPool operator-(const TrajectoryPool &a, const TrajectoryPool &b) { + TrajectoryPool dif; + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i].id; + + for (size_t i = 0; i < a.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), a[i].id); + if (iter == ids.end()) dif.push_back(a[i]); + } + + return dif; +} + +TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT + const TrajectoryPool &b) { + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i].id; + + TrajectoryPoolIterator piter; + for (piter = a.begin(); piter != a.end();) { + std::vector::iterator iter = find(ids.begin(), ids.end(), piter->id); + if (iter == ids.end()) + ++piter; + else + piter = a.erase(piter); + } + + return a; +} + +TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + TrajectoryPtrPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i]->id; + + for (size_t i = 0; i < b.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), b[i]->id); + if (iter == ids.end()) { + sum.push_back(b[i]); + ids.push_back(b[i]->id); + } + } + + return sum; +} + +TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, TrajectoryPool *b) { + 
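+ // Merge helper: keep all pointers from a, then append pointers to elements of *b whose id is not already present.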
TrajectoryPtrPool sum; + sum.insert(sum.end(), a.begin(), a.end()); + + std::vector ids(a.size()); + for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i]->id; + + for (size_t i = 0; i < b->size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), (*b)[i].id); + if (iter == ids.end()) { + sum.push_back(&(*b)[i]); + ids.push_back((*b)[i].id); + } + } + + return sum; +} + +TrajectoryPtrPool operator-(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + TrajectoryPtrPool dif; + std::vector ids(b.size()); + for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i]->id; + + for (size_t i = 0; i < a.size(); ++i) { + std::vector::iterator iter = find(ids.begin(), ids.end(), a[i]->id); + if (iter == ids.end()) dif.push_back(a[i]); + } + + return dif; +} + +cv::Mat embedding_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + cv::Mat u = a[i].smooth_embedding; + cv::Mat v = b[j].smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. - uv / std::sqrt(uu * vv)); + // double dist = cv::norm(a[i].smooth_embedding, b[j].smooth_embedding, + // cv::NORM_L2); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + return dists; +} + +cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + // double dist = cv::norm(a[i]->smooth_embedding, b[j]->smooth_embedding, + // cv::NORM_L2); + // distsi[j] = static_cast(dist); + cv::Mat u = a[i]->smooth_embedding; + cv::Mat v = b[j]->smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. - uv / std::sqrt(uu * vv)); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + + return dists; +} + +cv::Mat embedding_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + // double dist = cv::norm(a[i]->smooth_embedding, b[j].smooth_embedding, + // cv::NORM_L2); + // distsi[j] = static_cast(dist); + cv::Mat u = a[i]->smooth_embedding; + cv::Mat v = b[j].smooth_embedding; + double uv = u.dot(v); + double uu = u.dot(u); + double vv = v.dot(v); + double dist = std::abs(1. 
- uv / std::sqrt(uu * vv)); + distsi[j] = static_cast(std::max(std::min(dist, 2.), 0.)); + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i].project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j].xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPtrPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i]->project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j]->xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a, + const TrajectoryPool &b) { + std::vector means(a.size()); + std::vector icovariances(a.size()); + + for (size_t i = 0; i < a.size(); ++i) { + cv::Mat covariance; + a[i]->project(&means[i], &covariance); + cv::invert(covariance, icovariances[i]); + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Mat x(b[j].xyah); + float dist = + static_cast(cv::Mahalanobis(x, means[i], icovariances[i])); + distsi[j] = dist * dist; + } + } + + return dists; +} + +static inline float calc_inter_area(const cv::Vec4f &a, const cv::Vec4f &b) { + if (a[2] < b[0] || a[0] > b[2] || a[3] < b[1] || a[1] > b[3]) return 0.f; + + float w = std::min(a[2], b[2]) - std::max(a[0], b[0]); + float h = std::min(a[3], b[3]) - std::max(a[1], b[1]); + return w * h; +} + +cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i].ltrb[2] - a[i].ltrb[0]; + float h = a[i].ltrb[3] - a[i].ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j].ltrb[2] - b[j].ltrb[0]; + float h = b[j].ltrb[3] - b[j].ltrb[1]; + areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i].ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j].ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i]->ltrb[2] - a[i]->ltrb[0]; + float h = a[i]->ltrb[3] - a[i]->ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j]->ltrb[2] - b[j]->ltrb[0]; + float h = b[j]->ltrb[3] - b[j]->ltrb[1]; + 
areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i]->ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j]->ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b) { + std::vector areaa(a.size()); + for (size_t i = 0; i < a.size(); ++i) { + float w = a[i]->ltrb[2] - a[i]->ltrb[0]; + float h = a[i]->ltrb[3] - a[i]->ltrb[1]; + areaa[i] = w * h; + } + + std::vector areab(b.size()); + for (size_t j = 0; j < b.size(); ++j) { + float w = b[j].ltrb[2] - b[j].ltrb[0]; + float h = b[j].ltrb[3] - b[j].ltrb[1]; + areab[j] = w * h; + } + + cv::Mat dists(a.size(), b.size(), CV_32F); + for (size_t i = 0; i < a.size(); ++i) { + const cv::Vec4f &boxa = a[i]->ltrb; + float *distsi = dists.ptr(i); + for (size_t j = 0; j < b.size(); ++j) { + const cv::Vec4f &boxb = b[j].ltrb; + float inters = calc_inter_area(boxa, boxb); + distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters); + } + } + + return dists; +} + +} // namespace PaddleDetection diff --git a/deploy/pptracking/python/README.md b/deploy/pptracking/python/README.md new file mode 100644 index 0000000000000000000000000000000000000000..190cf63ec6d389fc4e72d29e8b46ffb5eb445963 --- /dev/null +++ b/deploy/pptracking/python/README.md @@ -0,0 +1,123 @@ +# Python端预测部署 + +在PaddlePaddle中预测引擎和训练引擎底层有着不同的优化方法, 预测引擎使用了AnalysisPredictor,专门针对推理进行了优化,是基于[C++预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html)的Python接口,该引擎可以对模型进行多项图优化,减少不必要的内存拷贝。如果用户在部署已训练模型的过程中对性能有较高的要求,我们提供了独立于PaddleDetection的预测脚本,方便用户直接集成部署。 + +主要包含两个步骤: + +- 导出预测模型 +- 基于Python进行预测 + +PaddleDetection在训练过程包括网络的前向和优化器相关参数,而在部署过程中,我们只需要前向参数,具体参考:[导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/deploy/EXPORT_MODEL.md) +导出后目录下,包括`infer_cfg.yml`, `model.pdiparams`, `model.pdiparams.info`, `model.pdmodel`四个文件。 + +## 1. 对FairMOT模型的导出和预测 + +### 1.1 导出预测模型 +```bash +CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams +``` + +### 1.2 用导出的模型基于Python去预测 +```bash +python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320 --video_file={your video name}.mp4 --device=GPU --save_mot_txts +``` +**注意:** + - 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`表示保存跟踪结果的txt文件,或`--save_images`表示保存跟踪结果可视化图片。 + - 对于多类别或车辆的FairMOT模型的导出和Python预测只需更改相应的config和模型权重即可。如: + ``` + job_name=mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone + model_type=mot/mcfairmot + config=configs/${model_type}/${job_name}.yml + + CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=https://paddledet.bj.bcebos.com/models/mot/${job_name}.pdparams + python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/${job_name} --video_file={your video name}.mp4 --device=GPU --save_mot_txts + ``` + + +## 2. 
对DeepSORT模型的导出和预测 +### 2.1 导出预测模型 + +Step 1:导出检测模型 +```bash +# 导出JDE YOLOv3行人检测模型 +CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/jde_yolov3_darknet53_30e_1088x608_mix.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams + +# 或导出PPYOLOv2行人检测模型 +CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/ppyolov2_r50vd_dcn_365e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/ppyolov2_r50vd_dcn_365e_640x640_mot17half.pdparams +``` + +Step 2:导出ReID模型 +```bash +# 导出PCB Pyramid ReID模型 +CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/reid/deepsort_pcb_pyramid_r101.yml -o reid_weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams +# 或者导出PPLCNet ReID模型 +CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/reid/deepsort_pplcnet.yml -o reid_weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams +``` + +### 2.2 用导出的模型基于Python去预测 + +```bash +# 用导出JDE YOLOv3行人检测模型和PCB Pyramid ReID模型 +python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/jde_yolov3_darknet53_30e_1088x608_mix/ --reid_model_dir=output_inference/deepsort_pcb_pyramid_r101/ --video_file={your video name}.mp4 --device=GPU --save_mot_txts + +# 或用导出的PPYOLOv2行人检测模型和PPLCNet ReID模型 +python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/ppyolov2_r50vd_dcn_365e_640x640_mot17half/ --reid_model_dir=output_inference/deepsort_pplcnet/ --video_file={your video name}.mp4 --device=GPU --scaled=True --save_mot_txts +``` +**注意:** + - 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`(对每个视频保存一个txt)或`--save_images`表示保存跟踪结果可视化图片。 + - `--scaled`表示在模型输出结果的坐标是否已经是缩放回原图的,如果使用的检测模型是JDE的YOLOv3则为False,如果使用通用检测模型则为True。 + + +## 3. 
跨镜头跟踪模型的导出和预测 +### 3.1 导出预测模型 +Step 1:下载导出的检测模型 +```bash +wget https://paddledet.bj.bcebos.com/models/mot/deepsort/picodet_l_640_aic21mtmct_vehicle.tar +tar -xvf picodet_l_640_aic21mtmct_vehicle.tar +``` +Step 2:下载导出的ReID模型 +```bash +wget https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet_vehicle.tar +tar -xvf deepsort_pplcnet_vehicle.tar +``` + +### 3.2 用导出的模型基于Python去预测 +```bash +# 用导出PicoDet车辆检测模型和PPLCNet车辆ReID模型 +python deploy/pptracking/python/mot_sde_infer.py --model_dir=picodet_l_640_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --mtmct_dir={your mtmct scene video folder} --mtmct_cfg=mtmct_cfg --device=GPU --scaled=True --save_mot_txts --save_images +``` +**注意:** + - 跟踪模型是对视频进行预测,不支持单张图的预测,默认保存跟踪结果可视化后的视频,可添加`--save_mot_txts`(对每个视频保存一个txt),或`--save_images`表示保存跟踪结果可视化图片。 + - `--scaled`表示在模型输出结果的坐标是否已经是缩放回原图的,如果使用的检测模型是JDE的YOLOv3则为False,如果使用通用检测模型则为True。 + - `--mtmct_dir`是MTMCT预测的某个场景的文件夹名字,里面包含该场景不同摄像头拍摄视频的图片文件夹,其数量至少为两个。 + + +## 参数说明: + +| 参数 | 是否必须|含义 | +|-------|-------|----------| +| --model_dir | Yes| 上述导出的模型路径 | +| --image_file | Option | 需要预测的图片 | +| --image_dir | Option | 要预测的图片文件夹路径 | +| --video_file | Option | 需要预测的视频 | +| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4| +| --device | Option | 运行时的设备,可选择`CPU/GPU/XPU`,默认为`CPU`| +| --run_mode | Option |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)| +| --batch_size | Option |预测时的batch size,在指定`image_dir`时有效,默认为1 | +| --threshold | Option|预测得分的阈值,默认为0.5| +| --output_dir | Option|可视化结果保存的根目录,默认为output/| +| --run_benchmark | Option| 是否运行benchmark,同时需指定`--image_file`或`--image_dir`,默认为False | +| --enable_mkldnn | Option | CPU预测中是否开启MKLDNN加速,默认为False | +| --cpu_threads | Option| 设置cpu线程数,默认为1 | +| --trt_calib_mode | Option| TensorRT是否使用校准功能,默认为False。使用TensorRT的int8功能时,需设置为True,使用PaddleSlim量化后的模型时需要设置为False | +| --do_entrance_counting | Option | 是否统计出入口流量,默认为False | +| --draw_center_traj | Option | 是否绘制跟踪轨迹,默认为False | +| --mtmct_dir | Option | 需要进行MTMCT跨镜头跟踪预测的图片文件夹路径,默认为None | +| --mtmct_cfg | Option | 需要进行MTMCT跨镜头跟踪预测的配置文件路径,默认为None | + +说明: + +- 参数优先级顺序:`camera_id` > `video_file` > `image_dir` > `image_file`。 +- `run_mode`:fluid代表使用AnalysisPredictor,以float32精度推理,其他选项表示使用AnalysisPredictor并以TensorRT相应精度推理。 +- 如果安装的PaddlePaddle不支持基于TensorRT进行预测,需要自行编译,详细可参考[预测库编译教程](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)。 +- `--run_benchmark`如果设置为True,则需要安装依赖`pip install pynvml psutil GPUtil`。 diff --git a/deploy/pptracking/python/benchmark_utils.py b/deploy/pptracking/python/benchmark_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..af7637288dff517314d95e5404dcd88b8db956c7 --- /dev/null +++ b/deploy/pptracking/python/benchmark_utils.py @@ -0,0 +1,277 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
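+# Collects model/data/perf/resource info together with the predictor config and writes a formatted benchmark report to a per-model log file.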
+ +import os +import logging + +import paddle +import paddle.inference as paddle_infer + +from pathlib import Path + +CUR_DIR = os.path.dirname(os.path.abspath(__file__)) +LOG_PATH_ROOT = f"{CUR_DIR}/../../output" + + +class PaddleInferBenchmark(object): + def __init__(self, + config, + model_info: dict={}, + data_info: dict={}, + perf_info: dict={}, + resource_info: dict={}, + **kwargs): + """ + Construct PaddleInferBenchmark Class to format logs. + args: + config(paddle.inference.Config): paddle inference config + model_info(dict): basic model info + {'model_name': 'resnet50' + 'precision': 'fp32'} + data_info(dict): input data info + {'batch_size': 1 + 'shape': '3,224,224' + 'data_num': 1000} + perf_info(dict): performance result + {'preprocess_time_s': 1.0 + 'inference_time_s': 2.0 + 'postprocess_time_s': 1.0 + 'total_time_s': 4.0} + resource_info(dict): + cpu and gpu resources + {'cpu_rss': 100 + 'gpu_rss': 100 + 'gpu_util': 60} + """ + # PaddleInferBenchmark Log Version + self.log_version = "1.0.3" + + # Paddle Version + self.paddle_version = paddle.__version__ + self.paddle_commit = paddle.__git_commit__ + paddle_infer_info = paddle_infer.get_version() + self.paddle_branch = paddle_infer_info.strip().split(': ')[-1] + + # model info + self.model_info = model_info + + # data info + self.data_info = data_info + + # perf info + self.perf_info = perf_info + + try: + # required value + self.model_name = model_info['model_name'] + self.precision = model_info['precision'] + + self.batch_size = data_info['batch_size'] + self.shape = data_info['shape'] + self.data_num = data_info['data_num'] + + self.inference_time_s = round(perf_info['inference_time_s'], 4) + except: + self.print_help() + raise ValueError( + "Set argument wrong, please check input argument and its type") + + self.preprocess_time_s = perf_info.get('preprocess_time_s', 0) + self.postprocess_time_s = perf_info.get('postprocess_time_s', 0) + self.total_time_s = perf_info.get('total_time_s', 0) + + self.inference_time_s_90 = perf_info.get("inference_time_s_90", "") + self.inference_time_s_99 = perf_info.get("inference_time_s_99", "") + self.succ_rate = perf_info.get("succ_rate", "") + self.qps = perf_info.get("qps", "") + + # conf info + self.config_status = self.parse_config(config) + + # mem info + if isinstance(resource_info, dict): + self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0)) + self.cpu_vms_mb = int(resource_info.get('cpu_vms_mb', 0)) + self.cpu_shared_mb = int(resource_info.get('cpu_shared_mb', 0)) + self.cpu_dirty_mb = int(resource_info.get('cpu_dirty_mb', 0)) + self.cpu_util = round(resource_info.get('cpu_util', 0), 2) + + self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0)) + self.gpu_util = round(resource_info.get('gpu_util', 0), 2) + self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2) + else: + self.cpu_rss_mb = 0 + self.cpu_vms_mb = 0 + self.cpu_shared_mb = 0 + self.cpu_dirty_mb = 0 + self.cpu_util = 0 + + self.gpu_rss_mb = 0 + self.gpu_util = 0 + self.gpu_mem_util = 0 + + # init benchmark logger + self.benchmark_logger() + + def benchmark_logger(self): + """ + benchmark logger + """ + # remove other logging handler + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + + # Init logger + FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log" + Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True) + logging.basicConfig( + level=logging.INFO, + format=FORMAT, + handlers=[ + 
logging.FileHandler( + filename=log_output, mode='w'), + logging.StreamHandler(), + ]) + self.logger = logging.getLogger(__name__) + self.logger.info( + f"Paddle Inference benchmark log will be saved to {log_output}") + + def parse_config(self, config) -> dict: + """ + parse paddle predictor config + args: + config(paddle.inference.Config): paddle inference config + return: + config_status(dict): dict style config info + """ + if isinstance(config, paddle_infer.Config): + config_status = {} + config_status['runtime_device'] = "gpu" if config.use_gpu( + ) else "cpu" + config_status['ir_optim'] = config.ir_optim() + config_status['enable_tensorrt'] = config.tensorrt_engine_enabled() + config_status['precision'] = self.precision + config_status['enable_mkldnn'] = config.mkldnn_enabled() + config_status[ + 'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads( + ) + elif isinstance(config, dict): + config_status['runtime_device'] = config.get('runtime_device', "") + config_status['ir_optim'] = config.get('ir_optim', "") + config_status['enable_tensorrt'] = config.get('enable_tensorrt', "") + config_status['precision'] = config.get('precision', "") + config_status['enable_mkldnn'] = config.get('enable_mkldnn', "") + config_status['cpu_math_library_num_threads'] = config.get( + 'cpu_math_library_num_threads', "") + else: + self.print_help() + raise ValueError( + "Set argument config wrong, please check input argument and its type" + ) + return config_status + + def report(self, identifier=None): + """ + print log report + args: + identifier(string): identify log + """ + if identifier: + identifier = f"[{identifier}]" + else: + identifier = "" + + self.logger.info("\n") + self.logger.info( + "---------------------- Paddle info ----------------------") + self.logger.info(f"{identifier} paddle_version: {self.paddle_version}") + self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}") + self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}") + self.logger.info(f"{identifier} log_api_version: {self.log_version}") + self.logger.info( + "----------------------- Conf info -----------------------") + self.logger.info( + f"{identifier} runtime_device: {self.config_status['runtime_device']}" + ) + self.logger.info( + f"{identifier} ir_optim: {self.config_status['ir_optim']}") + self.logger.info(f"{identifier} enable_memory_optim: {True}") + self.logger.info( + f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}" + ) + self.logger.info( + f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}") + self.logger.info( + f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}" + ) + self.logger.info( + "----------------------- Model info ----------------------") + self.logger.info(f"{identifier} model_name: {self.model_name}") + self.logger.info(f"{identifier} precision: {self.precision}") + self.logger.info( + "----------------------- Data info -----------------------") + self.logger.info(f"{identifier} batch_size: {self.batch_size}") + self.logger.info(f"{identifier} input_shape: {self.shape}") + self.logger.info(f"{identifier} data_num: {self.data_num}") + self.logger.info( + "----------------------- Perf info -----------------------") + self.logger.info( + f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms: {self.cpu_vms_mb}, cpu_shared_mb: {self.cpu_shared_mb}, cpu_dirty_mb: {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%" + ) + self.logger.info( + f"{identifier} gpu_rss(MB): 
{self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%" + ) + self.logger.info( + f"{identifier} total time spent(s): {self.total_time_s}") + self.logger.info( + f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, inference_time(ms): {round(self.inference_time_s*1000, 1)}, postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}" + ) + if self.inference_time_s_90: + self.logger.info( + f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}" + ) + if self.qps: + self.logger.info(f"{identifier} QPS: {self.qps}") + + def print_help(self): + """ + print function help + """ + print("""Usage: + ==== Print inference benchmark logs. ==== + config = paddle.inference.Config() + model_info = {'model_name': 'resnet50' + 'precision': 'fp32'} + data_info = {'batch_size': 1 + 'shape': '3,224,224' + 'data_num': 1000} + perf_info = {'preprocess_time_s': 1.0 + 'inference_time_s': 2.0 + 'postprocess_time_s': 1.0 + 'total_time_s': 4.0} + resource_info = {'cpu_rss_mb': 100 + 'gpu_rss_mb': 100 + 'gpu_util': 60} + log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info) + log('Test') + """) + + def __call__(self, identifier=None): + """ + __call__ + args: + identifier(string): identify log + """ + self.report(identifier) diff --git a/deploy/pptracking/python/det_infer.py b/deploy/pptracking/python/det_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..a161962b8ef2d4f2c41bb5acdd02b92850d07df1 --- /dev/null +++ b/deploy/pptracking/python/det_infer.py @@ -0,0 +1,657 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
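A minimal usage sketch for the `PaddleInferBenchmark` class from `benchmark_utils.py` above, following the pattern its `print_help` method prints; the dictionary values below are illustrative placeholders, not measured results:

```python
import paddle.inference as paddle_infer
from benchmark_utils import PaddleInferBenchmark

config = paddle_infer.Config()  # normally the Config used to build your predictor
model_info = {'model_name': 'fairmot_hrnetv2_w18', 'precision': 'fp32'}
data_info = {'batch_size': 1, 'shape': '3,320,576', 'data_num': 100}
perf_info = {'preprocess_time_s': 0.01, 'inference_time_s': 0.04,
             'postprocess_time_s': 0.005, 'total_time_s': 0.055}
resource_info = {'cpu_rss_mb': 100, 'gpu_rss_mb': 1000, 'gpu_util': 60}

log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info)
log('Test')  # writes the formatted report to the per-model log file and stdout
```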
+ +import os +import yaml +import glob +from functools import reduce + +import cv2 +import numpy as np +import math + +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor + +from benchmark_utils import PaddleInferBenchmark +from picodet_postprocess import PicoDetPostProcess +from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize +from visualize import visualize_box_mask +from utils import argsparser, Timer, get_current_memory_mb + +# Global dictionary +SUPPORT_MODELS = { + 'YOLO', + 'PicoDet', + 'JDE', + 'FairMOT', + 'DeepSORT', +} + + +class Detector(object): + """ + Args: + pred_config (object): config of model, defined by `Config(model_dir)` + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + batch_size (int): size of pre batch in inference + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + """ + + def __init__(self, + pred_config, + model_dir, + device='CPU', + run_mode='fluid', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1280, + trt_opt_shape=640, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False): + self.pred_config = pred_config + self.predictor, self.config = load_predictor( + model_dir, + run_mode=run_mode, + batch_size=batch_size, + min_subgraph_size=self.pred_config.min_subgraph_size, + device=device, + use_dynamic_shape=self.pred_config.use_dynamic_shape, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn) + self.det_times = Timer() + self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 + + def preprocess(self, image_list): + preprocess_ops = [] + for op_info in self.pred_config.preprocess_infos: + new_op_info = op_info.copy() + op_type = new_op_info.pop('type') + preprocess_ops.append(eval(op_type)(**new_op_info)) + + input_im_lst = [] + input_im_info_lst = [] + for im_path in image_list: + im, im_info = preprocess(im_path, preprocess_ops) + input_im_lst.append(im) + input_im_info_lst.append(im_info) + inputs = create_inputs(input_im_lst, input_im_info_lst) + return inputs + + def postprocess(self, + np_boxes, + np_masks, + inputs, + np_boxes_num, + threshold=0.5): + # postprocess output of predictor + results = {} + results['boxes'] = np_boxes + results['boxes_num'] = np_boxes_num + if np_masks is not None: + results['masks'] = np_masks + return results + + def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + ''' + Args: + image_list (list): list of image + threshold (float): threshold of predicted box' score + Returns: + results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, + matix element:[class, score, x_min, y_min, x_max, y_max] + MaskRCNN's results include 'masks': np.ndarray: + shape: [N, im_h, im_w] + ''' + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(image_list) + self.det_times.preprocess_time_s.end() + np_boxes, np_masks = None, None + input_names = 
self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + for i in range(warmup): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + np_boxes = boxes_tensor.copy_to_cpu() + if self.pred_config.mask: + masks_tensor = self.predictor.get_output_handle(output_names[2]) + np_masks = masks_tensor.copy_to_cpu() + + self.det_times.inference_time_s.start() + for i in range(repeats): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + np_boxes = boxes_tensor.copy_to_cpu() + boxes_num = self.predictor.get_output_handle(output_names[1]) + np_boxes_num = boxes_num.copy_to_cpu() + if self.pred_config.mask: + masks_tensor = self.predictor.get_output_handle(output_names[2]) + np_masks = masks_tensor.copy_to_cpu() + self.det_times.inference_time_s.end(repeats=repeats) + + self.det_times.postprocess_time_s.start() + results = [] + if reduce(lambda x, y: x * y, np_boxes.shape) < 6: + print('[WARNNING] No object detected.') + results = {'boxes': np.zeros([0, 6]), 'boxes_num': [0]} + else: + results = self.postprocess( + np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += len(image_list) + return results + + def get_timer(self): + return self.det_times + + +class DetectorPicoDet(Detector): + """ + Args: + config (object): config of model, defined by `Config(model_dir)` + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + batch_size (int): size of pre batch in inference + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + """ + + def __init__(self, + pred_config, + model_dir, + device='CPU', + run_mode='fluid', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1280, + trt_opt_shape=640, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False): + self.pred_config = pred_config + self.predictor, self.config = load_predictor( + model_dir, + run_mode=run_mode, + batch_size=batch_size, + min_subgraph_size=self.pred_config.min_subgraph_size, + device=device, + use_dynamic_shape=self.pred_config.use_dynamic_shape, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn) + self.det_times = Timer() + self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 + + def predict(self, image, threshold=0.5, warmup=0, repeats=1): + ''' + Args: + image (str/np.ndarray): path of image/ np.ndarray read by cv2 + threshold (float): threshold of predicted box' score + Returns: + results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, + matix element:[class, score, x_min, y_min, x_max, y_max] + ''' + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(image) + 
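+ # preprocessing finished; 'inputs' holds the image tensor together with im_shape and scale_factor metadata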
self.det_times.preprocess_time_s.end() + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + np_score_list, np_boxes_list = [], [] + for i in range(warmup): + self.predictor.run() + np_score_list.clear() + np_boxes_list.clear() + output_names = self.predictor.get_output_names() + num_outs = int(len(output_names) / 2) + for out_idx in range(num_outs): + np_score_list.append( + self.predictor.get_output_handle(output_names[out_idx]) + .copy_to_cpu()) + np_boxes_list.append( + self.predictor.get_output_handle(output_names[ + out_idx + num_outs]).copy_to_cpu()) + + self.det_times.inference_time_s.start() + for i in range(repeats): + self.predictor.run() + np_score_list.clear() + np_boxes_list.clear() + output_names = self.predictor.get_output_names() + num_outs = int(len(output_names) / 2) + for out_idx in range(num_outs): + np_score_list.append( + self.predictor.get_output_handle(output_names[out_idx]) + .copy_to_cpu()) + np_boxes_list.append( + self.predictor.get_output_handle(output_names[ + out_idx + num_outs]).copy_to_cpu()) + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 + self.det_times.postprocess_time_s.start() + self.postprocess = PicoDetPostProcess( + inputs['image'].shape[2:], + inputs['im_shape'], + inputs['scale_factor'], + strides=self.pred_config.fpn_stride, + nms_threshold=self.pred_config.nms['nms_threshold']) + np_boxes, np_boxes_num = self.postprocess(np_score_list, np_boxes_list) + self.det_times.postprocess_time_s.end() + return dict(boxes=np_boxes, boxes_num=np_boxes_num) + + +def create_inputs(imgs, im_info): + """generate input for different model type + Args: + imgs (list(numpy)): list of images (np.ndarray) + im_info (list(dict)): list of image info + Returns: + inputs (dict): input of model + """ + inputs = {} + + im_shape = [] + scale_factor = [] + if len(imgs) == 1: + inputs['image'] = np.array((imgs[0], )).astype('float32') + inputs['im_shape'] = np.array( + (im_info[0]['im_shape'], )).astype('float32') + inputs['scale_factor'] = np.array( + (im_info[0]['scale_factor'], )).astype('float32') + return inputs + + for e in im_info: + im_shape.append(np.array((e['im_shape'], )).astype('float32')) + scale_factor.append(np.array((e['scale_factor'], )).astype('float32')) + + inputs['im_shape'] = np.concatenate(im_shape, axis=0) + inputs['scale_factor'] = np.concatenate(scale_factor, axis=0) + + imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs] + max_shape_h = max([e[0] for e in imgs_shape]) + max_shape_w = max([e[1] for e in imgs_shape]) + padding_imgs = [] + for img in imgs: + im_c, im_h, im_w = img.shape[:] + padding_im = np.zeros( + (im_c, max_shape_h, max_shape_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = img + padding_imgs.append(padding_im) + inputs['image'] = np.stack(padding_imgs, axis=0) + return inputs + + +class PredictConfig(): + """set config of preprocess, postprocess and visualize + Args: + model_dir (str): root path of model.yml + """ + + def __init__(self, model_dir): + # parsing Yaml config for Preprocess + deploy_file = os.path.join(model_dir, 'infer_cfg.yml') + with open(deploy_file) as f: + yml_conf = yaml.safe_load(f) + self.check_model(yml_conf) + self.arch = yml_conf['arch'] + self.preprocess_infos = yml_conf['Preprocess'] + self.min_subgraph_size = yml_conf['min_subgraph_size'] + self.labels = yml_conf['label_list'] + self.mask = False + 
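+ # mask defaults to False and is only enabled below when the exported infer_cfg.yml declares it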
self.use_dynamic_shape = yml_conf['use_dynamic_shape'] + if 'mask' in yml_conf: + self.mask = yml_conf['mask'] + self.tracker = None + if 'tracker' in yml_conf: + self.tracker = yml_conf['tracker'] + if 'NMS' in yml_conf: + self.nms = yml_conf['NMS'] + if 'fpn_stride' in yml_conf: + self.fpn_stride = yml_conf['fpn_stride'] + self.print_config() + + def check_model(self, yml_conf): + """ + Raises: + ValueError: loaded model not in supported model type + """ + for support_model in SUPPORT_MODELS: + if support_model in yml_conf['arch']: + return True + raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[ + 'arch'], SUPPORT_MODELS)) + + def print_config(self): + print('----------- Model Configuration -----------') + print('%s: %s' % ('Model Arch', self.arch)) + print('%s: ' % ('Transform Order')) + for op_info in self.preprocess_infos: + print('--%s: %s' % ('transform op', op_info['type'])) + print('--------------------------------------------') + + +def load_predictor(model_dir, + run_mode='fluid', + batch_size=1, + device='CPU', + min_subgraph_size=3, + use_dynamic_shape=False, + trt_min_shape=1, + trt_max_shape=1280, + trt_opt_shape=640, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False): + """set AnalysisConfig, generate AnalysisPredictor + Args: + model_dir (str): root path of __model__ and __params__ + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8) + use_dynamic_shape (bool): use dynamic shape or not + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + Returns: + predictor (PaddlePredictor): AnalysisPredictor + Raises: + ValueError: predict by TensorRT need device == 'GPU'. + """ + if device != 'GPU' and run_mode != 'fluid': + raise ValueError( + "Predict by TensorRT mode: {}, expect device=='GPU', but device == {}" + .format(run_mode, device)) + config = Config( + os.path.join(model_dir, 'model.pdmodel'), + os.path.join(model_dir, 'model.pdiparams')) + if device == 'GPU': + # initial GPU memory(M), device ID + config.enable_use_gpu(200, 0) + # optimize graph and fuse op + config.switch_ir_optim(True) + elif device == 'XPU': + config.enable_xpu(10 * 1024 * 1024) + else: + config.disable_gpu() + config.set_cpu_math_library_num_threads(cpu_threads) + if enable_mkldnn: + try: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() + except Exception as e: + print( + "The current environment does not support `mkldnn`, so disable mkldnn." 
+ ) + pass + + precision_map = { + 'trt_int8': Config.Precision.Int8, + 'trt_fp32': Config.Precision.Float32, + 'trt_fp16': Config.Precision.Half + } + if run_mode in precision_map.keys(): + config.enable_tensorrt_engine( + workspace_size=1 << 10, + max_batch_size=batch_size, + min_subgraph_size=min_subgraph_size, + precision_mode=precision_map[run_mode], + use_static=False, + use_calib_mode=trt_calib_mode) + + if use_dynamic_shape: + min_input_shape = { + 'image': [batch_size, 3, trt_min_shape, trt_min_shape] + } + max_input_shape = { + 'image': [batch_size, 3, trt_max_shape, trt_max_shape] + } + opt_input_shape = { + 'image': [batch_size, 3, trt_opt_shape, trt_opt_shape] + } + config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, + opt_input_shape) + print('trt set dynamic shape done!') + + # disable print log when predict + config.disable_glog_info() + # enable shared memory + config.enable_memory_optim() + # disable feed, fetch OP, needed by zero_copy_run + config.switch_use_feed_fetch_ops(False) + predictor = create_predictor(config) + return predictor, config + + +def get_test_images(infer_dir, infer_img): + """ + Get image path list in TEST mode + """ + assert infer_img is not None or infer_dir is not None, \ + "--infer_img or --infer_dir should be set" + assert infer_img is None or os.path.isfile(infer_img), \ + "{} is not a file".format(infer_img) + assert infer_dir is None or os.path.isdir(infer_dir), \ + "{} is not a directory".format(infer_dir) + + # infer_img has a higher priority + if infer_img and os.path.isfile(infer_img): + return [infer_img] + + images = set() + infer_dir = os.path.abspath(infer_dir) + assert os.path.isdir(infer_dir), \ + "infer_dir {} is not a directory".format(infer_dir) + exts = ['jpg', 'jpeg', 'png', 'bmp'] + exts += [ext.upper() for ext in exts] + for ext in exts: + images.update(glob.glob('{}/*.{}'.format(infer_dir, ext))) + images = list(images) + + assert len(images) > 0, "no image found in {}".format(infer_dir) + print("Found {} inference images in total.".format(len(images))) + + return images + + +def visualize(image_list, results, labels, output_dir='output/', threshold=0.5): + # visualize the predict result + start_idx = 0 + for idx, image_file in enumerate(image_list): + im_bboxes_num = results['boxes_num'][idx] + im_results = {} + if 'boxes' in results: + im_results['boxes'] = results['boxes'][start_idx:start_idx + + im_bboxes_num, :] + if 'label' in results: + im_results['label'] = results['label'][start_idx:start_idx + + im_bboxes_num] + if 'score' in results: + im_results['score'] = results['score'][start_idx:start_idx + + im_bboxes_num] + + start_idx += im_bboxes_num + im = visualize_box_mask( + image_file, im_results, labels, threshold=threshold) + img_name = os.path.split(image_file)[-1] + if not os.path.exists(output_dir): + os.makedirs(output_dir) + out_path = os.path.join(output_dir, img_name) + im.save(out_path, quality=95) + print("save result to: " + out_path) + + +def print_arguments(args): + print('----------- Running Arguments -----------') + for arg, value in sorted(vars(args).items()): + print('%s: %s' % (arg, value)) + print('------------------------------------------') + + +def predict_image(detector, image_list, batch_size=1): + batch_loop_cnt = math.ceil(float(len(image_list)) / batch_size) + for i in range(batch_loop_cnt): + start_index = i * batch_size + end_index = min((i + 1) * batch_size, len(image_list)) + batch_image_list = image_list[start_index:end_index] + if FLAGS.run_benchmark: + 
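+ # benchmark mode: 10 warmup runs plus 10 timed repeats per batch, then accumulate memory/GPU utilization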
detector.predict( + batch_image_list, FLAGS.threshold, warmup=10, repeats=10) + cm, gm, gu = get_current_memory_mb() + detector.cpu_mem += cm + detector.gpu_mem += gm + detector.gpu_util += gu + print('Test iter {}'.format(i)) + else: + results = detector.predict(batch_image_list, FLAGS.threshold) + visualize( + batch_image_list, + results, + detector.pred_config.labels, + output_dir=FLAGS.output_dir, + threshold=FLAGS.threshold) + + +def predict_video(detector, camera_id): + video_out_name = 'output.mp4' + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) + else: + capture = cv2.VideoCapture(FLAGS.video_file) + video_out_name = os.path.split(FLAGS.video_file)[-1] + # Get Video info : resolution, fps, frame count + width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(capture.get(cv2.CAP_PROP_FPS)) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + print("fps: %d, frame_count: %d" % (fps, frame_count)) + + if not os.path.exists(FLAGS.output_dir): + os.makedirs(FLAGS.output_dir) + out_path = os.path.join(FLAGS.output_dir, video_out_name) + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) + index = 1 + while (1): + ret, frame = capture.read() + if not ret: + break + print('detect frame: %d' % (index)) + index += 1 + results = detector.predict([frame], FLAGS.threshold) + im = visualize_box_mask( + frame, + results, + detector.pred_config.labels, + threshold=FLAGS.threshold) + im = np.array(im) + writer.write(im) + if camera_id != -1: + cv2.imshow('Mask Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + writer.release() + + +def main(): + pred_config = PredictConfig(FLAGS.model_dir) + detector_func = 'Detector' + if pred_config.arch == 'PicoDet': + detector_func = 'DetectorPicoDet' + + detector = eval(detector_func)(pred_config, + FLAGS.model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + batch_size=FLAGS.batch_size, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn) + + # predict from video file or camera video stream + if FLAGS.video_file is not None or FLAGS.camera_id != -1: + predict_video(detector, FLAGS.camera_id) + else: + # predict from image + if FLAGS.image_dir is None and FLAGS.image_file is not None: + assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None" + img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) + predict_image(detector, img_list, FLAGS.batch_size) + if not FLAGS.run_benchmark: + detector.det_times.info(average=True) + else: + mems = { + 'cpu_rss_mb': detector.cpu_mem / len(img_list), + 'gpu_rss_mb': detector.gpu_mem / len(img_list), + 'gpu_util': detector.gpu_util * 100 / len(img_list) + } + + perf_info = detector.det_times.report(average=True) + model_dir = FLAGS.model_dir + mode = FLAGS.run_mode + model_info = { + 'model_name': model_dir.strip('/').split('/')[-1], + 'precision': mode.split('_')[-1] + } + data_info = { + 'batch_size': FLAGS.batch_size, + 'shape': "dynamic_shape", + 'data_num': perf_info['img_num'] + } + det_log = PaddleInferBenchmark(detector.config, model_info, + data_info, perf_info, mems) + det_log('Det') + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + print_arguments(FLAGS) + FLAGS.device = FLAGS.device.upper() + 
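+ # normalize the device flag so lower-case values such as 'gpu' pass the check below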
assert FLAGS.device in ['CPU', 'GPU', 'XPU' + ], "device should be CPU, GPU or XPU" + main() diff --git a/deploy/pptracking/python/mot/__init__.py b/deploy/pptracking/python/mot/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5f57110ca5731af0d91236516cbc3154f08b44be --- /dev/null +++ b/deploy/pptracking/python/mot/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import matching +from . import tracker +from . import motion +from . import utils +from . import mtmct + +from .matching import * +from .tracker import * +from .motion import * +from .utils import * +from .mtmct import * diff --git a/deploy/pptracking/python/mot/matching/__init__.py b/deploy/pptracking/python/mot/matching/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..54c6680f79f16247c562a9da1024dd3e1de4c57f --- /dev/null +++ b/deploy/pptracking/python/mot/matching/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import jde_matching +from . import deepsort_matching + +from .jde_matching import * +from .deepsort_matching import * diff --git a/deploy/pptracking/python/mot/matching/deepsort_matching.py b/deploy/pptracking/python/mot/matching/deepsort_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..3859ccfbd1f384cc24716a94342230c2c8a2387f --- /dev/null +++ b/deploy/pptracking/python/mot/matching/deepsort_matching.py @@ -0,0 +1,379 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
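+# Association utilities ported from deep_sort: IoU cost, appearance distance metrics, min-cost matching, matching cascade and Kalman gating.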
+""" +This code is based on https://github.com/nwojke/deep_sort/tree/master/deep_sort +""" + +import numpy as np +from scipy.optimize import linear_sum_assignment +from ..motion import kalman_filter + +INFTY_COST = 1e+5 + +__all__ = [ + 'iou_1toN', + 'iou_cost', + '_nn_euclidean_distance', + '_nn_cosine_distance', + 'NearestNeighborDistanceMetric', + 'min_cost_matching', + 'matching_cascade', + 'gate_cost_matrix', +] + + +def iou_1toN(bbox, candidates): + """ + Computer intersection over union (IoU) by one box to N candidates. + + Args: + bbox (ndarray): A bounding box in format `(top left x, top left y, width, height)`. + candidates (ndarray): A matrix of candidate bounding boxes (one per row) in the + same format as `bbox`. + + Returns: + ious (ndarray): The intersection over union in [0, 1] between the `bbox` + and each candidate. A higher score means a larger fraction of the + `bbox` is occluded by the candidate. + """ + bbox_tl = bbox[:2] + bbox_br = bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + ious = area_intersection / (area_bbox + area_candidates - area_intersection) + return ious + + +def iou_cost(tracks, detections, track_indices=None, detection_indices=None): + """ + IoU distance metric. + + Args: + tracks (list[Track]): A list of tracks. + detections (list[Detection]): A list of detections. + track_indices (Optional[list[int]]): A list of indices to tracks that + should be matched. Defaults to all `tracks`. + detection_indices (Optional[list[int]]): A list of indices to detections + that should be matched. Defaults to all `detections`. + + Returns: + cost_matrix (ndarray): A cost matrix of shape len(track_indices), + len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = 1e+5 + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou_1toN(bbox, candidates) + return cost_matrix + + +def _nn_euclidean_distance(s, q): + """ + Compute pair-wise squared (Euclidean) distance between points in `s` and `q`. + + Args: + s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M. + q (ndarray): Query points: an LxM matrix of L samples of dimensionality M. + + Returns: + distances (ndarray): A vector of length M that contains for each entry in `q` the + smallest Euclidean distance to a sample in `s`. + """ + s, q = np.asarray(s), np.asarray(q) + if len(s) == 0 or len(q) == 0: + return np.zeros((len(s), len(q))) + s2, q2 = np.square(s).sum(axis=1), np.square(q).sum(axis=1) + distances = -2. 
* np.dot(s, q.T) + s2[:, None] + q2[None, :] + distances = np.clip(distances, 0., float(np.inf)) + + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(s, q): + """ + Compute pair-wise cosine distance between points in `s` and `q`. + + Args: + s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M. + q (ndarray): Query points: an LxM matrix of L samples of dimensionality M. + + Returns: + distances (ndarray): A vector of length M that contains for each entry in `q` the + smallest Euclidean distance to a sample in `s`. + """ + s = np.asarray(s) / np.linalg.norm(s, axis=1, keepdims=True) + q = np.asarray(q) / np.linalg.norm(q, axis=1, keepdims=True) + distances = 1. - np.dot(s, q.T) + + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + + Args: + metric (str): Either "euclidean" or "cosine". + matching_threshold (float): The matching threshold. Samples with larger + distance are considered an invalid match. + budget (Optional[int]): If not None, fix samples per class to at most + this number. Removes the oldest samples when the budget is reached. + + Attributes: + samples (Dict[int -> List[ndarray]]): A dictionary that maps from target + identities to the list of samples that have been observed so far. + """ + + def __init__(self, metric, matching_threshold, budget=None): + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError( + "Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """ + Update the distance metric with new data. + + Args: + features (ndarray): An NxM matrix of N features of dimensionality M. + targets (ndarray): An integer array of associated target identities. + active_targets (List[int]): A list of targets that are currently + present in the scene. + """ + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """ + Compute distance between features and targets. + + Args: + features (ndarray): An NxM matrix of N features of dimensionality M. + targets (list[int]): A list of targets to match the given `features` against. + + Returns: + cost_matrix (ndarray): a cost matrix of shape len(targets), len(features), + where element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix + + +def min_cost_matching(distance_metric, + max_distance, + tracks, + detections, + track_indices=None, + detection_indices=None): + """ + Solve linear assignment problem. + + Args: + distance_metric : + Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as + well as a list of N track indices and M detection indices. 
The + metric should return the NxM dimensional cost matrix, where element + (i, j) is the association cost between the i-th track in the given + track indices and the j-th detection in the given detection_indices. + max_distance (float): Gating threshold. Associations with cost larger + than this value are disregarded. + tracks (list[Track]): A list of predicted tracks at the current time + step. + detections (list[Detection]): A list of detections at the current time + step. + track_indices (list[int]): List of track indices that maps rows in + `cost_matrix` to tracks in `tracks`. + detection_indices (List[int]): List of detection indices that maps + columns in `cost_matrix` to detections in `detections`. + + Returns: + A tuple (List[(int, int)], List[int], List[int]) with the following + three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. + + cost_matrix = distance_metric(tracks, detections, track_indices, + detection_indices) + + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + indices = linear_sum_assignment(cost_matrix) + + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in indices[1]: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in indices[0]: + unmatched_tracks.append(track_idx) + for row, col in zip(indices[0], indices[1]): + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade(distance_metric, + max_distance, + cascade_depth, + tracks, + detections, + track_indices=None, + detection_indices=None): + """ + Run matching cascade. + + Args: + distance_metric : + Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as + well as a list of N track indices and M detection indices. The + metric should return the NxM dimensional cost matrix, where element + (i, j) is the association cost between the i-th track in the given + track indices and the j-th detection in the given detection_indices. + max_distance (float): Gating threshold. Associations with cost larger + than this value are disregarded. + cascade_depth (int): The cascade depth, should be se to the maximum + track age. + tracks (list[Track]): A list of predicted tracks at the current time + step. + detections (list[Detection]): A list of detections at the current time + step. + track_indices (list[int]): List of track indices that maps rows in + `cost_matrix` to tracks in `tracks`. + detection_indices (List[int]): List of detection indices that maps + columns in `cost_matrix` to detections in `detections`. + + Returns: + A tuple (List[(int, int)], List[int], List[int]) with the following + three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. 
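+
+ Note:
+ Tracks are matched level by level in order of increasing
+ `time_since_update`, so recently updated tracks are given priority.
+ A hedged usage sketch (names are illustrative, not defined in this
+ file): in the original DeepSORT tracker this is typically invoked as
+ `matching_cascade(gated_metric, metric.matching_threshold, max_age,
+ tracks, detections)`.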
+ """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [ + k for k in track_indices if tracks[k].time_since_update == 1 + level + ] + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix(kf, + cost_matrix, + tracks, + detections, + track_indices, + detection_indices, + gated_cost=INFTY_COST, + only_position=False): + """ + Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + + Args: + kf (object): The Kalman filter. + cost_matrix (ndarray): The NxM dimensional cost matrix, where N is the + number of track indices and M is the number of detection indices, + such that entry (i, j) is the association cost between + `tracks[track_indices[i]]` and `detections[detection_indices[j]]`. + tracks (list[Track]): A list of predicted tracks at the current time + step. + detections (list[Detection]): A list of detections at the current time + step. + track_indices (List[int]): List of track indices that maps rows in + `cost_matrix` to tracks in `tracks`. + detection_indices (List[int]): List of detection indices that maps + columns in `cost_matrix` to detections in `detections`. + gated_cost (Optional[float]): Entries in the cost matrix corresponding + to infeasible associations are set this value. Defaults to a very + large value. + only_position (Optional[bool]): If True, only the x, y position of the + state distribution is considered during gating. Default False. + """ + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray( + [detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = kf.gating_distance(track.mean, track.covariance, + measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + return cost_matrix diff --git a/deploy/pptracking/python/mot/matching/jde_matching.py b/deploy/pptracking/python/mot/matching/jde_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..53f891c8b60a6733d2ac48bba42ccb48dca88a68 --- /dev/null +++ b/deploy/pptracking/python/mot/matching/jde_matching.py @@ -0,0 +1,142 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
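+
+# Illustrative sketch (added for readability, not part of the referenced
+# Towards-Realtime-MOT code): in a JDE/FairMOT-style tracker the helpers in
+# this module are usually chained roughly as
+#
+#     dists = embedding_distance(tracked_stracks, detections)
+#     dists = fuse_motion(kalman_filter, dists, tracked_stracks, detections)
+#     matches, u_track, u_det = linear_assignment(dists, thresh=0.4)
+#     # second association on the leftovers, by IoU only:
+#     dists = iou_distance(remaining_stracks, remaining_detections)
+#     matches2, u_track, u_det = linear_assignment(dists, thresh=0.5)
+#
+# The variable names and threshold values above are examples only; the actual
+# values are taken from the tracker configuration.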
+""" +This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py +""" + +import lap +import scipy +import numpy as np +from scipy.spatial.distance import cdist +from ..motion import kalman_filter + +__all__ = [ + 'merge_matches', + 'linear_assignment', + 'cython_bbox_ious', + 'iou_distance', + 'embedding_distance', + 'fuse_motion', +] + + +def merge_matches(m1, m2, shape): + O, P, Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix( + (np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix( + (np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1 * M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty( + (0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple( + range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def cython_bbox_ious(atlbrs, btlbrs): + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + try: + import cython_bbox + except Exception as e: + print('cython_bbox not found, please install cython_bbox.' + 'for example: `pip install cython_bbox`.') + exit() + + ious = cython_bbox.bbox_overlaps( + np.ascontiguousarray( + atlbrs, dtype=np.float), + np.ascontiguousarray( + btlbrs, dtype=np.float)) + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU between two list[STrack]. + """ + if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or ( + len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = cython_bbox_ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + + +def embedding_distance(tracks, detections, metric='euclidean'): + """ + Compute cost based on features between two list[STrack]. 
+ """ + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray( + [track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray( + [track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, + metric)) # Nomalized features + return cost_matrix + + +def fuse_motion(kf, + cost_matrix, + tracks, + detections, + only_position=False, + lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, + track.covariance, + measurements, + only_position, + metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_ + ) * gating_distance + return cost_matrix diff --git a/deploy/pptracking/python/mot/motion/__init__.py b/deploy/pptracking/python/mot/motion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e42dd0b019d66d6ea07bec1ad90cf9a8d53d8172 --- /dev/null +++ b/deploy/pptracking/python/mot/motion/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import kalman_filter + +from .kalman_filter import * diff --git a/deploy/pptracking/python/mot/motion/kalman_filter.py b/deploy/pptracking/python/mot/motion/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..84c41ffd40960fc82a9611bc425857982f61ebaa --- /dev/null +++ b/deploy/pptracking/python/mot/motion/kalman_filter.py @@ -0,0 +1,267 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/kalman_filter.py +""" + +import numpy as np +import scipy.linalg + +__all__ = ['KalmanFilter'] +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. 
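+For example, gating on the full (x, y, a, h) measurement uses 4 degrees of
+freedom, for which the threshold below is 9.4877.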
+""" + +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919 +} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """ + Create track from unassociated measurement. + + Args: + measurement (ndarray): Bounding box coordinates (x, y, a, h) with + center position (x, y), aspect ratio a, and height h. + + Returns: + The mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are + initialized to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], 1e-5, + 10 * self._std_weight_velocity * measurement[3] + ] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """ + Run Kalman filter prediction step. + + Args: + mean (ndarray): The 8 dimensional mean vector of the object state + at the previous time step. + covariance (ndarray): The 8x8 dimensional covariance matrix of the + object state at the previous time step. + + Returns: + The mean vector and covariance matrix of the predicted state. + Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[3], self._std_weight_position * + mean[3], 1e-2, self._std_weight_position * mean[3] + ] + std_vel = [ + self._std_weight_velocity * mean[3], self._std_weight_velocity * + mean[3], 1e-5, self._std_weight_velocity * mean[3] + ] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot( + (self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """ + Project state distribution to measurement space. + + Args + mean (ndarray): The state's mean vector (8 dimensional array). + covariance (ndarray): The state's covariance matrix (8x8 dimensional). + + Returns: + The projected mean and covariance matrix of the given state estimate. 
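+
+ Note:
+ In matrix form this computes (H x, H P H^T + R), where H is the 4x8
+ update matrix, P is the 8x8 state covariance and R is the innovation
+ covariance built from the position standard-deviation weights.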
+ """ + std = [ + self._std_weight_position * mean[3], self._std_weight_position * + mean[3], 1e-1, self._std_weight_position * mean[3] + ] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot((self._update_mat, covariance, + self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """ + Run Kalman filter prediction step (Vectorized version). + + Args: + mean (ndarray): The Nx8 dimensional mean matrix of the object states + at the previous time step. + covariance (ndarray): The Nx8x8 dimensional covariance matrics of the + object states at the previous time step. + + Returns: + The mean vector and covariance matrix of the predicted state. + Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], self._std_weight_position * + mean[:, 3], 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3] + ] + std_vel = [ + self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * + mean[:, 3], 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3] + ] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """ + Run Kalman filter correction step. + + Args: + mean (ndarray): The predicted state's mean vector (8 dimensional). + covariance (ndarray): The state's covariance matrix (8x8 dimensional). + measurement (ndarray): The 4 dimensional measurement vector + (x, y, a, h), where (x, y) is the center position, a the aspect + ratio, and h the height of the bounding box. + + Returns: + The measurement-corrected state distribution. + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), + np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot( + (kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, + mean, + covariance, + measurements, + only_position=False, + metric='maha'): + """ + Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Args: + mean (ndarray): Mean vector over the state distribution (8 + dimensional). + covariance (ndarray): Covariance of the state distribution (8x8 + dimensional). + measurements (ndarray): An Nx4 dimensional matrix of N measurements, + each in format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position (Optional[bool]): If True, distance computation is + done with respect to the bounding box center position only. + metric (str): Metric type, 'gaussian' or 'maha'. 
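+ 'gaussian' computes the squared Euclidean distance in measurement
+ space, while 'maha' (the default) computes the squared Mahalanobis
+ distance using the Cholesky factor of the projected covariance.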
+ + Returns + An array of length N, where the i-th element contains the squared + Mahalanobis distance between (mean, covariance) and `measurements[i]`. + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, + d.T, + lower=True, + check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') diff --git a/deploy/pptracking/python/mot/mtmct/__init__.py b/deploy/pptracking/python/mot/mtmct/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4d0c4f3b6616ab2419d43242a5b3cb33651a4a75 --- /dev/null +++ b/deploy/pptracking/python/mot/mtmct/__init__.py @@ -0,0 +1,24 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import utils +from . import postprocess +from .utils import * +from .postprocess import * + +# The following codes are strongly related to zone and camera parameters +from . import camera_utils +from . import zone +from .camera_utils import * +from .zone import * diff --git a/deploy/pptracking/python/mot/mtmct/camera_utils.py b/deploy/pptracking/python/mot/mtmct/camera_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6455785b26b4818f86a328e6a366c542abc60927 --- /dev/null +++ b/deploy/pptracking/python/mot/mtmct/camera_utils.py @@ -0,0 +1,279 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
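+
+# Illustrative note (added for readability, not part of the referenced
+# AIC21-MTMC code): this module implements the camera-aware variant of the
+# cross-camera clustering. The spatio-temporal filter `st_filter` uses the
+# hard-coded `CAM_DIST` pairwise offsets between the six S06 cameras
+# (ids 41-46) to zero out infeasible matches, and `get_labels_with_camera`
+# is the entry point used by `sub_cluster(..., use_camera=True)` in
+# postprocess.py, e.g.:
+#
+#     labels = get_labels_with_camera(cid_tid_dict, cid_tids,
+#                                     use_ff=True, use_rerank=True,
+#                                     use_st_filter=False)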
+""" +https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools +""" + +import numpy as np +from sklearn.cluster import AgglomerativeClustering +from .utils import get_dire, get_match, get_cid_tid, combin_feature, combin_cluster +from .utils import normalize, intracam_ignore, visual_rerank + +__all__ = [ + 'st_filter', + 'get_labels_with_camera', +] + +CAM_DIST = [[0, 40, 55, 100, 120, 145], [40, 0, 15, 60, 80, 105], + [55, 15, 0, 40, 65, 90], [100, 60, 40, 0, 20, 45], + [120, 80, 65, 20, 0, 25], [145, 105, 90, 45, 25, 0]] + + +def st_filter(st_mask, cid_tids, cid_tid_dict): + count = len(cid_tids) + for i in range(count): + i_tracklet = cid_tid_dict[cid_tids[i]] + i_cid = i_tracklet['cam'] + i_dire = get_dire(i_tracklet['zone_list'], i_cid) + i_iot = i_tracklet['io_time'] + for j in range(count): + j_tracklet = cid_tid_dict[cid_tids[j]] + j_cid = j_tracklet['cam'] + j_dire = get_dire(j_tracklet['zone_list'], j_cid) + j_iot = j_tracklet['io_time'] + + match_dire = True + cam_dist = CAM_DIST[i_cid - 41][j_cid - 41] + # if time overlopped + if i_iot[0] - cam_dist < j_iot[0] and j_iot[0] < i_iot[ + 1] + cam_dist: + match_dire = False + if i_iot[0] - cam_dist < j_iot[1] and j_iot[1] < i_iot[ + 1] + cam_dist: + match_dire = False + + # not match after go out + if i_dire[1] in [1, 2]: # i out + if i_iot[0] < j_iot[1] + cam_dist: + match_dire = False + + if i_dire[1] in [1, 2]: + if i_dire[0] in [3] and i_cid > j_cid: + match_dire = False + if i_dire[0] in [4] and i_cid < j_cid: + match_dire = False + + if i_cid in [41] and i_dire[1] in [4]: + if i_iot[0] < j_iot[1] + cam_dist: + match_dire = False + if i_iot[1] > 199: + match_dire = False + if i_cid in [46] and i_dire[1] in [3]: + if i_iot[0] < j_iot[1] + cam_dist: + match_dire = False + + # match after come into + if i_dire[0] in [1, 2]: + if i_iot[1] > j_iot[0] - cam_dist: + match_dire = False + + if i_dire[0] in [1, 2]: + if i_dire[1] in [3] and i_cid > j_cid: + match_dire = False + if i_dire[1] in [4] and i_cid < j_cid: + match_dire = False + + is_ignore = False + if ((i_dire[0] == i_dire[1] and i_dire[0] in [3, 4]) or + (j_dire[0] == j_dire[1] and j_dire[0] in [3, 4])): + is_ignore = True + + if not is_ignore: + # direction conflict + if (i_dire[0] in [3] and j_dire[0] in [4]) or ( + i_dire[1] in [3] and j_dire[1] in [4]): + match_dire = False + # filter before going next scene + if i_dire[1] in [3] and i_cid < j_cid: + if i_iot[1] > j_iot[1] - cam_dist: + match_dire = False + if i_dire[1] in [4] and i_cid > j_cid: + if i_iot[1] > j_iot[1] - cam_dist: + match_dire = False + + if i_dire[0] in [3] and i_cid < j_cid: + if i_iot[0] < j_iot[0] + cam_dist: + match_dire = False + if i_dire[0] in [4] and i_cid > j_cid: + if i_iot[0] < j_iot[0] + cam_dist: + match_dire = False + ## 3-30 + ## 4-1 + if i_dire[0] in [3] and i_cid > j_cid: + if i_iot[1] > j_iot[0] - cam_dist: + match_dire = False + if i_dire[0] in [4] and i_cid < j_cid: + if i_iot[1] > j_iot[0] - cam_dist: + match_dire = False + # filter before going next scene + ## 4-7 + if i_dire[1] in [3] and i_cid > j_cid: + if i_iot[0] < j_iot[1] + cam_dist: + match_dire = False + if i_dire[1] in [4] and i_cid < j_cid: + if i_iot[0] < j_iot[1] + cam_dist: + match_dire = False + else: + if i_iot[1] > 199: + if i_dire[0] in [3] and i_cid < j_cid: + if i_iot[0] < j_iot[0] + cam_dist: + match_dire = False + if i_dire[0] in [4] and i_cid > j_cid: + if i_iot[0] < j_iot[0] + cam_dist: + match_dire = False + if i_dire[0] in [3] and i_cid > j_cid: + match_dire = False + if i_dire[0] in [4] 
and i_cid < j_cid: + match_dire = False + if i_iot[0] < 1: + if i_dire[1] in [3] and i_cid > j_cid: + match_dire = False + if i_dire[1] in [4] and i_cid < j_cid: + match_dire = False + + if not match_dire: + st_mask[i, j] = 0.0 + st_mask[j, i] = 0.0 + return st_mask + + +def subcam_list(cid_tid_dict, cid_tids): + sub_3_4 = dict() + sub_4_3 = dict() + for cid_tid in cid_tids: + cid, tid = cid_tid + tracklet = cid_tid_dict[cid_tid] + zs, ze = get_dire(tracklet['zone_list'], cid) + if zs in [3] and cid not in [46]: # 4 to 3 + if not cid + 1 in sub_4_3: + sub_4_3[cid + 1] = [] + sub_4_3[cid + 1].append(cid_tid) + if ze in [4] and cid not in [41]: # 4 to 3 + if not cid in sub_4_3: + sub_4_3[cid] = [] + sub_4_3[cid].append(cid_tid) + if zs in [4] and cid not in [41]: # 3 to 4 + if not cid - 1 in sub_3_4: + sub_3_4[cid - 1] = [] + sub_3_4[cid - 1].append(cid_tid) + if ze in [3] and cid not in [46]: # 3 to 4 + if not cid in sub_3_4: + sub_3_4[cid] = [] + sub_3_4[cid].append(cid_tid) + sub_cid_tids = dict() + for i in sub_3_4: + sub_cid_tids[(i, i + 1)] = sub_3_4[i] + for i in sub_4_3: + sub_cid_tids[(i, i - 1)] = sub_4_3[i] + return sub_cid_tids + + +def subcam_list2(cid_tid_dict, cid_tids): + sub_dict = dict() + for cid_tid in cid_tids: + cid, tid = cid_tid + if cid not in [41]: + if not cid in sub_dict: + sub_dict[cid] = [] + sub_dict[cid].append(cid_tid) + if cid not in [46]: + if not cid + 1 in sub_dict: + sub_dict[cid + 1] = [] + sub_dict[cid + 1].append(cid_tid) + return sub_dict + + +def get_sim_matrix(cid_tid_dict, + cid_tids, + use_ff=True, + use_rerank=True, + use_st_filter=False): + # Note: carame releated get_sim_matrix function, + # which is different from the one in utils.py. + count = len(cid_tids) + + q_arr = np.array( + [cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)]) + g_arr = np.array( + [cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)]) + q_arr = normalize(q_arr, axis=1) + g_arr = normalize(g_arr, axis=1) + + st_mask = np.ones((count, count), dtype=np.float32) + st_mask = intracam_ignore(st_mask, cid_tids) + + # different from utils.py + if use_st_filter: + st_mask = st_filter(st_mask, cid_tids, cid_tid_dict) + + visual_sim_matrix = visual_rerank( + q_arr, g_arr, cid_tids, use_ff=use_ff, use_rerank=use_rerank) + visual_sim_matrix = visual_sim_matrix.astype('float32') + + np.set_printoptions(precision=3) + sim_matrix = visual_sim_matrix * st_mask + + np.fill_diagonal(sim_matrix, 0) + return sim_matrix + + +def get_labels_with_camera(cid_tid_dict, + cid_tids, + use_ff=True, + use_rerank=True, + use_st_filter=False): + # 1st cluster + sub_cid_tids = subcam_list(cid_tid_dict, cid_tids) + sub_labels = dict() + dis_thrs = [0.7, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5] + + for i, sub_c_to_c in enumerate(sub_cid_tids): + sim_matrix = get_sim_matrix( + cid_tid_dict, + sub_cid_tids[sub_c_to_c], + use_ff=use_ff, + use_rerank=use_rerank, + use_st_filter=use_st_filter) + cluster_labels = AgglomerativeClustering( + n_clusters=None, + distance_threshold=1 - dis_thrs[i], + affinity='precomputed', + linkage='complete').fit_predict(1 - sim_matrix) + labels = get_match(cluster_labels) + cluster_cid_tids = get_cid_tid(labels, sub_cid_tids[sub_c_to_c]) + sub_labels[sub_c_to_c] = cluster_cid_tids + labels, sub_cluster = combin_cluster(sub_labels, cid_tids) + + # 2nd cluster + cid_tid_dict_new = combin_feature(cid_tid_dict, sub_cluster) + sub_cid_tids = subcam_list2(cid_tid_dict_new, cid_tids) + sub_labels = dict() + for i, sub_c_to_c in enumerate(sub_cid_tids): + 
sim_matrix = get_sim_matrix( + cid_tid_dict_new, + sub_cid_tids[sub_c_to_c], + use_ff=use_ff, + use_rerank=use_rerank, + use_st_filter=use_st_filter) + cluster_labels = AgglomerativeClustering( + n_clusters=None, + distance_threshold=1 - 0.1, + affinity='precomputed', + linkage='complete').fit_predict(1 - sim_matrix) + labels = get_match(cluster_labels) + cluster_cid_tids = get_cid_tid(labels, sub_cid_tids[sub_c_to_c]) + sub_labels[sub_c_to_c] = cluster_cid_tids + labels, sub_cluster = combin_cluster(sub_labels, cid_tids) + + return labels diff --git a/deploy/pptracking/python/mot/mtmct/postprocess.py b/deploy/pptracking/python/mot/mtmct/postprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..59e8d14f9465a54db7a165a9f515c438c267a0d6 --- /dev/null +++ b/deploy/pptracking/python/mot/mtmct/postprocess.py @@ -0,0 +1,376 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools +""" + +import re +import cv2 +from tqdm import tqdm +import pickle +import os +import os.path as osp +from os.path import join as opj +import numpy as np +import motmetrics as mm +from functools import reduce + +from .utils import parse_pt_gt, parse_pt, compare_dataframes_mtmc +from .utils import get_labels, getData, gen_new_mot +from .camera_utils import get_labels_with_camera +from .zone import Zone +from ..utils import plot_tracking + +__all__ = [ + 'trajectory_fusion', + 'sub_cluster', + 'gen_res', + 'print_mtmct_result', + 'get_mtmct_matching_results', + 'save_mtmct_crops', + 'save_mtmct_vis_results', +] + + +def trajectory_fusion(mot_feature, cid, cid_bias, use_zone=False, zone_path=''): + cur_bias = cid_bias[cid] + mot_list_break = {} + if use_zone: + zones = Zone(zone_path=zone_path) + zones.set_cam(cid) + mot_list = parse_pt(mot_feature, zones) + else: + mot_list = parse_pt(mot_feature) + + if use_zone: + mot_list = zones.break_mot(mot_list, cid) + mot_list = zones.filter_mot(mot_list, cid) # filter by zone + mot_list = zones.filter_bbox(mot_list, cid) # filter bbox + + mot_list_break = gen_new_mot(mot_list) # save break feature for gen result + + tid_data = dict() + for tid in mot_list: + tracklet = mot_list[tid] + if len(tracklet) <= 1: + continue + frame_list = list(tracklet.keys()) + frame_list.sort() + # filter area too large + zone_list = [tracklet[f]['zone'] for f in frame_list] + feature_list = [ + tracklet[f]['feat'] for f in frame_list + if (tracklet[f]['bbox'][3] - tracklet[f]['bbox'][1] + ) * (tracklet[f]['bbox'][2] - tracklet[f]['bbox'][0]) > 2000 + ] + if len(feature_list) < 2: + feature_list = [tracklet[f]['feat'] for f in frame_list] + io_time = [ + cur_bias + frame_list[0] / 10., cur_bias + frame_list[-1] / 10. 
+ ] + all_feat = np.array([feat for feat in feature_list]) + mean_feat = np.mean(all_feat, axis=0) + tid_data[tid] = { + 'cam': cid, + 'tid': tid, + 'mean_feat': mean_feat, + 'zone_list': zone_list, + 'frame_list': frame_list, + 'tracklet': tracklet, + 'io_time': io_time + } + return tid_data, mot_list_break + + +def sub_cluster(cid_tid_dict, + scene_cluster, + use_ff=True, + use_rerank=True, + use_camera=False, + use_st_filter=False): + ''' + cid_tid_dict: all camera_id and track_id + scene_cluster: like [41, 42, 43, 44, 45, 46] in AIC21 MTMCT S06 test videos + ''' + assert (len(scene_cluster) != 0), "Error: scene_cluster length equals 0" + cid_tids = sorted( + [key for key in cid_tid_dict.keys() if key[0] in scene_cluster]) + if use_camera: + clu = get_labels_with_camera( + cid_tid_dict, + cid_tids, + use_ff=use_ff, + use_rerank=use_rerank, + use_st_filter=use_st_filter) + else: + clu = get_labels( + cid_tid_dict, + cid_tids, + use_ff=use_ff, + use_rerank=use_rerank, + use_st_filter=use_st_filter) + new_clu = list() + for c_list in clu: + if len(c_list) <= 1: continue + cam_list = [cid_tids[c][0] for c in c_list] + if len(cam_list) != len(set(cam_list)): continue + new_clu.append([cid_tids[c] for c in c_list]) + all_clu = new_clu + cid_tid_label = dict() + for i, c_list in enumerate(all_clu): + for c in c_list: + cid_tid_label[c] = i + 1 + return cid_tid_label + + +def gen_res(output_dir_filename, + scene_cluster, + map_tid, + mot_list_breaks, + use_roi=False, + roi_dir=''): + f_w = open(output_dir_filename, 'w') + for idx, mot_feature in enumerate(mot_list_breaks): + cid = scene_cluster[idx] + img_rects = parse_pt_gt(mot_feature) + if use_roi: + assert (roi_dir != ''), "Error: roi_dir is not empty!" + roi = cv2.imread(os.path.join(roi_dir, f'c{cid:03d}/roi.jpg'), 0) + height, width = roi.shape + + for fid in img_rects: + tid_rects = img_rects[fid] + fid = int(fid) + 1 + for tid_rect in tid_rects: + tid = tid_rect[0] + rect = tid_rect[1:] + cx = 0.5 * rect[0] + 0.5 * rect[2] + cy = 0.5 * rect[1] + 0.5 * rect[3] + w = rect[2] - rect[0] + w = min(w * 1.2, w + 40) + h = rect[3] - rect[1] + h = min(h * 1.2, h + 40) + rect[2] -= rect[0] + rect[3] -= rect[1] + rect[0] = max(0, rect[0]) + rect[1] = max(0, rect[1]) + x1, y1 = max(0, cx - 0.5 * w), max(0, cy - 0.5 * h) + if use_roi: + x2, y2 = min(width, cx + 0.5 * w), min(height, cy + 0.5 * h) + else: + x2, y2 = cx + 0.5 * w, cy + 0.5 * h + w, h = x2 - x1, y2 - y1 + new_rect = list(map(int, [x1, y1, w, h])) + rect = list(map(int, rect)) + if (cid, tid) in map_tid: + new_tid = map_tid[(cid, tid)] + f_w.write( + str(cid) + ' ' + str(new_tid) + ' ' + str(fid) + ' ' + + ' '.join(map(str, new_rect)) + ' -1 -1' + '\n') + print('gen_res: write file in {}'.format(output_dir_filename)) + f_w.close() + + +def print_mtmct_result(gt_file, pred_file): + names = [ + 'CameraId', 'Id', 'FrameId', 'X', 'Y', 'Width', 'Height', 'Xworld', + 'Yworld' + ] + gt = getData(gt_file, names=names) + pred = getData(pred_file, names=names) + summary = compare_dataframes_mtmc(gt, pred) + print('MTMCT summary: ', summary.columns.tolist()) + + formatters = { + 'idf1': '{:2.2f}'.format, + 'idp': '{:2.2f}'.format, + 'idr': '{:2.2f}'.format, + 'mota': '{:2.2f}'.format + } + summary = summary[['idf1', 'idp', 'idr', 'mota']] + summary.loc[:, 'idp'] *= 100 + summary.loc[:, 'idr'] *= 100 + summary.loc[:, 'idf1'] *= 100 + summary.loc[:, 'mota'] *= 100 + print( + mm.io.render_summary( + summary, + formatters=formatters, + namemap=mm.io.motchallenge_metric_names)) + + +def 
get_mtmct_matching_results(pred_mtmct_file, secs_interval=0.5, + video_fps=20): + res = np.loadtxt(pred_mtmct_file) # 'cid, tid, fid, x1, y1, w, h, -1, -1' + carame_ids = list(map(int, np.unique(res[:, 0]))) + + num_track_ids = int(np.max(res[:, 1])) + num_frames = int(np.max(res[:, 2])) + + res = res[:, :7] + # each line in res: 'cid, tid, fid, x1, y1, w, h' + + carame_tids = [] + carame_results = dict() + for c_id in carame_ids: + carame_results[c_id] = res[res[:, 0] == c_id] + tids = np.unique(carame_results[c_id][:, 1]) + tids = list(map(int, tids)) + carame_tids.append(tids) + + # select common tids throughout each video + common_tids = reduce(np.intersect1d, carame_tids) + if len(common_tids) == 0: + print( + 'No common tracked ids in these videos, please check your MOT result or select new videos.' + ) + return None + + # get mtmct matching results by cid_tid_fid_results[c_id][t_id][f_id] + cid_tid_fid_results = dict() + cid_tid_to_fids = dict() + interval = int(secs_interval * video_fps) # preferably less than 10 + for c_id in carame_ids: + cid_tid_fid_results[c_id] = dict() + cid_tid_to_fids[c_id] = dict() + for t_id in common_tids: + tid_mask = carame_results[c_id][:, 1] == t_id + cid_tid_fid_results[c_id][t_id] = dict() + + carame_trackid_results = carame_results[c_id][tid_mask] + fids = np.unique(carame_trackid_results[:, 2]) + fids = fids[fids % interval == 0] + fids = list(map(int, fids)) + cid_tid_to_fids[c_id][t_id] = fids + + for f_id in fids: + st_frame = f_id + ed_frame = f_id + interval + + st_mask = carame_trackid_results[:, 2] >= st_frame + ed_mask = carame_trackid_results[:, 2] < ed_frame + frame_mask = np.logical_and(st_mask, ed_mask) + cid_tid_fid_results[c_id][t_id][f_id] = carame_trackid_results[ + frame_mask] + + return carame_results, cid_tid_fid_results + + +def save_mtmct_crops(cid_tid_fid_res, + images_dir, + crops_dir, + width=300, + height=200): + carame_ids = cid_tid_fid_res.keys() + seqs_folder = os.listdir(images_dir) + seqs = [] + for x in seqs_folder: + if os.path.isdir(os.path.join(images_dir, x)): + seqs.append(x) + assert len(seqs) == len(carame_ids) + seqs.sort() + + if not os.path.exists(crops_dir): + os.makedirs(crops_dir) + + common_tids = list(cid_tid_fid_res[list(carame_ids)[0]].keys()) + + # get crops by name 'tid_cid_fid.jpg + for t_id in common_tids: + for i, c_id in enumerate(carame_ids): + infer_dir = os.path.join(images_dir, seqs[i]) + if os.path.exists(os.path.join(infer_dir, 'img1')): + infer_dir = os.path.join(infer_dir, 'img1') + all_images = os.listdir(infer_dir) + all_images.sort() + + for f_id in cid_tid_fid_res[c_id][t_id].keys(): + frame_idx = f_id - 1 if f_id > 0 else 0 + im_path = os.path.join(infer_dir, all_images[frame_idx]) + + im = cv2.imread(im_path) # (H, W, 3) + + track = cid_tid_fid_res[c_id][t_id][f_id][ + 0] # only select one track + cid, tid, fid, x1, y1, w, h = [int(v) for v in track] + clip = im[y1:(y1 + h), x1:(x1 + w)] + clip = cv2.resize(clip, (width, height)) + + cv2.imwrite( + os.path.join(crops_dir, + 'tid{:06d}_cid{:06d}_fid{:06d}.jpg'.format( + tid, cid, fid)), clip) + + print("Finish cropping image of tracked_id {} in camera: {}".format( + t_id, c_id)) + + +def save_mtmct_vis_results(carame_results, + images_dir, + save_dir, + save_videos=False): + # carame_results: 'cid, tid, fid, x1, y1, w, h' + carame_ids = carame_results.keys() + seqs_folder = os.listdir(images_dir) + seqs = [] + for x in seqs_folder: + if os.path.isdir(os.path.join(images_dir, x)): + seqs.append(x) + assert len(seqs) == 
len(carame_ids) + seqs.sort() + + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + for i, c_id in enumerate(carame_ids): + print("Start visualization for camera {} of sequence {}.".format( + c_id, seqs[i])) + cid_save_dir = os.path.join(save_dir, '{}'.format(seqs[i])) + if not os.path.exists(cid_save_dir): + os.makedirs(cid_save_dir) + + infer_dir = os.path.join(images_dir, seqs[i]) + if os.path.exists(os.path.join(infer_dir, 'img1')): + infer_dir = os.path.join(infer_dir, 'img1') + all_images = os.listdir(infer_dir) + all_images.sort() + + for f_id, im_path in enumerate(all_images): + img = cv2.imread(os.path.join(infer_dir, im_path)) + tracks = carame_results[c_id][carame_results[c_id][:, 2] == f_id] + if tracks.shape[0] > 0: + tracked_ids = tracks[:, 1] + xywhs = tracks[:, 3:] + online_im = plot_tracking( + img, xywhs, tracked_ids, scores=None, frame_id=f_id) + else: + online_im = img + print('Frame {} of seq {} has no tracking results'.format( + f_id, seqs[i])) + + cv2.imwrite( + os.path.join(cid_save_dir, '{:05d}.jpg'.format(f_id)), + online_im) + if f_id % 40 == 0: + print('Processing frame {}'.format(f_id)) + + if save_videos: + output_video_path = os.path.join(cid_save_dir, '..', + '{}_mtmct_vis.mp4'.format(seqs[i])) + cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format( + cid_save_dir, output_video_path) + os.system(cmd_str) + print('Save camera {} video in {}.'.format(seqs[i], + output_video_path)) diff --git a/deploy/pptracking/python/mot/mtmct/utils.py b/deploy/pptracking/python/mot/mtmct/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..f119794656e799d9990059d4264f161b5eef190b --- /dev/null +++ b/deploy/pptracking/python/mot/mtmct/utils.py @@ -0,0 +1,576 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
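+
+# Illustrative note (added for readability, not part of the referenced
+# AIC21-MTMC code): this module provides the camera-independent clustering
+# path. A typical flow, sketched with the names defined below, is
+#
+#     sim_matrix = get_sim_matrix(cid_tid_dict, cid_tids,
+#                                 use_ff=True, use_rerank=True)
+#     cluster_labels = AgglomerativeClustering(
+#         n_clusters=None, distance_threshold=0.5,
+#         affinity='precomputed',
+#         linkage='complete').fit_predict(1 - sim_matrix)
+#
+# which is what `get_labels` wraps, followed by a second clustering pass on
+# merged features (see `combin_feature`).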
+""" +https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools +""" + +import os +import re +import cv2 +import paddle +import numpy as np +from sklearn import preprocessing +from sklearn.cluster import AgglomerativeClustering +import gc +import motmetrics as mm +import pandas as pd +from tqdm import tqdm +import warnings +warnings.filterwarnings("ignore") + +__all__ = [ + 'parse_pt', 'parse_bias', 'get_dire', 'parse_pt_gt', + 'compare_dataframes_mtmc', 'get_sim_matrix', 'get_labels', 'getData', + 'gen_new_mot' +] + + +def parse_pt(mot_feature, zones=None): + mot_list = dict() + for line in mot_feature: + fid = int(re.sub('[a-z,A-Z]', "", mot_feature[line]['frame'])) + tid = mot_feature[line]['id'] + bbox = list(map(lambda x: int(float(x)), mot_feature[line]['bbox'])) + if tid not in mot_list: + mot_list[tid] = dict() + out_dict = mot_feature[line] + if zones is not None: + out_dict['zone'] = zones.get_zone(bbox) + else: + out_dict['zone'] = None + mot_list[tid][fid] = out_dict + return mot_list + + +def gen_new_mot(mot_list): + out_dict = dict() + for tracklet in mot_list: + tracklet = mot_list[tracklet] + for f in tracklet: + out_dict[tracklet[f]['imgname']] = tracklet[f] + return out_dict + + +def mergesetfeat1_notrk(P, neg_vector, in_feats, in_labels): + out_feats = [] + for i in range(in_feats.shape[0]): + camera_id = in_labels[i, 1] + feat = in_feats[i] - neg_vector[camera_id] + feat = P[camera_id].dot(feat) + feat = feat / np.linalg.norm(feat, ord=2) + out_feats.append(feat) + out_feats = np.vstack(out_feats) + return out_feats + + +def compute_P2(prb_feats, gal_feats, gal_labels, la=3.0): + X = gal_feats + neg_vector = {} + u_labels = np.unique(gal_labels[:, 1]) + P = {} + for label in u_labels: + curX = gal_feats[gal_labels[:, 1] == label, :] + neg_vector[label] = np.mean(curX, axis=0) + P[label] = np.linalg.inv( + curX.T.dot(curX) + curX.shape[0] * la * np.eye(X.shape[1])) + return P, neg_vector + + +def parse_bias(cameras_bias): + cid_bias = dict() + for cameras in cameras_bias.keys(): + cameras_id = re.sub('[a-z,A-Z]', "", cameras) + cameras_id = int(cameras_id) + bias = cameras_bias[cameras] + cid_bias[cameras_id] = float(bias) + return cid_bias + + +def get_dire(zone_list, cid): + zs, ze = zone_list[0], zone_list[-1] + return (zs, ze) + + +def intracam_ignore(st_mask, cid_tids): + count = len(cid_tids) + for i in range(count): + for j in range(count): + if cid_tids[i][0] == cid_tids[j][0]: + st_mask[i, j] = 0. 
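+ # pairs of tracklets from the same camera get similarity 0, so
+ # intra-camera pairs are excluded from cross-camera matching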
+ return st_mask + + +def mergesetfeat(in_feats, in_labels, in_tracks): + trackset = list(set(list(in_tracks))) + out_feats = [] + out_labels = [] + for track in trackset: + feat = np.mean(in_feats[in_tracks == track], axis=0) + feat = feat / np.linalg.norm(feat, ord=2) + label = in_labels[in_tracks == track][0] + out_feats.append(feat) + out_labels.append(label) + out_feats = np.vstack(out_feats) + out_labels = np.vstack(out_labels) + return out_feats, out_labels + + +def mergesetfeat3(X, labels, gX, glabels, beta=0.08, knn=20, lr=0.5): + for i in range(0, X.shape[0]): + if i % 1000 == 0: + print('feat3:%d/%d' % (i, X.shape[0])) + knnX = gX[glabels[:, 1] != labels[i, 1], :] + sim = knnX.dot(X[i, :]) + knnX = knnX[sim > 0, :] + sim = sim[sim > 0] + if len(sim) > 0: + idx = np.argsort(-sim) + if len(sim) > 2 * knn: + sim = sim[idx[:2 * knn]] + knnX = knnX[idx[:2 * knn], :] + else: + sim = sim[idx] + knnX = knnX[idx, :] + knn = min(knn, len(sim)) + knn_pos_weight = np.exp((sim[:knn] - 1) / beta) + knn_neg_weight = np.ones(len(sim) - knn) + knn_pos_prob = knn_pos_weight / np.sum(knn_pos_weight) + knn_neg_prob = knn_neg_weight / np.sum(knn_neg_weight) + X[i, :] += lr * (knn_pos_prob.dot(knnX[:knn, :]) - + knn_neg_prob.dot(knnX[knn:, :])) + X[i, :] /= np.linalg.norm(X[i, :]) + return X + + +def run_fic(prb_feats, gal_feats, prb_labels, gal_labels, la=3.0): + P, neg_vector = compute_P2(prb_feats, gal_feats, gal_labels, la) + prb_feats_new = mergesetfeat1_notrk(P, neg_vector, prb_feats, prb_labels) + gal_feats_new = mergesetfeat1_notrk(P, neg_vector, gal_feats, gal_labels) + return prb_feats_new, gal_feats_new + + +def run_fac(prb_feats, + gal_feats, + prb_labels, + gal_labels, + beta=0.08, + knn=20, + lr=0.5, + prb_epoch=2, + gal_epoch=3): + gal_feats_new = gal_feats.copy() + for i in range(prb_epoch): + gal_feats_new = mergesetfeat3(gal_feats_new, gal_labels, gal_feats, + gal_labels, beta, knn, lr) + prb_feats_new = prb_feats.copy() + for i in range(gal_epoch): + prb_feats_new = mergesetfeat3(prb_feats_new, prb_labels, gal_feats_new, + gal_labels, beta, knn, lr) + return prb_feats_new, gal_feats_new + + +def euclidean_distance(qf, gf): + m = qf.shape[0] + n = gf.shape[0] + dist_mat = 2 - 2 * paddle.matmul(qf, gf.t()) + return dist_mat + + +def batch_paddle_topk(qf, gf, k1, N=6000): + m = qf.shape[0] + n = gf.shape[0] + dist_mat = [] + initial_rank = [] + for j in range(n // N + 1): + temp_gf = gf[j * N:j * N + N] + temp_qd = [] + for i in range(m // N + 1): + temp_qf = qf[i * N:i * N + N] + temp_d = euclidean_distance(temp_qf, temp_gf) + temp_qd.append(temp_d) + temp_qd = paddle.concat(temp_qd, axis=0) + temp_qd = temp_qd / (paddle.max(temp_qd, axis=0)[0]) + temp_qd = temp_qd.t() + initial_rank.append( + paddle.topk( + temp_qd, k=k1, axis=1, largest=False, sorted=True)[1]) + del temp_qd + del temp_gf + del temp_qf + del temp_d + initial_rank = paddle.concat(initial_rank, axis=0).cpu().numpy() + return initial_rank + + +def batch_euclidean_distance(qf, gf, N=6000): + m = qf.shape[0] + n = gf.shape[0] + dist_mat = [] + for j in range(n // N + 1): + temp_gf = gf[j * N:j * N + N] + temp_qd = [] + for i in range(m // N + 1): + temp_qf = qf[i * N:i * N + N] + temp_d = euclidean_distance(temp_qf, temp_gf) + temp_qd.append(temp_d) + temp_qd = paddle.concat(temp_qd, axis=0) + temp_qd = temp_qd / (paddle.max(temp_qd, axis=0)[0]) + dist_mat.append(temp_qd.t()) # transpose + del temp_qd + del temp_gf + del temp_qf + del temp_d + dist_mat = paddle.concat(dist_mat, axis=0) + return dist_mat + + +def 
batch_v(feat, R, all_num): + V = np.zeros((all_num, all_num), dtype=np.float32) + m = feat.shape[0] + for i in tqdm(range(m)): + temp_gf = feat[i].unsqueeze(0) + temp_qd = euclidean_distance(temp_gf, feat) + temp_qd = temp_qd / (paddle.max(temp_qd)) + temp_qd = temp_qd.squeeze() + temp_qd = temp_qd.numpy()[R[i].tolist()] + temp_qd = paddle.to_tensor(temp_qd) + weight = paddle.exp(-temp_qd) + weight = (weight / paddle.sum(weight)).numpy() + V[i, R[i]] = weight.astype(np.float32) + return V + + +def k_reciprocal_neigh(initial_rank, i, k1): + forward_k_neigh_index = initial_rank[i, :k1 + 1] + backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1] + fi = np.where(backward_k_neigh_index == i)[0] + return forward_k_neigh_index[fi] + + +def ReRank2(probFea, galFea, k1=20, k2=6, lambda_value=0.3): + # The following naming, e.g. gallery_num, is different from outer scope. + # Don't care about it. + query_num = probFea.shape[0] + all_num = query_num + galFea.shape[0] + feat = paddle.concat([probFea, galFea], axis=0) + initial_rank = batch_paddle_topk(feat, feat, k1 + 1, N=6000) + # del feat + del probFea + del galFea + gc.collect() # empty memory + R = [] + for i in tqdm(range(all_num)): + # k-reciprocal neighbors + k_reciprocal_index = k_reciprocal_neigh(initial_rank, i, k1) + k_reciprocal_expansion_index = k_reciprocal_index + for j in range(len(k_reciprocal_index)): + candidate = k_reciprocal_index[j] + candidate_k_reciprocal_index = k_reciprocal_neigh( + initial_rank, candidate, int(np.around(k1 / 2))) + if len( + np.intersect1d(candidate_k_reciprocal_index, + k_reciprocal_index)) > 2. / 3 * len( + candidate_k_reciprocal_index): + k_reciprocal_expansion_index = np.append( + k_reciprocal_expansion_index, candidate_k_reciprocal_index) + k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index) + R.append(k_reciprocal_expansion_index) + + gc.collect() # empty memory + V = batch_v(feat, R, all_num) + del R + gc.collect() # empty memory + initial_rank = initial_rank[:, :k2] + ### Faster version + if k2 != 1: + V_qe = np.zeros_like(V, dtype=np.float16) + for i in range(all_num): + V_qe[i, :] = np.mean(V[initial_rank[i], :], axis=0) + V = V_qe + del V_qe + del initial_rank + gc.collect() # empty memory + invIndex = [] + for i in range(all_num): + invIndex.append(np.where(V[:, i] != 0)[0]) + jaccard_dist = np.zeros((query_num, all_num), dtype=np.float32) + for i in tqdm(range(query_num)): + temp_min = np.zeros(shape=[1, all_num], dtype=np.float32) + indNonZero = np.where(V[i, :] != 0)[0] + indImages = [invIndex[ind] for ind in indNonZero] + for j in range(len(indNonZero)): + temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum( + V[i, indNonZero[j]], V[indImages[j], indNonZero[j]]) + jaccard_dist[i] = 1 - temp_min / (2. - temp_min) + del V + gc.collect() # empty memory + original_dist = batch_euclidean_distance(feat, feat[:query_num, :]).numpy() + final_dist = jaccard_dist * (1 - lambda_value + ) + original_dist * lambda_value + del original_dist + del jaccard_dist + final_dist = final_dist[:query_num, query_num:] + return final_dist + + +def visual_rerank(prb_feats, + gal_feats, + cid_tids, + use_ff=False, + use_rerank=False): + """Rerank by visual cures.""" + gal_labels = np.array([[0, item[0]] for item in cid_tids]) + prb_labels = gal_labels.copy() + if use_ff: + print('current use ff finetuned parameters....') + # Step1-1: fic. finetuned parameters: [la] + prb_feats, gal_feats = run_fic(prb_feats, gal_feats, prb_labels, + gal_labels, 3.0) + # Step1=2: fac. 
finetuned parameters: [beta,knn,lr,prb_epoch,gal_epoch] + prb_feats, gal_feats = run_fac(prb_feats, gal_feats, prb_labels, + gal_labels, 0.08, 20, 0.5, 1, 1) + if use_rerank: + paddle.enable_static() + print('current use rerank finetuned parameters....') + # Step2: k-reciprocal. finetuned parameters: [k1,k2,lambda_value] + sims = ReRank2( + paddle.to_tensor(prb_feats), + paddle.to_tensor(gal_feats), 20, 3, 0.3) + else: + # sims = ComputeEuclid(prb_feats, gal_feats, 1) + sims = 1.0 - np.dot(prb_feats, gal_feats.T) + + # NOTE: sims here is actually dist, the smaller the more similar + return 1.0 - sims + + +# sub_cluster +def normalize(nparray, axis=0): + nparray = preprocessing.normalize(nparray, norm='l2', axis=axis) + return nparray + + +def get_match(cluster_labels): + cluster_dict = dict() + cluster = list() + for i, l in enumerate(cluster_labels): + if l in list(cluster_dict.keys()): + cluster_dict[l].append(i) + else: + cluster_dict[l] = [i] + for idx in cluster_dict: + cluster.append(cluster_dict[idx]) + return cluster + + +def get_cid_tid(cluster_labels, cid_tids): + cluster = list() + for labels in cluster_labels: + cid_tid_list = list() + for label in labels: + cid_tid_list.append(cid_tids[label]) + cluster.append(cid_tid_list) + return cluster + + +def combin_feature(cid_tid_dict, sub_cluster): + for sub_ct in sub_cluster: + if len(sub_ct) < 2: continue + mean_feat = np.array([cid_tid_dict[i]['mean_feat'] for i in sub_ct]) + for i in sub_ct: + cid_tid_dict[i]['mean_feat'] = mean_feat.mean(axis=0) + return cid_tid_dict + + +def combin_cluster(sub_labels, cid_tids): + cluster = list() + for sub_c_to_c in sub_labels: + if len(cluster) < 1: + cluster = sub_labels[sub_c_to_c] + continue + for c_ts in sub_labels[sub_c_to_c]: + is_add = False + for i_c, c_set in enumerate(cluster): + if len(set(c_ts) & set(c_set)) > 0: + new_list = list(set(c_ts) | set(c_set)) + cluster[i_c] = new_list + is_add = True + break + if not is_add: + cluster.append(c_ts) + labels = list() + num_tr = 0 + for c_ts in cluster: + label_list = list() + for c_t in c_ts: + label_list.append(cid_tids.index(c_t)) + num_tr += 1 + label_list.sort() + labels.append(label_list) + return labels, cluster + + +def parse_pt_gt(mot_feature): + img_rects = dict() + for line in mot_feature: + fid = int(re.sub('[a-z,A-Z]', "", mot_feature[line]['frame'])) + tid = mot_feature[line]['id'] + rect = list(map(lambda x: int(float(x)), mot_feature[line]['bbox'])) + if fid not in img_rects: + img_rects[fid] = list() + rect.insert(0, tid) + img_rects[fid].append(rect) + return img_rects + + +# eval result +def compare_dataframes_mtmc(gts, ts): + """Compute ID-based evaluation metrics for MTMCT + Return: + df (pandas.DataFrame): Results of the evaluations in a df with only the 'idf1', 'idp', and 'idr' columns. 
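+ The full summary computed here also contains 'mota' and the other
+ motchallenge metrics; `print_mtmct_result` selects the columns it
+ reports.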
+ """ + gtds = [] + tsds = [] + gtcams = gts['CameraId'].drop_duplicates().tolist() + tscams = ts['CameraId'].drop_duplicates().tolist() + maxFrameId = 0 + + for k in sorted(gtcams): + gtd = gts.query('CameraId == %d' % k) + gtd = gtd[['FrameId', 'Id', 'X', 'Y', 'Width', 'Height']] + # max FrameId in gtd only + mfid = gtd['FrameId'].max() + gtd['FrameId'] += maxFrameId + gtd = gtd.set_index(['FrameId', 'Id']) + gtds.append(gtd) + + if k in tscams: + tsd = ts.query('CameraId == %d' % k) + tsd = tsd[['FrameId', 'Id', 'X', 'Y', 'Width', 'Height']] + # max FrameId among both gtd and tsd + mfid = max(mfid, tsd['FrameId'].max()) + tsd['FrameId'] += maxFrameId + tsd = tsd.set_index(['FrameId', 'Id']) + tsds.append(tsd) + + maxFrameId += mfid + + # compute multi-camera tracking evaluation stats + multiCamAcc = mm.utils.compare_to_groundtruth( + pd.concat(gtds), pd.concat(tsds), 'iou') + metrics = list(mm.metrics.motchallenge_metrics) + metrics.extend(['num_frames', 'idfp', 'idfn', 'idtp']) + mh = mm.metrics.create() + summary = mh.compute(multiCamAcc, metrics=metrics, name='MultiCam') + return summary + + +def get_sim_matrix(cid_tid_dict, + cid_tids, + use_ff=True, + use_rerank=True, + use_st_filter=False): + # Note: carame independent get_sim_matrix function, + # which is different from the one in camera_utils.py. + count = len(cid_tids) + + q_arr = np.array( + [cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)]) + g_arr = np.array( + [cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)]) + q_arr = normalize(q_arr, axis=1) + g_arr = normalize(g_arr, axis=1) + + st_mask = np.ones((count, count), dtype=np.float32) + st_mask = intracam_ignore(st_mask, cid_tids) + + visual_sim_matrix = visual_rerank( + q_arr, g_arr, cid_tids, use_ff=use_ff, use_rerank=use_rerank) + visual_sim_matrix = visual_sim_matrix.astype('float32') + + np.set_printoptions(precision=3) + sim_matrix = visual_sim_matrix * st_mask + + np.fill_diagonal(sim_matrix, 0) + return sim_matrix + + +def get_labels(cid_tid_dict, + cid_tids, + use_ff=True, + use_rerank=True, + use_st_filter=False): + # 1st cluster + sub_cid_tids = list(cid_tid_dict.keys()) + sub_labels = dict() + dis_thrs = [0.7, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5] + + sim_matrix = get_sim_matrix( + cid_tid_dict, + cid_tids, + use_ff=use_ff, + use_rerank=use_rerank, + use_st_filter=use_st_filter) + cluster_labels = AgglomerativeClustering( + n_clusters=None, + distance_threshold=0.5, + affinity='precomputed', + linkage='complete').fit_predict(1 - sim_matrix) + labels = get_match(cluster_labels) + sub_cluster = get_cid_tid(labels, cid_tids) + + # 2nd cluster + cid_tid_dict_new = combin_feature(cid_tid_dict, sub_cluster) + sub_labels = dict() + sim_matrix = get_sim_matrix( + cid_tid_dict_new, + cid_tids, + use_ff=use_ff, + use_rerank=use_rerank, + use_st_filter=use_st_filter) + cluster_labels = AgglomerativeClustering( + n_clusters=None, + distance_threshold=0.9, + affinity='precomputed', + linkage='complete').fit_predict(1 - sim_matrix) + labels = get_match(cluster_labels) + sub_cluster = get_cid_tid(labels, cid_tids) + + return labels + + +def getData(fpath, names=None, sep='\s+|\t+|,'): + """ Get the necessary track data from a file handle. + Args: + fpath (str) : Original path of file reading from. + names (list[str]): List of column names for the data. + sep (str): Allowed separators regular expression string. + Return: + df (pandas.DataFrame): Data frame containing the data loaded from the + stream with optionally assigned column names. 
No index is set on the data. + """ + try: + df = pd.read_csv( + fpath, + sep=sep, + index_col=None, + skipinitialspace=True, + header=None, + names=names, + engine='python') + return df + + except Exception as e: + raise ValueError("Could not read input from %s. Error: %s" % + (fpath, repr(e))) diff --git a/deploy/pptracking/python/mot/mtmct/zone.py b/deploy/pptracking/python/mot/mtmct/zone.py new file mode 100644 index 0000000000000000000000000000000000000000..7a8331615b6cd485f61c39be93024113e6eda245 --- /dev/null +++ b/deploy/pptracking/python/mot/mtmct/zone.py @@ -0,0 +1,403 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools +""" + +import os +import cv2 +import numpy as np +from sklearn.cluster import AgglomerativeClustering + +BBOX_B = 10 / 15 + + +class Zone(object): + def __init__(self, zone_path='datasets/zone'): + # 0: b 1: g 3: r 123:w + # w r not high speed + # b g high speed + assert zone_path != '', "Error: zone_path is not empty!" + zones = {} + for img_name in os.listdir(zone_path): + camnum = int(img_name.split('.')[0][-3:]) + zone_img = cv2.imread(os.path.join(zone_path, img_name)) + zones[camnum] = zone_img + self.zones = zones + self.current_cam = 0 + + def set_cam(self, cam): + self.current_cam = cam + + def get_zone(self, bbox): + cx = int((bbox[0] + bbox[2]) / 2) + cy = int((bbox[1] + bbox[3]) / 2) + pix = self.zones[self.current_cam][max(cy - 1, 0), max(cx - 1, 0), :] + zone_num = 0 + if pix[0] > 50 and pix[1] > 50 and pix[2] > 50: # w + zone_num = 1 + if pix[0] < 50 and pix[1] < 50 and pix[2] > 50: # r + zone_num = 2 + if pix[0] < 50 and pix[1] > 50 and pix[2] < 50: # g + zone_num = 3 + if pix[0] > 50 and pix[1] < 50 and pix[2] < 50: # b + zone_num = 4 + return zone_num + + def is_ignore(self, zone_list, frame_list, cid): + # 0 not in any corssroad, 1 white 2 red 3 green 4 bule + zs, ze = zone_list[0], zone_list[-1] + fs, fe = frame_list[0], frame_list[-1] + if zs == ze: + # if always on one section, excluding + if ze in [1, 2]: + return 2 + if zs != 0 and 0 in zone_list: + return 0 + if fe - fs > 1500: + return 2 + if fs < 2: + if cid in [45]: + if ze in [3, 4]: + return 1 + else: + return 2 + if fe > 1999: + if cid in [41]: + if ze not in [3]: + return 2 + else: + return 0 + if fs < 2 or fe > 1999: + if ze in [3, 4]: + return 0 + if ze in [3, 4]: + return 1 + return 2 + else: + # if camera section change + if cid in [41, 42, 43, 44, 45, 46]: + # come from road extension, exclusing + if zs == 1 and ze == 2: + return 2 + if zs == 2 and ze == 1: + return 2 + if cid in [41]: + # On 41 camera, no vehicle come into 42 camera + if (zs in [1, 2]) and ze == 4: + return 2 + if zs == 4 and (ze in [1, 2]): + return 2 + if cid in [46]: + # On 46 camera,no vehicle come into 45 + if (zs in [1, 2]) and ze == 3: + return 2 + if zs == 3 and (ze in [1, 2]): + return 2 + return 0 + + def filter_mot(self, mot_list, cid): + new_mot_list = dict() 
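# --- Hypothetical illustration (not part of the original file) of the BGR
# color-to-zone rule used by Zone.get_zone(): the zone mask is one image per
# camera and the pixel under the bbox center picks the crossroad section.
import numpy as np

def _pixel_to_zone(pix):
    b, g, r = int(pix[0]), int(pix[1]), int(pix[2])
    if b > 50 and g > 50 and r > 50: return 1   # white
    if b < 50 and g < 50 and r > 50: return 2   # red
    if b < 50 and g > 50 and r < 50: return 3   # green
    if b > 50 and g < 50 and r < 50: return 4   # blue
    return 0                                    # not on any marked section

toy_mask = np.zeros((10, 10, 3), dtype=np.uint8)
toy_mask[:, :, 2] = 255                         # paint the whole toy mask red
x1, y1, x2, y2 = 2, 2, 6, 6
cx, cy = (x1 + x2) // 2, (y1 + y2) // 2
assert _pixel_to_zone(toy_mask[cy, cx]) == 2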
+ sub_mot_list = dict() + for tracklet in mot_list: + tracklet_dict = mot_list[tracklet] + frame_list = list(tracklet_dict.keys()) + frame_list.sort() + zone_list = [] + for f in frame_list: + zone_list.append(tracklet_dict[f]['zone']) + if self.is_ignore(zone_list, frame_list, cid) == 0: + new_mot_list[tracklet] = tracklet_dict + if self.is_ignore(zone_list, frame_list, cid) == 1: + sub_mot_list[tracklet] = tracklet_dict + return new_mot_list + + def filter_bbox(self, mot_list, cid): + new_mot_list = dict() + yh = self.zones[cid].shape[0] + for tracklet in mot_list: + tracklet_dict = mot_list[tracklet] + frame_list = list(tracklet_dict.keys()) + frame_list.sort() + bbox_list = [] + for f in frame_list: + bbox_list.append(tracklet_dict[f]['bbox']) + bbox_x = [b[0] for b in bbox_list] + bbox_y = [b[1] for b in bbox_list] + bbox_w = [b[2] - b[0] for b in bbox_list] + bbox_h = [b[3] - b[1] for b in bbox_list] + new_frame_list = list() + if 0 in bbox_x or 0 in bbox_y: + b0 = [ + i for i, f in enumerate(frame_list) + if bbox_x[i] < 5 or bbox_y[i] + bbox_h[i] > yh - 5 + ] + if len(b0) == len(frame_list): + if cid in [41, 42, 44, 45, 46]: + continue + max_w = max(bbox_w) + max_h = max(bbox_h) + for i, f in enumerate(frame_list): + if bbox_w[i] > max_w * BBOX_B and bbox_h[ + i] > max_h * BBOX_B: + new_frame_list.append(f) + else: + l_i, r_i = 0, len(frame_list) - 1 + if len(b0) == 0: + continue + if b0[0] == 0: + for i in range(len(b0) - 1): + if b0[i] + 1 == b0[i + 1]: + l_i = b0[i + 1] + else: + break + if b0[-1] == len(frame_list) - 1: + for i in range(len(b0) - 1): + i = len(b0) - 1 - i + if b0[i] - 1 == b0[i - 1]: + r_i = b0[i - 1] + else: + break + + max_lw, max_lh = bbox_w[l_i], bbox_h[l_i] + max_rw, max_rh = bbox_w[r_i], bbox_h[r_i] + for i, f in enumerate(frame_list): + if i < l_i: + if bbox_w[i] > max_lw * BBOX_B and bbox_h[ + i] > max_lh * BBOX_B: + new_frame_list.append(f) + elif i > r_i: + if bbox_w[i] > max_rw * BBOX_B and bbox_h[ + i] > max_rh * BBOX_B: + new_frame_list.append(f) + else: + new_frame_list.append(f) + new_tracklet_dict = dict() + for f in new_frame_list: + new_tracklet_dict[f] = tracklet_dict[f] + new_mot_list[tracklet] = new_tracklet_dict + else: + new_mot_list[tracklet] = tracklet_dict + return new_mot_list + + def break_mot(self, mot_list, cid): + new_mot_list = dict() + new_num_tracklets = max(mot_list) + 1 + for tracklet in mot_list: + tracklet_dict = mot_list[tracklet] + frame_list = list(tracklet_dict.keys()) + frame_list.sort() + zone_list = [] + back_tracklet = False + new_zone_f = 0 + pre_frame = frame_list[0] + time_break = False + for f in frame_list: + if f - pre_frame > 100: + if cid in [44, 45]: + time_break = True + break + if not cid in [41, 44, 45, 46]: + break + pre_frame = f + new_zone = tracklet_dict[f]['zone'] + if len(zone_list) > 0 and zone_list[-1] == new_zone: + continue + if new_zone_f > 1: + if len(zone_list) > 1 and new_zone in zone_list: + back_tracklet = True + zone_list.append(new_zone) + new_zone_f = 0 + else: + new_zone_f += 1 + if back_tracklet: + new_tracklet_dict = dict() + pre_bbox = -1 + pre_arrow = 0 + have_break = False + for f in frame_list: + now_bbox = tracklet_dict[f]['bbox'] + if type(pre_bbox) == int: + if pre_bbox == -1: + pre_bbox = now_bbox + now_arrow = now_bbox[0] - pre_bbox[0] + if pre_arrow * now_arrow < 0 and len( + new_tracklet_dict) > 15 and not have_break: + new_mot_list[tracklet] = new_tracklet_dict + new_tracklet_dict = dict() + have_break = True + if have_break: + tracklet_dict[f]['id'] = new_num_tracklets + 
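# --- Sketch of the assumed mot_list layout these filters operate on (not part
# of the original file): tracklets keyed by id, frames keyed inside, each frame
# carrying at least 'bbox', 'zone' and (for matching) 'feat'.
toy_mot_list = {
    7: {
        10: {'bbox': [100, 80, 160, 200], 'zone': 1, 'id': 7},
        11: {'bbox': [104, 82, 164, 202], 'zone': 3, 'id': 7},
    },
}
# Typical use (assuming a Zone instance configured for camera 41):
#   zone.set_cam(41)
#   kept = zone.filter_mot(toy_mot_list, cid=41)   # drops tracklets whose is_ignore(...) == 2
#   kept = zone.filter_bbox(kept, cid=41)          # trims detections stuck on the image border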
new_tracklet_dict[f] = tracklet_dict[f] + pre_bbox, pre_arrow = now_bbox, now_arrow + + if have_break: + new_mot_list[new_num_tracklets] = new_tracklet_dict + new_num_tracklets += 1 + else: + new_mot_list[tracklet] = new_tracklet_dict + elif time_break: + new_tracklet_dict = dict() + have_break = False + pre_frame = frame_list[0] + for f in frame_list: + if f - pre_frame > 100: + new_mot_list[tracklet] = new_tracklet_dict + new_tracklet_dict = dict() + have_break = True + new_tracklet_dict[f] = tracklet_dict[f] + pre_frame = f + if have_break: + new_mot_list[new_num_tracklets] = new_tracklet_dict + new_num_tracklets += 1 + else: + new_mot_list[tracklet] = new_tracklet_dict + else: + new_mot_list[tracklet] = tracklet_dict + return new_mot_list + + def intra_matching(self, mot_list, sub_mot_list): + sub_zone_dict = dict() + new_mot_list = dict() + new_mot_list, new_sub_mot_list = self.do_intra_matching2(mot_list, + sub_mot_list) + return new_mot_list + + def do_intra_matching2(self, mot_list, sub_list): + new_zone_dict = dict() + + def get_trac_info(tracklet1): + t1_f = list(tracklet1) + t1_f.sort() + t1_fs = t1_f[0] + t1_fe = t1_f[-1] + t1_zs = tracklet1[t1_fs]['zone'] + t1_ze = tracklet1[t1_fe]['zone'] + t1_boxs = tracklet1[t1_fs]['bbox'] + t1_boxe = tracklet1[t1_fe]['bbox'] + t1_boxs = [(t1_boxs[2] + t1_boxs[0]) / 2, + (t1_boxs[3] + t1_boxs[1]) / 2] + t1_boxe = [(t1_boxe[2] + t1_boxe[0]) / 2, + (t1_boxe[3] + t1_boxe[1]) / 2] + return t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe + + for t1id in sub_list: + tracklet1 = sub_list[t1id] + if tracklet1 == -1: + continue + t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe = get_trac_info( + tracklet1) + sim_dict = dict() + for t2id in mot_list: + tracklet2 = mot_list[t2id] + t2_fs, t2_fe, t2_zs, t2_ze, t2_boxs, t2_boxe = get_trac_info( + tracklet2) + if t1_ze == t2_zs: + if abs(t2_fs - t1_fe) < 5 and abs(t2_boxe[0] - t1_boxs[ + 0]) < 50 and abs(t2_boxe[1] - t1_boxs[1]) < 50: + t1_feat = tracklet1[t1_fe]['feat'] + t2_feat = tracklet2[t2_fs]['feat'] + sim_dict[t2id] = np.matmul(t1_feat, t2_feat) + if t1_zs == t2_ze: + if abs(t2_fe - t1_fs) < 5 and abs(t2_boxs[0] - t1_boxe[ + 0]) < 50 and abs(t2_boxs[1] - t1_boxe[1]) < 50: + t1_feat = tracklet1[t1_fs]['feat'] + t2_feat = tracklet2[t2_fe]['feat'] + sim_dict[t2id] = np.matmul(t1_feat, t2_feat) + if len(sim_dict) > 0: + max_sim = 0 + max_id = 0 + for t2id in sim_dict: + if sim_dict[t2id] > max_sim: + sim_dict[t2id] = max_sim + max_id = t2id + if max_sim > 0.5: + t2 = mot_list[max_id] + for t1f in tracklet1: + if t1f not in t2: + tracklet1[t1f]['id'] = max_id + t2[t1f] = tracklet1[t1f] + mot_list[max_id] = t2 + sub_list[t1id] = -1 + return mot_list, sub_list + + def do_intra_matching(self, sub_zone_dict, sub_zone): + new_zone_dict = dict() + id_list = list(sub_zone_dict) + id2index = dict() + for index, id in enumerate(id_list): + id2index[id] = index + + def get_trac_info(tracklet1): + t1_f = list(tracklet1) + t1_f.sort() + t1_fs = t1_f[0] + t1_fe = t1_f[-1] + t1_zs = tracklet1[t1_fs]['zone'] + t1_ze = tracklet1[t1_fe]['zone'] + t1_boxs = tracklet1[t1_fs]['bbox'] + t1_boxe = tracklet1[t1_fe]['bbox'] + t1_boxs = [(t1_boxs[2] + t1_boxs[0]) / 2, + (t1_boxs[3] + t1_boxs[1]) / 2] + t1_boxe = [(t1_boxe[2] + t1_boxe[0]) / 2, + (t1_boxe[3] + t1_boxe[1]) / 2] + return t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe + + sim_matrix = np.zeros([len(id_list), len(id_list)]) + + for t1id in sub_zone_dict: + tracklet1 = sub_zone_dict[t1id] + t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe = get_trac_info( + tracklet1) + 
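# --- Toy illustration (not part of the original file) of the appearance test
# in do_intra_matching2(): ReID features are compared with a plain dot product,
# which equals cosine similarity once both vectors are L2-normalized.
import numpy as np

f1 = np.random.rand(128).astype(np.float32)
f2 = f1 + 0.01 * np.random.rand(128).astype(np.float32)   # nearly the same object
f1 /= np.linalg.norm(f1)
f2 /= np.linalg.norm(f2)
similarity = float(np.matmul(f1, f2))
assert similarity > 0.5        # the threshold used above to merge two tracklets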
t1_feat = tracklet1[t1_fe]['feat'] + for t2id in sub_zone_dict: + if t1id == t2id: + continue + tracklet2 = sub_zone_dict[t2id] + t2_fs, t2_fe, t2_zs, t2_ze, t2_boxs, t2_boxe = get_trac_info( + tracklet2) + if t1_zs != t1_ze and t2_ze != t2_zs or t1_fe > t2_fs: + continue + if abs(t1_boxe[0] - t2_boxs[0]) > 50 or abs(t1_boxe[1] - + t2_boxs[1]) > 50: + continue + if t2_fs - t1_fe > 5: + continue + t2_feat = tracklet2[t2_fs]['feat'] + sim_matrix[id2index[t1id], id2index[t2id]] = np.matmul(t1_feat, + t2_feat) + sim_matrix[id2index[t2id], id2index[t1id]] = np.matmul(t1_feat, + t2_feat) + sim_matrix = 1 - sim_matrix + cluster_labels = AgglomerativeClustering( + n_clusters=None, + distance_threshold=0.7, + affinity='precomputed', + linkage='complete').fit_predict(sim_matrix) + new_zone_dict = dict() + label2id = dict() + for index, label in enumerate(cluster_labels): + tracklet = sub_zone_dict[id_list[index]] + if label not in label2id: + new_id = tracklet[list(tracklet)[0]] + new_tracklet = dict() + else: + new_id = label2id[label] + new_tracklet = new_zone_dict[label2id[label]] + for tf in tracklet: + tracklet[tf]['id'] = new_id + new_tracklet[tf] = tracklet[tf] + new_zone_dict[label] = new_tracklet + + return new_zone_dict diff --git a/deploy/pptracking/python/mot/tracker/__init__.py b/deploy/pptracking/python/mot/tracker/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b74593b4126d878cd655326e58369f5b6f76a2ae --- /dev/null +++ b/deploy/pptracking/python/mot/tracker/__init__.py @@ -0,0 +1,23 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import base_jde_tracker +from . import base_sde_tracker +from . import jde_tracker +from . import deepsort_tracker + +from .base_jde_tracker import * +from .base_sde_tracker import * +from .jde_tracker import * +from .deepsort_tracker import * diff --git a/deploy/pptracking/python/mot/tracker/base_jde_tracker.py b/deploy/pptracking/python/mot/tracker/base_jde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..4e1ddb6cd948d30621eddc516a2b654170ad2346 --- /dev/null +++ b/deploy/pptracking/python/mot/tracker/base_jde_tracker.py @@ -0,0 +1,290 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py +""" + +import numpy as np +from collections import defaultdict +from collections import deque, OrderedDict +from ..matching import jde_matching as matching + +__all__ = [ + 'TrackState', + 'BaseTrack', + 'STrack', + 'joint_stracks', + 'sub_stracks', + 'remove_duplicate_stracks', +] + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count_dict = defaultdict(int) # support single class and multi classes + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(cls_id): + BaseTrack._count_dict[cls_id] += 1 + return BaseTrack._count_dict[cls_id] + + # @even: reset track id + @staticmethod + def init_count(num_classes): + """ + Initiate _count for all object classes + :param num_classes: + """ + for cls_id in range(num_classes): + BaseTrack._count_dict[cls_id] = 0 + + @staticmethod + def reset_track_count(cls_id): + BaseTrack._count_dict[cls_id] = 0 + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed + + +class STrack(BaseTrack): + def __init__(self, + tlwh, + score, + temp_feat, + num_classes, + cls_id, + buff_size=30): + # object class id + self.cls_id = cls_id + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.track_len = 0 + + self.smooth_feat = None + self.update_features(temp_feat) + self.features = deque([], maxlen=buff_size) + self.alpha = 0.9 + + def update_features(self, feat): + # L2 normalizing + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + self.smooth_feat = self.alpha * self.smooth_feat + (1.0 - self.alpha + ) * feat + self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, + self.covariance) + + @staticmethod + def multi_predict(tracks, kalman_filter): + if len(tracks) > 0: + multi_mean = np.asarray([track.mean.copy() for track in tracks]) + multi_covariance = np.asarray( + [track.covariance for track in tracks]) + for i, st in enumerate(tracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = kalman_filter.multi_predict( + multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + tracks[i].mean = mean + tracks[i].covariance = cov + + def reset_track_id(self): + self.reset_track_count(self.cls_id) + + def activate(self, kalman_filter, frame_id): + """Start a new track""" + self.kalman_filter = kalman_filter + # update track id for the object class + self.track_id = self.next_id(self.cls_id) + self.mean, self.covariance = self.kalman_filter.initiate( + self.tlwh_to_xyah(self._tlwh)) + 
+ self.track_len = 0 + self.state = TrackState.Tracked # set flag 'tracked' + + if frame_id == 1: # to record the first frame's detection result + self.is_activated = True + + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)) + self.update_features(new_track.curr_feat) + self.track_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: # update track id for the object class + self.track_id = self.next_id(self.cls_id) + + def update(self, new_track, frame_id, update_feature=True): + self.frame_id = frame_id + self.track_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked # set flag 'tracked' + self.is_activated = True # set flag 'activated' + + self.score = new_track.score + if update_feature: + self.update_features(new_track.curr_feat) + + @property + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_({}-{})_({}-{})'.format(self.cls_id, self.track_id, + self.start_frame, self.end_frame) + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb diff --git a/deploy/pptracking/python/mot/tracker/base_sde_tracker.py b/deploy/pptracking/python/mot/tracker/base_sde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..1d50c7ee19d47a63e7a2e7774931156559d25b52 --- /dev/null +++ 
b/deploy/pptracking/python/mot/tracker/base_sde_tracker.py @@ -0,0 +1,153 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/track.py +""" + +import datetime + +__all__ = ['TrackState', 'Track'] + + +class TrackState(object): + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + """ + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track(object): + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + + Args: + mean (ndarray): Mean vector of the initial state distribution. + covariance (ndarray): Covariance matrix of the initial state distribution. + track_id (int): A unique track identifier. + n_init (int): Number of consecutive detections before the track is confirmed. + The track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age (int): The maximum number of consecutive misses before the track + state is set to `Deleted`. + cls_id (int): The category id of the tracked box. + score (float): The confidence score of the tracked box. + feature (Optional[ndarray]): Feature vector of the detection this track + originates from. If not None, this feature is added to the `features` cache. + + Attributes: + hits (int): Total number of measurement updates. + age (int): Total number of frames since first occurance. + time_since_update (int): Total number of frames since last measurement + update. + state (TrackState): The current track state. + features (List[ndarray]): A cache of features. On each measurement update, + the associated feature vector is added to this list. 
+ """ + + def __init__(self, + mean, + covariance, + track_id, + n_init, + max_age, + cls_id, + score, + feature=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + self.hits = 1 + self.age = 1 + self.time_since_update = 0 + self.cls_id = cls_id + self.score = score + self.start_time = datetime.datetime.now() + + self.state = TrackState.Tentative + self.features = [] + self.feat = feature + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + + def to_tlwh(self): + """Get position in format `(top left x, top left y, width, height)`.""" + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get position in bounding box format `(min x, miny, max x, max y)`.""" + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def predict(self, kalman_filter): + """ + Propagate the state distribution to the current time step using a Kalman + filter prediction step. + """ + self.mean, self.covariance = kalman_filter.predict(self.mean, + self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, kalman_filter, detection): + """ + Perform Kalman filter measurement update step and update the associated + detection feature cache. + """ + self.mean, self.covariance = kalman_filter.update(self.mean, + self.covariance, + detection.to_xyah()) + self.features.append(detection.feature) + self.feat = detection.feature + self.cls_id = detection.cls_id + self.score = detection.score + + self.hits += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). + """ + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed).""" + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted diff --git a/deploy/pptracking/python/mot/tracker/deepsort_tracker.py b/deploy/pptracking/python/mot/tracker/deepsort_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..045cfba7f071865447b440d8ab786c84a0b985d9 --- /dev/null +++ b/deploy/pptracking/python/mot/tracker/deepsort_tracker.py @@ -0,0 +1,182 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" +This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/tracker.py +""" + +import numpy as np + +from ..motion import KalmanFilter +from ..matching.deepsort_matching import NearestNeighborDistanceMetric +from ..matching.deepsort_matching import iou_cost, min_cost_matching, matching_cascade, gate_cost_matrix +from .base_sde_tracker import Track +from ..utils import Detection + +__all__ = ['DeepSORTTracker'] + + +class DeepSORTTracker(object): + """ + DeepSORT tracker + + Args: + input_size (list): input feature map size to reid model, [h, w] format, + [64, 192] as default. + min_box_area (int): min box area to filter out low quality boxes + vertical_ratio (float): w/h, the vertical ratio of the bbox to filter + bad results, set 1.6 default for pedestrian tracking. If set <=0 + means no need to filter bboxes. + budget (int): If not None, fix samples per class to at most this number. + Removes the oldest samples when the budget is reached. + max_age (int): maximum number of missed misses before a track is deleted + n_init (float): Number of frames that a track remains in initialization + phase. Number of consecutive detections before the track is confirmed. + The track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + metric_type (str): either "euclidean" or "cosine", the distance metric + used for measurement to track association. + matching_threshold (float): samples with larger distance are + considered an invalid match. + max_iou_distance (float): max iou distance threshold + motion (object): KalmanFilter instance + """ + + def __init__(self, + input_size=[64, 192], + min_box_area=0, + vertical_ratio=-1, + budget=100, + max_age=70, + n_init=3, + metric_type='cosine', + matching_threshold=0.2, + max_iou_distance=0.9, + motion='KalmanFilter'): + self.input_size = input_size + self.min_box_area = min_box_area + self.vertical_ratio = vertical_ratio + self.max_age = max_age + self.n_init = n_init + self.metric = NearestNeighborDistanceMetric(metric_type, + matching_threshold, budget) + self.max_iou_distance = max_iou_distance + if motion == 'KalmanFilter': + self.motion = KalmanFilter() + + self.tracks = [] + self._next_id = 1 + + def predict(self): + """ + Propagate track state distributions one time step forward. + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict(self.motion) + + def update(self, pred_dets, pred_embs): + """ + Perform measurement update and track management. + Args: + pred_dets (np.array): Detection results of the image, the shape is + [N, 6], means 'x0, y0, x1, y1, score, cls_id'. + pred_embs (np.array): Embedding results of the image, the shape is + [N, 128], usually pred_embs.shape[1] is a multiple of 128. + """ + pred_tlwhs = pred_dets[:, :4] + pred_scores = pred_dets[:, 4:5] + pred_cls_ids = pred_dets[:, 5:] + + detections = [ + Detection(tlwh, score, feat, cls_id) + for tlwh, score, feat, cls_id in zip(pred_tlwhs, pred_scores, + pred_embs, pred_cls_ids) + ] + + # Run matching cascade. + matches, unmatched_tracks, unmatched_detections = \ + self._match(detections) + + # Update track set. 
+ for track_idx, detection_idx in matches: + self.tracks[track_idx].update(self.motion, + detections[detection_idx]) + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx]) + self.tracks = [t for t in self.tracks if not t.is_deleted()] + + # Update distance metric. + active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] + features, targets = [], [] + for track in self.tracks: + if not track.is_confirmed(): + continue + features += track.features + targets += [track.track_id for _ in track.features] + track.features = [] + self.metric.partial_fit( + np.asarray(features), np.asarray(targets), active_targets) + output_stracks = self.tracks + return output_stracks + + def _match(self, detections): + def gated_metric(tracks, dets, track_indices, detection_indices): + features = np.array([dets[i].feature for i in detection_indices]) + targets = np.array([tracks[i].track_id for i in track_indices]) + cost_matrix = self.metric.distance(features, targets) + cost_matrix = gate_cost_matrix(self.motion, cost_matrix, tracks, + dets, track_indices, + detection_indices) + return cost_matrix + + # Split track set into confirmed and unconfirmed tracks. + confirmed_tracks = [ + i for i, t in enumerate(self.tracks) if t.is_confirmed() + ] + unconfirmed_tracks = [ + i for i, t in enumerate(self.tracks) if not t.is_confirmed() + ] + + # Associate confirmed tracks using appearance features. + matches_a, unmatched_tracks_a, unmatched_detections = \ + matching_cascade( + gated_metric, self.metric.matching_threshold, self.max_age, + self.tracks, detections, confirmed_tracks) + + # Associate remaining tracks together with unconfirmed tracks using IOU. + iou_track_candidates = unconfirmed_tracks + [ + k for k in unmatched_tracks_a + if self.tracks[k].time_since_update == 1 + ] + unmatched_tracks_a = [ + k for k in unmatched_tracks_a + if self.tracks[k].time_since_update != 1 + ] + matches_b, unmatched_tracks_b, unmatched_detections = \ + min_cost_matching( + iou_cost, self.max_iou_distance, self.tracks, + detections, iou_track_candidates, unmatched_detections) + + matches = matches_a + matches_b + unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) + return matches, unmatched_tracks, unmatched_detections + + def _initiate_track(self, detection): + mean, covariance = self.motion.initiate(detection.to_xyah()) + self.tracks.append( + Track(mean, covariance, self._next_id, self.n_init, self.max_age, + detection.cls_id, detection.score, detection.feature)) + self._next_id += 1 diff --git a/deploy/pptracking/python/mot/tracker/jde_tracker.py b/deploy/pptracking/python/mot/tracker/jde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..fdb96d4354cfd602ad251a6ee7f27f5f48a7fcad --- /dev/null +++ b/deploy/pptracking/python/mot/tracker/jde_tracker.py @@ -0,0 +1,257 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py +""" + +import numpy as np +from collections import defaultdict + +from ..matching import jde_matching as matching +from ..motion import KalmanFilter +from .base_jde_tracker import TrackState, STrack +from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks + +__all__ = ['JDETracker'] + + +class JDETracker(object): + __shared__ = ['num_classes'] + """ + JDE tracker, support single class and multi classes + + Args: + num_classes (int): the number of classes + det_thresh (float): threshold of detection score + track_buffer (int): buffer for tracker + min_box_area (int): min box area to filter out low quality boxes + vertical_ratio (float): w/h, the vertical ratio of the bbox to filter + bad results. If set <0 means no need to filter bboxes,usually set + 1.6 for pedestrian tracking. + tracked_thresh (float): linear assignment threshold of tracked + stracks and detections + r_tracked_thresh (float): linear assignment threshold of + tracked stracks and unmatched detections + unconfirmed_thresh (float): linear assignment threshold of + unconfirmed stracks and unmatched detections + motion (str): motion model, KalmanFilter as default + conf_thres (float): confidence threshold for tracking + metric_type (str): either "euclidean" or "cosine", the distance metric + used for measurement to track association. + """ + + def __init__(self, + num_classes=1, + det_thresh=0.3, + track_buffer=30, + min_box_area=200, + vertical_ratio=1.6, + tracked_thresh=0.7, + r_tracked_thresh=0.5, + unconfirmed_thresh=0.7, + motion='KalmanFilter', + conf_thres=0, + metric_type='euclidean'): + self.num_classes = num_classes + self.det_thresh = det_thresh + self.track_buffer = track_buffer + self.min_box_area = min_box_area + self.vertical_ratio = vertical_ratio + + self.tracked_thresh = tracked_thresh + self.r_tracked_thresh = r_tracked_thresh + self.unconfirmed_thresh = unconfirmed_thresh + if motion == 'KalmanFilter': + self.motion = KalmanFilter() + self.conf_thres = conf_thres + self.metric_type = metric_type + + self.frame_id = 0 + self.tracked_tracks_dict = defaultdict(list) # dict(list[STrack]) + self.lost_tracks_dict = defaultdict(list) # dict(list[STrack]) + self.removed_tracks_dict = defaultdict(list) # dict(list[STrack]) + + self.max_time_lost = 0 + # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer) + + def update(self, pred_dets, pred_embs): + """ + Processes the image frame and finds bounding box(detections). + Associates the detection with corresponding tracklets and also handles + lost, removed, refound and active tracklets. + + Args: + pred_dets (np.array): Detection results of the image, the shape is + [N, 6], means 'x0, y0, x1, y1, score, cls_id'. + pred_embs (np.array): Embedding results of the image, the shape is + [N, 128] or [N, 512]. + + Return: + output_stracks_dict (dict(list)): The list contains information + regarding the online_tracklets for the recieved image tensor. 
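# --- Hypothetical usage sketch (not part of the original file): JDETracker
# consumes per-frame detections [N, 6] ('x0, y0, x1, y1, score, cls_id') and
# embeddings [N, 128], and returns a dict of active tracks keyed by class id.
#
#   tracker = JDETracker(num_classes=1, conf_thres=0.4, tracked_thresh=0.7)
#   online_targets_dict = tracker.update(pred_dets, pred_embs)
#   for cls_id, targets in online_targets_dict.items():
#       for t in targets:
#           print(cls_id, t.track_id, t.tlwh, t.score)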
+ """ + self.frame_id += 1 + if self.frame_id == 1: + STrack.init_count(self.num_classes) + activated_tracks_dict = defaultdict(list) + refined_tracks_dict = defaultdict(list) + lost_tracks_dict = defaultdict(list) + removed_tracks_dict = defaultdict(list) + output_tracks_dict = defaultdict(list) + + pred_dets_dict = defaultdict(list) + pred_embs_dict = defaultdict(list) + + # unify single and multi classes detection and embedding results + for cls_id in range(self.num_classes): + cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1) + pred_dets_dict[cls_id] = pred_dets[cls_idx] + pred_embs_dict[cls_id] = pred_embs[cls_idx] + + for cls_id in range(self.num_classes): + """ Step 1: Get detections by class""" + pred_dets_cls = pred_dets_dict[cls_id] + pred_embs_cls = pred_embs_dict[cls_id] + remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1) + if remain_inds.sum() > 0: + pred_dets_cls = pred_dets_cls[remain_inds] + pred_embs_cls = pred_embs_cls[remain_inds] + detections = [ + STrack( + STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, + self.num_classes, cls_id, 30) + for (tlbrs, f) in zip(pred_dets_cls, pred_embs_cls) + ] + else: + detections = [] + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed_dict = defaultdict(list) + tracked_tracks_dict = defaultdict(list) + for track in self.tracked_tracks_dict[cls_id]: + if not track.is_activated: + # previous tracks which are not active in the current frame are added in unconfirmed list + unconfirmed_dict[cls_id].append(track) + else: + # Active tracks are added to the local list 'tracked_stracks' + tracked_tracks_dict[cls_id].append(track) + """ Step 2: First association, with embedding""" + # building tracking pool for the current frame + track_pool_dict = defaultdict(list) + track_pool_dict[cls_id] = joint_stracks( + tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id]) + + # Predict the current location with KalmanFilter + STrack.multi_predict(track_pool_dict[cls_id], self.motion) + + dists = matching.embedding_distance( + track_pool_dict[cls_id], detections, metric=self.metric_type) + dists = matching.fuse_motion(self.motion, dists, + track_pool_dict[cls_id], detections) + matches, u_track, u_detection = matching.linear_assignment( + dists, thresh=self.tracked_thresh) + + for i_tracked, idet in matches: + # i_tracked is the id of the track and idet is the detection + track = track_pool_dict[cls_id][i_tracked] + det = detections[idet] + if track.state == TrackState.Tracked: + # If the track is active, add the detection to the track + track.update(detections[idet], self.frame_id) + activated_tracks_dict[cls_id].append(track) + else: + # We have obtained a detection from a track which is not active, + # hence put the track in refind_stracks list + track.re_activate(det, self.frame_id, new_id=False) + refined_tracks_dict[cls_id].append(track) + + # None of the steps below happen if there are no undetected tracks. 
+ """ Step 3: Second association, with IOU""" + detections = [detections[i] for i in u_detection] + r_tracked_stracks = [] + for i in u_track: + if track_pool_dict[cls_id][i].state == TrackState.Tracked: + r_tracked_stracks.append(track_pool_dict[cls_id][i]) + + dists = matching.iou_distance(r_tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment( + dists, thresh=self.r_tracked_thresh) + + for i_tracked, idet in matches: + track = r_tracked_stracks[i_tracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_tracks_dict[cls_id].append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refined_tracks_dict[cls_id].append(track) + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_tracks_dict[cls_id].append(track) + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed_dict[cls_id], detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment( + dists, thresh=self.unconfirmed_thresh) + for i_tracked, idet in matches: + unconfirmed_dict[cls_id][i_tracked].update(detections[idet], + self.frame_id) + activated_tracks_dict[cls_id].append(unconfirmed_dict[cls_id][ + i_tracked]) + for it in u_unconfirmed: + track = unconfirmed_dict[cls_id][it] + track.mark_removed() + removed_tracks_dict[cls_id].append(track) + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.motion, self.frame_id) + activated_tracks_dict[cls_id].append(track) + """ Step 5: Update state""" + for track in self.lost_tracks_dict[cls_id]: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_tracks_dict[cls_id].append(track) + + self.tracked_tracks_dict[cls_id] = [ + t for t in self.tracked_tracks_dict[cls_id] + if t.state == TrackState.Tracked + ] + self.tracked_tracks_dict[cls_id] = joint_stracks( + self.tracked_tracks_dict[cls_id], activated_tracks_dict[cls_id]) + self.tracked_tracks_dict[cls_id] = joint_stracks( + self.tracked_tracks_dict[cls_id], refined_tracks_dict[cls_id]) + self.lost_tracks_dict[cls_id] = sub_stracks( + self.lost_tracks_dict[cls_id], self.tracked_tracks_dict[cls_id]) + self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id]) + self.lost_tracks_dict[cls_id] = sub_stracks( + self.lost_tracks_dict[cls_id], self.removed_tracks_dict[cls_id]) + self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id]) + self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[ + cls_id] = remove_duplicate_stracks( + self.tracked_tracks_dict[cls_id], + self.lost_tracks_dict[cls_id]) + + # get scores of lost tracks + output_tracks_dict[cls_id] = [ + track for track in self.tracked_tracks_dict[cls_id] + if track.is_activated + ] + + return output_tracks_dict diff --git a/deploy/pptracking/python/mot/utils.py b/deploy/pptracking/python/mot/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a452b37e6fa0a3fd7a64762402ddfa39824d1569 --- /dev/null +++ b/deploy/pptracking/python/mot/utils.py @@ -0,0 +1,362 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import time +import numpy as np +import collections + +__all__ = [ + 'MOTTimer', 'Detection', 'write_mot_results', 'load_det_results', + 'preprocess_reid', 'get_crops', 'clip_box', 'scale_coords', 'flow_statistic', + 'plot_tracking' +] + + +class MOTTimer(object): + """ + This class used to compute and print the current FPS while evaling. + """ + + def __init__(self, window_size=20): + self.start_time = 0. + self.diff = 0. + self.duration = 0. + self.deque = collections.deque(maxlen=window_size) + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.diff = time.time() - self.start_time + self.deque.append(self.diff) + if average: + self.duration = np.mean(self.deque) + else: + self.duration = np.sum(self.deque) + return self.duration + + def clear(self): + self.start_time = 0. + self.diff = 0. + self.duration = 0. + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + + Args: + tlwh (Tensor): Bounding box in format `(top left x, top left y, + width, height)`. + score (Tensor): Bounding box confidence score. + feature (Tensor): A feature vector that describes the object + contained in this image. + cls_id (Tensor): Bounding box category id. + """ + + def __init__(self, tlwh, score, feature, cls_id): + self.tlwh = np.asarray(tlwh, dtype=np.float32) + self.score = float(score) + self.feature = np.asarray(feature, dtype=np.float32) + self.cls_id = int(cls_id) + + def to_tlbr(self): + """ + Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """ + Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
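# --- Worked numeric example (not part of the original file) of the box
# conversions used throughout: tlwh -> tlbr and tlwh -> xyah (center x,
# center y, aspect ratio w/h, height).
import numpy as np

tlwh = np.array([100., 80., 50., 200.])         # top-left x, top-left y, w, h
tlbr = tlwh.copy(); tlbr[2:] += tlbr[:2]        # -> [100., 80., 150., 280.]
xyah = tlwh.copy(); xyah[:2] += xyah[2:] / 2    # center: (125., 180.)
xyah[2] /= xyah[3]                              # aspect ratio: 50 / 200 = 0.25
# xyah == [125., 180., 0.25, 200.]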
+ """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + +def write_mot_results(filename, results, data_type='mot', num_classes=1): + # support single and multi classes + if data_type in ['mot', 'mcmot']: + save_format = '{frame},{id},{x1},{y1},{w},{h},{score},{cls_id},-1,-1\n' + elif data_type == 'kitti': + save_format = '{frame} {id} car 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' + else: + raise ValueError(data_type) + + f = open(filename, 'w') + for cls_id in range(num_classes): + for frame_id, tlwhs, tscores, track_ids in results[cls_id]: + if data_type == 'kitti': + frame_id -= 1 + for tlwh, score, track_id in zip(tlwhs, tscores, track_ids): + if track_id < 0: continue + if data_type == 'mot': + cls_id = -1 + + x1, y1, w, h = tlwh + x2, y2 = x1 + w, y1 + h + line = save_format.format( + frame=frame_id, + id=track_id, + x1=x1, + y1=y1, + x2=x2, + y2=y2, + w=w, + h=h, + score=score, + cls_id=cls_id) + f.write(line) + print('MOT results save in {}'.format(filename)) + + +def load_det_results(det_file, num_frames): + assert os.path.exists(det_file) and os.path.isfile(det_file), \ + '{} is not exist or not a file.'.format(det_file) + labels = np.loadtxt(det_file, dtype='float32', delimiter=',') + assert labels.shape[1] == 7, \ + "Each line of {} should have 7 items: '[frame_id],[x0],[y0],[w],[h],[score],[class_id]'.".format(det_file) + results_list = [] + for frame_i in range(num_frames): + results = {'bbox': [], 'score': [], 'cls_id': []} + lables_with_frame = labels[labels[:, 0] == frame_i + 1] + # each line of lables_with_frame: + # [frame_id],[x0],[y0],[w],[h],[score],[class_id] + for l in lables_with_frame: + results['bbox'].append(l[1:5]) + results['score'].append(l[5:6]) + results['cls_id'].append(l[6:7]) + results_list.append(results) + return results_list + + +def scale_coords(coords, input_shape, im_shape, scale_factor): + # Note: ratio has only one value, scale_factor[0] == scale_factor[1] + # + # This function only used for JDE YOLOv3 or other detectors with + # LetterBoxResize and JDEBBoxPostProcess, coords output from detector had + # not scaled back to the origin image. 
+ + ratio = scale_factor[0] + pad_w = (input_shape[1] - int(im_shape[1])) / 2 + pad_h = (input_shape[0] - int(im_shape[0])) / 2 + coords[:, 0::2] -= pad_w + coords[:, 1::2] -= pad_h + coords[:, 0:4] /= ratio + coords[:, :4] = np.clip(coords[:, :4], a_min=0, a_max=coords[:, :4].max()) + return coords.round() + + +def clip_box(xyxy, ori_image_shape): + H, W = ori_image_shape + xyxy[:, 0::2] = np.clip(xyxy[:, 0::2], a_min=0, a_max=W) + xyxy[:, 1::2] = np.clip(xyxy[:, 1::2], a_min=0, a_max=H) + w = xyxy[:, 2:3] - xyxy[:, 0:1] + h = xyxy[:, 3:4] - xyxy[:, 1:2] + mask = np.logical_and(h > 0, w > 0) + keep_idx = np.nonzero(mask) + return xyxy[keep_idx[0]], keep_idx + + +def get_crops(xyxy, ori_img, w, h): + crops = [] + xyxy = xyxy.astype(np.int64) + ori_img = ori_img.numpy() + ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2) # [h,w,3]->[w,h,3] + for i, bbox in enumerate(xyxy): + crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] + crops.append(crop) + crops = preprocess_reid(crops, w, h) + return crops + + +def preprocess_reid(imgs, + w=64, + h=192, + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]): + im_batch = [] + for img in imgs: + img = cv2.resize(img, (w, h)) + img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255 + img_mean = np.array(mean).reshape((3, 1, 1)) + img_std = np.array(std).reshape((3, 1, 1)) + img -= img_mean + img /= img_std + img = np.expand_dims(img, axis=0) + im_batch.append(img) + im_batch = np.concatenate(im_batch, 0) + return im_batch + + +def flow_statistic(result, + secs_interval, + do_entrance_counting, + video_fps, + entrance, + id_set, + interval_id_set, + in_id_list, + out_id_list, + prev_center, + records, + data_type='mot', + num_classes=1): + # Count in and out number: + # Use horizontal center line as the entrance just for simplification. + # If a person located in the above the horizontal center line + # at the previous frame and is in the below the line at the current frame, + # the in number is increased by one. + # If a person was in the below the horizontal center line + # at the previous frame and locates in the below the line at the current frame, + # the out number is increased by one. + # TODO: if the entrance is not the horizontal center line, + # the counting method should be optimized. + if do_entrance_counting: + entrance_y = entrance[1] # xmin, ymin, xmax, ymax + frame_id, tlwhs, tscores, track_ids = result + for tlwh, score, track_id in zip(tlwhs, tscores, track_ids): + if track_id < 0: continue + if data_type == 'kitti': + frame_id -= 1 + + x1, y1, w, h = tlwh + center_x = x1 + w / 2. + center_y = y1 + h / 2. 
+ if track_id in prev_center: + if prev_center[track_id][1] <= entrance_y and \ + center_y > entrance_y: + in_id_list.append(track_id) + if prev_center[track_id][1] >= entrance_y and \ + center_y < entrance_y: + out_id_list.append(track_id) + prev_center[track_id][0] = center_x + prev_center[track_id][1] = center_y + else: + prev_center[track_id] = [center_x, center_y] + # Count totol number, number at a manual-setting interval + frame_id, tlwhs, tscores, track_ids = result + for tlwh, score, track_id in zip(tlwhs, tscores, track_ids): + if track_id < 0: continue + id_set.add(track_id) + interval_id_set.add(track_id) + + # Reset counting at the interval beginning + if frame_id % video_fps == 0 and frame_id / video_fps % secs_interval == 0: + curr_interval_count = len(interval_id_set) + interval_id_set.clear() + info = "Frame id: {}, Total count: {}".format(frame_id, len(id_set)) + if do_entrance_counting: + info += ", In count: {}, Out count: {}".format( + len(in_id_list), len(out_id_list)) + if frame_id % video_fps == 0 and frame_id / video_fps % secs_interval == 0: + info += ", Count during {} secs: {}".format(secs_interval, + curr_interval_count) + interval_id_set.clear() + print(info) + info += "\n" + records.append(info) + + return { + "id_set": id_set, + "interval_id_set": interval_id_set, + "in_id_list": in_id_list, + "out_id_list": out_id_list, + "prev_center": prev_center, + "records": records + } + + +def get_color(idx): + idx = idx * 3 + color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) + return color + + +def plot_tracking(image, + tlwhs, + obj_ids, + scores=None, + frame_id=0, + fps=0., + ids2names=[], + do_entrance_counting=False, + entrance=None): + im = np.ascontiguousarray(np.copy(image)) + im_h, im_w = im.shape[:2] + + text_scale = max(1, image.shape[1] / 1600.) + text_thickness = 2 + line_thickness = max(1, int(image.shape[1] / 500.)) + + if fps > 0: + _line = 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)) + else: + _line = 'frame: %d num: %d' % (frame_id, len(tlwhs)) + cv2.putText( + im, + _line, + (0, int(15 * text_scale)), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=2) + + for i, tlwh in enumerate(tlwhs): + x1, y1, w, h = tlwh + intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) + obj_id = int(obj_ids[i]) + id_text = '{}'.format(int(obj_id)) + if ids2names != []: + assert len( + ids2names) == 1, "plot_tracking only supports single classes." 
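# --- Toy illustration (not part of the original file) of the entrance rule
# implemented above: with the horizontal center line as the entrance, a track
# whose center moves from above the line to below it counts as "in", and the
# opposite direction counts as "out".
entrance_y = 540.0                        # e.g. half of a 1080-pixel-high frame
prev_center_y, center_y = 530.0, 552.0    # previous vs current frame
went_in = prev_center_y <= entrance_y and center_y > entrance_y    # True
went_out = prev_center_y >= entrance_y and center_y < entrance_y   # False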
+ id_text = '{}_'.format(ids2names[0]) + id_text + _line_thickness = 1 if obj_id <= 0 else line_thickness + color = get_color(abs(obj_id)) + cv2.rectangle( + im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) + cv2.putText( + im, + id_text, (intbox[0], intbox[1] - 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=text_thickness) + + if scores is not None: + text = '{:.2f}'.format(float(scores[i])) + cv2.putText( + im, + text, (intbox[0], intbox[1] + 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 255, 255), + thickness=text_thickness) + + if do_entrance_counting: + entrance_line = tuple(map(int, entrance)) + cv2.rectangle( + im, + entrance_line[0:2], + entrance_line[2:4], + color=(0, 255, 255), + thickness=line_thickness) + return im diff --git a/deploy/pptracking/python/mot_jde_infer.py b/deploy/pptracking/python/mot_jde_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..76091f7940fde9d366f7a57dd72c1f67e3bc9eca --- /dev/null +++ b/deploy/pptracking/python/mot_jde_infer.py @@ -0,0 +1,382 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import yaml +import cv2 +import numpy as np +from collections import defaultdict + +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor + +from utils import argsparser, Timer, get_current_memory_mb +from det_infer import Detector, get_test_images, print_arguments, PredictConfig +from benchmark_utils import PaddleInferBenchmark +from visualize import plot_tracking_dict + +from mot.tracker import JDETracker +from mot.utils import MOTTimer, write_mot_results, flow_statistic + +# Global dictionary +MOT_SUPPORT_MODELS = { + 'JDE', + 'FairMOT', +} + + +class JDE_Detector(Detector): + """ + Args: + pred_config (object): config of model, defined by `Config(model_dir)` + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + batch_size (int): size of pre batch in inference + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + """ + + def __init__(self, + pred_config, + model_dir, + device='CPU', + run_mode='fluid', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1088, + trt_opt_shape=608, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False): + super(JDE_Detector, self).__init__( + pred_config=pred_config, + model_dir=model_dir, + device=device, + run_mode=run_mode, + batch_size=batch_size, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + 
trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn) + assert batch_size == 1, "The JDE Detector only supports batch size=1 now" + assert pred_config.tracker, "Tracking model should have tracker" + self.num_classes = len(pred_config.labels) + + tp = pred_config.tracker + min_box_area = tp['min_box_area'] if 'min_box_area' in tp else 200 + vertical_ratio = tp['vertical_ratio'] if 'vertical_ratio' in tp else 1.6 + conf_thres = tp['conf_thres'] if 'conf_thres' in tp else 0. + tracked_thresh = tp['tracked_thresh'] if 'tracked_thresh' in tp else 0.7 + metric_type = tp['metric_type'] if 'metric_type' in tp else 'euclidean' + + self.tracker = JDETracker( + num_classes=self.num_classes, + min_box_area=min_box_area, + vertical_ratio=vertical_ratio, + conf_thres=conf_thres, + tracked_thresh=tracked_thresh, + metric_type=metric_type) + + def postprocess(self, pred_dets, pred_embs, threshold): + online_targets_dict = self.tracker.update(pred_dets, pred_embs) + + online_tlwhs = defaultdict(list) + online_scores = defaultdict(list) + online_ids = defaultdict(list) + for cls_id in range(self.num_classes): + online_targets = online_targets_dict[cls_id] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + tscore = t.score + if tscore < threshold: continue + if tlwh[2] * tlwh[3] <= self.tracker.min_box_area: continue + if self.tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ + 3] > self.tracker.vertical_ratio: + continue + online_tlwhs[cls_id].append(tlwh) + online_ids[cls_id].append(tid) + online_scores[cls_id].append(tscore) + return online_tlwhs, online_scores, online_ids + + def predict(self, image_list, threshold=0.5, warmup=0, repeats=1): + ''' + Args: + image_list (list): list of image + threshold (float): threshold of predicted box' score + Returns: + online_tlwhs, online_scores, online_ids (dict[np.array]) + ''' + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(image_list) + self.det_times.preprocess_time_s.end() + + pred_dets, pred_embs = None, None + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + + for i in range(warmup): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + pred_dets = boxes_tensor.copy_to_cpu() + + self.det_times.inference_time_s.start() + for i in range(repeats): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + pred_dets = boxes_tensor.copy_to_cpu() + embs_tensor = self.predictor.get_output_handle(output_names[1]) + pred_embs = embs_tensor.copy_to_cpu() + self.det_times.inference_time_s.end(repeats=repeats) + + self.det_times.postprocess_time_s.start() + online_tlwhs, online_scores, online_ids = self.postprocess( + pred_dets, pred_embs, threshold) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 + return online_tlwhs, online_scores, online_ids + + +def predict_image(detector, image_list): + results = [] + num_classes = detector.num_classes + data_type = 'mcmot' if num_classes > 1 else 'mot' + ids2names = detector.pred_config.labels + + image_list.sort() + for frame_id, img_file in enumerate(image_list): + frame = cv2.imread(img_file) + if FLAGS.run_benchmark: + detector.predict([frame], FLAGS.threshold, warmup=10, repeats=10) + 
cm, gm, gu = get_current_memory_mb() + detector.cpu_mem += cm + detector.gpu_mem += gm + detector.gpu_util += gu + print('Test iter {}, file name:{}'.format(frame_id, img_file)) + else: + online_tlwhs, online_scores, online_ids = detector.predict( + [frame], FLAGS.threshold) + online_im = plot_tracking_dict(frame, num_classes, online_tlwhs, + online_ids, online_scores, frame_id, + ids2names) + if FLAGS.save_images: + if not os.path.exists(FLAGS.output_dir): + os.makedirs(FLAGS.output_dir) + img_name = os.path.split(img_file)[-1] + out_path = os.path.join(FLAGS.output_dir, img_name) + cv2.imwrite(out_path, online_im) + print("save result to: " + out_path) + + +def predict_video(detector, camera_id): + video_name = 'mot_output.mp4' + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) + else: + capture = cv2.VideoCapture(FLAGS.video_file) + video_name = os.path.split(FLAGS.video_file)[-1] + # Get Video info : resolution, fps, frame count + width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(capture.get(cv2.CAP_PROP_FPS)) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + print("fps: %d, frame_count: %d" % (fps, frame_count)) + + if not os.path.exists(FLAGS.output_dir): + os.makedirs(FLAGS.output_dir) + out_path = os.path.join(FLAGS.output_dir, video_name) + if not FLAGS.save_images: + video_format = 'mp4v' + fourcc = cv2.VideoWriter_fourcc(*video_format) + writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) + frame_id = 0 + timer = MOTTimer() + results = defaultdict(list) # support single class and multi classes + num_classes = detector.num_classes + data_type = 'mcmot' if num_classes > 1 else 'mot' + ids2names = detector.pred_config.labels + center_traj = None + entrance = None + records = None + if FLAGS.draw_center_traj: + center_traj = [{} for i in range(num_classes)] + + if num_classes == 1: + id_set = set() + interval_id_set = set() + in_id_list = list() + out_id_list = list() + prev_center = dict() + records = list() + entrance = [0, height / 2., width, height / 2.] + + video_fps = fps + + while (1): + ret, frame = capture.read() + if not ret: + break + timer.tic() + online_tlwhs, online_scores, online_ids = detector.predict( + [frame], FLAGS.threshold) + timer.toc() + + for cls_id in range(num_classes): + results[cls_id].append((frame_id + 1, online_tlwhs[cls_id], + online_scores[cls_id], online_ids[cls_id])) + + fps = 1. 
/ timer.duration + # NOTE: just implement flow statistic for one class + if num_classes == 1: + result = (frame_id + 1, online_tlwhs[0], online_scores[0], + online_ids[0]) + statistic = flow_statistic( + result, FLAGS.secs_interval, FLAGS.do_entrance_counting, + video_fps, entrance, id_set, interval_id_set, in_id_list, + out_id_list, prev_center, records, data_type, num_classes) + id_set = statistic['id_set'] + interval_id_set = statistic['interval_id_set'] + in_id_list = statistic['in_id_list'] + out_id_list = statistic['out_id_list'] + prev_center = statistic['prev_center'] + records = statistic['records'] + + elif num_classes > 1 and FLAGS.do_entrance_counting: + raise NotImplementedError( + 'Multi-class flow counting is not implemented now!') + im = plot_tracking_dict( + frame, + num_classes, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + fps=fps, + ids2names=ids2names, + do_entrance_counting=FLAGS.do_entrance_counting, + entrance=entrance, + records=records, + center_traj=center_traj) + + if FLAGS.save_images: + save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2]) + if not os.path.exists(save_dir): + os.makedirs(save_dir) + cv2.imwrite( + os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im) + else: + writer.write(im) + + frame_id += 1 + print('detect frame: %d, fps: %f' % (frame_id, fps)) + if camera_id != -1: + cv2.imshow('Tracking Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + if FLAGS.save_mot_txts: + result_filename = os.path.join(FLAGS.output_dir, + video_name.split('.')[-2] + '.txt') + + write_mot_results(result_filename, results, data_type, num_classes) + + if num_classes == 1: + result_filename = os.path.join( + FLAGS.output_dir, + video_name.split('.')[-2] + '_flow_statistic.txt') + f = open(result_filename, 'w') + for line in records: + f.write(line) + print('Flow statistic save in {}'.format(result_filename)) + f.close() + + if FLAGS.save_images: + save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2]) + cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(save_dir, + out_path) + os.system(cmd_str) + print('Save video in {}.'.format(out_path)) + else: + writer.release() + + +def main(): + pred_config = PredictConfig(FLAGS.model_dir) + detector = JDE_Detector( + pred_config, + FLAGS.model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn) + + # predict from video file or camera video stream + if FLAGS.video_file is not None or FLAGS.camera_id != -1: + predict_video(detector, FLAGS.camera_id) + else: + # predict from image + img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) + predict_image(detector, img_list) + if not FLAGS.run_benchmark: + detector.det_times.info(average=True) + else: + mems = { + 'cpu_rss_mb': detector.cpu_mem / len(img_list), + 'gpu_rss_mb': detector.gpu_mem / len(img_list), + 'gpu_util': detector.gpu_util * 100 / len(img_list) + } + perf_info = detector.det_times.report(average=True) + model_dir = FLAGS.model_dir + mode = FLAGS.run_mode + model_info = { + 'model_name': model_dir.strip('/').split('/')[-1], + 'precision': mode.split('_')[-1] + } + data_info = { + 'batch_size': 1, + 'shape': "dynamic_shape", + 'data_num': perf_info['img_num'] + } + det_log = PaddleInferBenchmark(detector.config, model_info, + data_info, perf_info, mems) + 
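# the PaddleInferBenchmark object is callable; invoking it reports the collected timing and memory statistics under the 'MOT' tag +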
det_log('MOT') + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + print_arguments(FLAGS) + FLAGS.device = FLAGS.device.upper() + assert FLAGS.device in ['CPU', 'GPU', 'XPU' + ], "device should be CPU, GPU or XPU" + + main() diff --git a/deploy/pptracking/python/mot_sde_infer.py b/deploy/pptracking/python/mot_sde_infer.py new file mode 100644 index 0000000000000000000000000000000000000000..39c8403c37165ca9a2aab28bf5704693b6fb0775 --- /dev/null +++ b/deploy/pptracking/python/mot_sde_infer.py @@ -0,0 +1,910 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import time +import yaml +import cv2 +import re +import numpy as np +from collections import defaultdict + +import paddle +from paddle.inference import Config +from paddle.inference import create_predictor + +from picodet_postprocess import PicoDetPostProcess +from utils import argsparser, Timer, get_current_memory_mb, _is_valid_video, video2frames +from det_infer import Detector, DetectorPicoDet, get_test_images, print_arguments, PredictConfig +from det_infer import load_predictor +from benchmark_utils import PaddleInferBenchmark +from visualize import plot_tracking + +from mot.tracker import DeepSORTTracker +from mot.utils import MOTTimer, write_mot_results, flow_statistic, scale_coords, clip_box, preprocess_reid + +from mot.mtmct.utils import parse_bias +from mot.mtmct.postprocess import trajectory_fusion, sub_cluster, gen_res, print_mtmct_result +from mot.mtmct.postprocess import get_mtmct_matching_results, save_mtmct_crops, save_mtmct_vis_results + +# Global dictionary +MOT_SUPPORT_MODELS = {'DeepSORT'} + + +def bench_log(detector, img_list, model_info, batch_size=1, name=None): + mems = { + 'cpu_rss_mb': detector.cpu_mem / len(img_list), + 'gpu_rss_mb': detector.gpu_mem / len(img_list), + 'gpu_util': detector.gpu_util * 100 / len(img_list) + } + perf_info = detector.det_times.report(average=True) + data_info = { + 'batch_size': batch_size, + 'shape': "dynamic_shape", + 'data_num': perf_info['img_num'] + } + log = PaddleInferBenchmark(detector.config, model_info, data_info, + perf_info, mems) + log(name) + + +class SDE_Detector(Detector): + """ + Args: + pred_config (object): config of model, defined by `Config(model_dir)` + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + """ + + def __init__(self, + pred_config, + model_dir, + device='CPU', + 
run_mode='fluid', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1088, + trt_opt_shape=608, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False): + super(SDE_Detector, self).__init__( + pred_config=pred_config, + model_dir=model_dir, + device=device, + run_mode=run_mode, + batch_size=batch_size, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn) + assert batch_size == 1, "The JDE Detector only supports batch size=1 now" + self.pred_config = pred_config + + def postprocess(self, boxes, ori_image_shape, threshold, scaled): + over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0] + if len(over_thres_idx) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + return pred_dets, pred_xyxys + else: + boxes = boxes[over_thres_idx] + + if not scaled: + # scaled means whether the coords after detector outputs + # have been scaled back to the original image, set True + # in general detector, set False in JDE YOLOv3. + pred_bboxes = scale_coords(boxes[:, 2:], input_shape, im_shape, + scale_factor) + else: + pred_bboxes = boxes[:, 2:] + + pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape) + + if len(keep_idx[0]) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + return pred_dets, pred_xyxys + + pred_scores = boxes[:, 1:2][keep_idx[0]] + pred_cls_ids = boxes[:, 0:1][keep_idx[0]] + pred_tlwhs = np.concatenate( + (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), + axis=1) + + pred_dets = np.concatenate( + (pred_tlwhs, pred_scores, pred_cls_ids), axis=1) + + return pred_dets, pred_xyxys + + def predict(self, image_path, ori_image_shape, scaled, threshold=0.5, warmup=0, repeats=1): + ''' + Args: + image_path (list[str]): path of images, only support one image path + (batch_size=1) in tracking model + ori_image_shape (list[int]: original image shape + threshold (float): threshold of predicted box' score + scaled (bool): whether the coords after detector outputs are scaled, + default False in jde yolov3, set True in general detector. 
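+ warmup (int): number of warm-up inference runs before timing, used only when benchmarking + repeats (int): number of timed inference runs, used only when benchmarking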
+ Returns: + pred_dets (np.ndarray, [N, 6]) + ''' + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(image_path) + self.det_times.preprocess_time_s.end() + + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + + for i in range(warmup): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + boxes = boxes_tensor.copy_to_cpu() + + self.det_times.inference_time_s.start() + for i in range(repeats): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + boxes = boxes_tensor.copy_to_cpu() + self.det_times.inference_time_s.end(repeats=repeats) + + self.det_times.postprocess_time_s.start() + if len(boxes) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + else: + pred_dets, pred_xyxys = self.postprocess( + boxes, ori_image_shape, threshold, scaled) + + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 + return pred_dets, pred_xyxys + + +class SDE_DetectorPicoDet(DetectorPicoDet): + """ + Args: + pred_config (object): config of model, defined by `Config(model_dir)` + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU + run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + trt_min_shape (int): min shape for dynamic shape in trt + trt_max_shape (int): max shape for dynamic shape in trt + trt_opt_shape (int): opt shape for dynamic shape in trt + trt_calib_mode (bool): If the model is produced by TRT offline quantitative + calibration, trt_calib_mode need to set True + cpu_threads (int): cpu threads + enable_mkldnn (bool): whether to open MKLDNN + """ + + def __init__(self, + pred_config, + model_dir, + device='CPU', + run_mode='fluid', + batch_size=1, + trt_min_shape=1, + trt_max_shape=1088, + trt_opt_shape=608, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False): + super(SDE_DetectorPicoDet, self).__init__( + pred_config=pred_config, + model_dir=model_dir, + device=device, + run_mode=run_mode, + batch_size=batch_size, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn) + assert batch_size == 1, "The JDE Detector only supports batch size=1 now" + self.pred_config = pred_config + + def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor, + threshold): + over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0] + if len(over_thres_idx) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + return pred_dets, pred_xyxys + else: + boxes = boxes[over_thres_idx] + + pred_bboxes = boxes[:, 2:] + + pred_xyxys, keep_idx = clip_box(pred_bboxes, input_shape, im_shape, + scale_factor) + if len(keep_idx[0]) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + return pred_dets, pred_xyxys + + pred_scores = boxes[:, 1:2][keep_idx[0]] + pred_cls_ids = boxes[:, 0:1][keep_idx[0]] + pred_tlwhs = np.concatenate( + (pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1), + axis=1) + + pred_dets = np.concatenate( + 
(pred_tlwhs, pred_scores, pred_cls_ids), axis=1) + return pred_dets, pred_xyxys + + def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1): + ''' + Args: + image (np.ndarray): image numpy data + threshold (float): threshold of predicted box' score + scaled (bool): whether the coords after detector outputs are scaled, + default False in jde yolov3, set True in general detector. + Returns: + pred_dets (np.ndarray, [N, 6]) + ''' + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(image) + self.det_times.preprocess_time_s.end() + + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + + np_score_list, np_boxes_list = [], [] + for i in range(warmup): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + boxes = boxes_tensor.copy_to_cpu() + + self.det_times.inference_time_s.start() + for i in range(repeats): + self.predictor.run() + np_score_list.clear() + np_boxes_list.clear() + output_names = self.predictor.get_output_names() + num_outs = int(len(output_names) / 2) + for out_idx in range(num_outs): + np_score_list.append( + self.predictor.get_output_handle(output_names[out_idx]) + .copy_to_cpu()) + np_boxes_list.append( + self.predictor.get_output_handle(output_names[ + out_idx + num_outs]).copy_to_cpu()) + + self.det_times.inference_time_s.end(repeats=repeats) + self.det_times.img_num += 1 + self.det_times.postprocess_time_s.start() + self.postprocess = PicoDetPostProcess( + inputs['image'].shape[2:], + inputs['im_shape'], + inputs['scale_factor'], + strides=self.pred_config.fpn_stride, + nms_threshold=self.pred_config.nms['nms_threshold']) + boxes, boxes_num = self.postprocess(np_score_list, np_boxes_list) + + if len(boxes) == 0: + pred_dets = np.zeros((1, 6), dtype=np.float32) + pred_xyxys = np.zeros((1, 4), dtype=np.float32) + else: + input_shape = inputs['image'].shape[2:] + im_shape = inputs['im_shape'] + scale_factor = inputs['scale_factor'] + pred_dets, pred_xyxys = self.postprocess_bboxes( + boxes, input_shape, im_shape, scale_factor, threshold) + + return pred_dets, pred_xyxys + + +class SDE_ReID(object): + def __init__(self, + pred_config, + model_dir, + device='CPU', + run_mode='fluid', + batch_size=50, + trt_min_shape=1, + trt_max_shape=1088, + trt_opt_shape=608, + trt_calib_mode=False, + cpu_threads=1, + enable_mkldnn=False): + self.pred_config = pred_config + self.predictor, self.config = load_predictor( + model_dir, + run_mode=run_mode, + batch_size=batch_size, + min_subgraph_size=self.pred_config.min_subgraph_size, + device=device, + use_dynamic_shape=self.pred_config.use_dynamic_shape, + trt_min_shape=trt_min_shape, + trt_max_shape=trt_max_shape, + trt_opt_shape=trt_opt_shape, + trt_calib_mode=trt_calib_mode, + cpu_threads=cpu_threads, + enable_mkldnn=enable_mkldnn) + self.det_times = Timer() + self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0 + self.batch_size = batch_size + assert pred_config.tracker, "Tracking model should have tracker" + pt = pred_config.tracker + max_age = pt['max_age'] if 'max_age' in pt else 30 + max_iou_distance = pt[ + 'max_iou_distance'] if 'max_iou_distance' in pt else 0.7 + self.tracker = DeepSORTTracker( + max_age=max_age, max_iou_distance=max_iou_distance) + + def get_crops(self, xyxy, ori_img): + w, h = self.tracker.input_size + self.det_times.preprocess_time_s.start() + crops 
= [] + xyxy = xyxy.astype(np.int64) + ori_img = ori_img.transpose(1, 0, 2) # [h,w,3]->[w,h,3] + for i, bbox in enumerate(xyxy): + crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] + crops.append(crop) + crops = preprocess_reid(crops, w, h) + self.det_times.preprocess_time_s.end() + + return crops + + def preprocess(self, crops): + # to keep fast speed, only use topk crops + crops = crops[:self.batch_size] + inputs = {} + inputs['crops'] = np.array(crops).astype('float32') + return inputs + + def postprocess(self, pred_dets, pred_embs): + tracker = self.tracker + tracker.predict() + online_targets = tracker.update(pred_dets, pred_embs) + + online_tlwhs, online_scores, online_ids = [], [], [] + for t in online_targets: + if not t.is_confirmed() or t.time_since_update > 1: + continue + tlwh = t.to_tlwh() + tscore = t.score + tid = t.track_id + if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue + if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ + 3] > tracker.vertical_ratio: + continue + online_tlwhs.append(tlwh) + online_scores.append(tscore) + online_ids.append(tid) + + tracking_outs = { + 'online_tlwhs': online_tlwhs, + 'online_scores': online_scores, + 'online_ids': online_ids, + } + return tracking_outs + + def postprocess_mtmct(self, pred_dets, pred_embs, frame_id, seq_name): + tracker = self.tracker + tracker.predict() + online_targets = tracker.update(pred_dets, pred_embs) + + online_tlwhs, online_scores, online_ids = [], [], [] + online_tlbrs, online_feats = [], [] + for t in online_targets: + if not t.is_confirmed() or t.time_since_update > 1: + continue + tlwh = t.to_tlwh() + tscore = t.score + tid = t.track_id + if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue + if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[ + 3] > tracker.vertical_ratio: + continue + online_tlwhs.append(tlwh) + online_scores.append(tscore) + online_ids.append(tid) + + online_tlbrs.append(t.to_tlbr()) + online_feats.append(t.feat) + + tracking_outs = { + 'online_tlwhs': online_tlwhs, + 'online_scores': online_scores, + 'online_ids': online_ids, + 'feat_data': {}, + } + for _tlbr, _id, _feat in zip(online_tlbrs, online_ids, online_feats): + feat_data = {} + feat_data['bbox'] = _tlbr + feat_data['frame'] = f"{frame_id:06d}" + feat_data['id'] = _id + _imgname = f'{seq_name}_{_id}_{frame_id}.jpg' + feat_data['imgname'] = _imgname + feat_data['feat'] = _feat + tracking_outs['feat_data'].update({_imgname: feat_data}) + return tracking_outs + + def predict(self, + crops, + pred_dets, + warmup=0, + repeats=1, + MTMCT=False, + frame_id=0, + seq_name=''): + self.det_times.preprocess_time_s.start() + inputs = self.preprocess(crops) + self.det_times.preprocess_time_s.end() + + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) + + for i in range(warmup): + self.predictor.run() + output_names = self.predictor.get_output_names() + feature_tensor = self.predictor.get_output_handle(output_names[0]) + pred_embs = feature_tensor.copy_to_cpu() + + self.det_times.inference_time_s.start() + for i in range(repeats): + self.predictor.run() + output_names = self.predictor.get_output_names() + feature_tensor = self.predictor.get_output_handle(output_names[0]) + pred_embs = feature_tensor.copy_to_cpu() + self.det_times.inference_time_s.end(repeats=repeats) + + self.det_times.postprocess_time_s.start() + if MTMCT == False: + tracking_outs = self.postprocess(pred_dets, pred_embs) 
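+ # in MTMCT mode, postprocess_mtmct additionally records each track's bbox and ReID feature (feat_data) for later cross-camera matching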
+ else: + tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs, + frame_id, seq_name) + self.det_times.postprocess_time_s.end() + self.det_times.img_num += 1 + + return tracking_outs + + +def predict_image(detector, reid_model, image_list): + image_list.sort() + for i, img_file in enumerate(image_list): + frame = cv2.imread(img_file) + if FLAGS.run_benchmark: + pred_dets, pred_xyxys = detector.predict( + [frame], FLAGS.scaled, FLAGS.threshold, warmup=10, repeats=10) + cm, gm, gu = get_current_memory_mb() + detector.cpu_mem += cm + detector.gpu_mem += gm + detector.gpu_util += gu + print('Test iter {}, file name:{}'.format(i, img_file)) + else: + pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled, + FLAGS.threshold) + + if len(pred_dets) == 1 and np.sum(pred_dets) == 0: + print('Frame {} has no object, try to modify score threshold.'. + format(i)) + online_im = frame + else: + # reid process + crops = reid_model.get_crops(pred_xyxys, frame) + + if FLAGS.run_benchmark: + tracking_outs = reid_model.predict( + crops, pred_dets, warmup=10, repeats=10) + else: + tracking_outs = reid_model.predict(crops, pred_dets) + + online_tlwhs = tracking_outs['online_tlwhs'] + online_scores = tracking_outs['online_scores'] + online_ids = tracking_outs['online_ids'] + + online_im = plot_tracking( + frame, online_tlwhs, online_ids, online_scores, frame_id=i) + + if FLAGS.save_images: + if not os.path.exists(FLAGS.output_dir): + os.makedirs(FLAGS.output_dir) + img_name = os.path.split(img_file)[-1] + out_path = os.path.join(FLAGS.output_dir, img_name) + cv2.imwrite(out_path, online_im) + print("save result to: " + out_path) + + +def predict_video(detector, reid_model, camera_id): + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) + video_name = 'mot_output.mp4' + else: + capture = cv2.VideoCapture(FLAGS.video_file) + video_name = os.path.split(FLAGS.video_file)[-1] + # Get Video info : resolution, fps, frame count + width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = int(capture.get(cv2.CAP_PROP_FPS)) + frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + print("fps: %d, frame_count: %d" % (fps, frame_count)) + + if not os.path.exists(FLAGS.output_dir): + os.makedirs(FLAGS.output_dir) + out_path = os.path.join(FLAGS.output_dir, video_name) + if not FLAGS.save_images: + video_format = 'mp4v' + fourcc = cv2.VideoWriter_fourcc(*video_format) + writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height)) + frame_id = 0 + timer = MOTTimer() + results = defaultdict(list) + id_set = set() + interval_id_set = set() + in_id_list = list() + out_id_list = list() + prev_center = dict() + records = list() + entrance = [0, height / 2., width, height / 2.] + video_fps = fps + + while (1): + ret, frame = capture.read() + if not ret: + break + timer.tic() + pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled, + FLAGS.threshold) + + if len(pred_dets) == 1 and np.sum(pred_dets) == 0: + print('Frame {} has no object, try to modify score threshold.'. 
+ format(frame_id)) + timer.toc() + im = frame + else: + # reid process + crops = reid_model.get_crops(pred_xyxys, frame) + tracking_outs = reid_model.predict(crops, pred_dets) + + online_tlwhs = tracking_outs['online_tlwhs'] + online_scores = tracking_outs['online_scores'] + online_ids = tracking_outs['online_ids'] + + results[0].append( + (frame_id + 1, online_tlwhs, online_scores, online_ids)) + # NOTE: just implement flow statistic for one class + result = (frame_id + 1, online_tlwhs, online_scores, online_ids) + statistic = flow_statistic( + result, FLAGS.secs_interval, FLAGS.do_entrance_counting, + video_fps, entrance, id_set, interval_id_set, in_id_list, + out_id_list, prev_center, records) + id_set = statistic['id_set'] + interval_id_set = statistic['interval_id_set'] + in_id_list = statistic['in_id_list'] + out_id_list = statistic['out_id_list'] + prev_center = statistic['prev_center'] + records = statistic['records'] + + timer.toc() + + fps = 1. / timer.duration + im = plot_tracking( + frame, + online_tlwhs, + online_ids, + online_scores, + frame_id=frame_id, + fps=fps, + do_entrance_counting=FLAGS.do_entrance_counting, + entrance=entrance) + + if FLAGS.save_images: + save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2]) + if not os.path.exists(save_dir): + os.makedirs(save_dir) + cv2.imwrite( + os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im) + else: + writer.write(im) + + frame_id += 1 + print('detect frame:%d, fps: %f' % (frame_id, fps)) + + if camera_id != -1: + cv2.imshow('Tracking Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + + if FLAGS.save_mot_txts: + result_filename = os.path.join(FLAGS.output_dir, + video_name.split('.')[-2] + '.txt') + write_mot_results(result_filename, results) + + result_filename = os.path.join( + FLAGS.output_dir, video_name.split('.')[-2] + '_flow_statistic.txt') + f = open(result_filename, 'w') + for line in records: + f.write(line) + print('Flow statistic save in {}'.format(result_filename)) + f.close() + + if FLAGS.save_images: + save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2]) + cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(save_dir, + out_path) + os.system(cmd_str) + print('Save video in {}.'.format(out_path)) + else: + writer.release() + + +def predict_mtmct_seq(detector, reid_model, seq_name, output_dir): + fpath = os.path.join(FLAGS.mtmct_dir, seq_name) + if os.path.exists(os.path.join(fpath, 'img1')): + fpath = os.path.join(fpath, 'img1') + + assert os.path.isdir(fpath), '{} should be a directory'.format(fpath) + image_list = os.listdir(fpath) + image_list.sort() + assert len(image_list) > 0, '{} has no images.'.format(fpath) + + results = defaultdict(list) + mot_features_dict = {} # cid_tid_fid feats + print('Totally {} frames found in seq {}.'.format(len(image_list), seq_name)) + + for frame_id, img_file in enumerate(image_list): + if frame_id % 40 == 0: + print('Processing frame {} of seq {}.'.format(frame_id, seq_name)) + frame = cv2.imread(os.path.join(fpath, img_file)) + ori_image_shape = list(frame.shape[:2]) + frame_path = os.path.join(fpath, img_file) + pred_dets, pred_xyxys = detector.predict([frame_path], ori_image_shape, FLAGS.scaled, + FLAGS.threshold) + + if len(pred_dets) == 1 and np.sum(pred_dets) == 0: + print('Frame {} has no object, try to modify score threshold.'. 
+ format(frame_id)) + online_im = frame + else: + # reid process + crops = reid_model.get_crops(pred_xyxys, frame) + + tracking_outs = reid_model.predict( + crops, + pred_dets, + MTMCT=True, + frame_id=frame_id, + seq_name=seq_name) + + feat_data_dict = tracking_outs['feat_data'] + mot_features_dict = dict(mot_features_dict, **feat_data_dict) + + online_tlwhs = tracking_outs['online_tlwhs'] + online_scores = tracking_outs['online_scores'] + online_ids = tracking_outs['online_ids'] + + online_im = plot_tracking(frame, online_tlwhs, online_ids, + online_scores, frame_id) + results[0].append( + (frame_id + 1, online_tlwhs, online_scores, online_ids)) + + if FLAGS.save_images: + save_dir = os.path.join(output_dir, seq_name) + if not os.path.exists(save_dir): os.makedirs(save_dir) + img_name = os.path.split(img_file)[-1] + out_path = os.path.join(save_dir, img_name) + cv2.imwrite(out_path, online_im) + + if FLAGS.save_mot_txts: + result_filename = os.path.join(output_dir, seq_name + '.txt') + write_mot_results(result_filename, results) + + return mot_features_dict + + +def predict_mtmct(detector, reid_model, mtmct_dir, mtmct_cfg): + MTMCT = mtmct_cfg['MTMCT'] + assert MTMCT == True, 'predict_mtmct should be used for MTMCT.' + + cameras_bias = mtmct_cfg['cameras_bias'] + cid_bias = parse_bias(cameras_bias) + scene_cluster = list(cid_bias.keys()) + + # 1.zone releated parameters + use_zone = mtmct_cfg['use_zone'] + zone_path = mtmct_cfg['zone_path'] + + # 2.tricks parameters, can be used for other mtmct dataset + use_ff = mtmct_cfg['use_ff'] + use_rerank = mtmct_cfg['use_rerank'] + + # 3.camera releated parameters + use_camera = mtmct_cfg['use_camera'] + use_st_filter = mtmct_cfg['use_st_filter'] + + # 4.zone releated parameters + use_roi = mtmct_cfg['use_roi'] + roi_dir = mtmct_cfg['roi_dir'] + + mot_list_breaks = [] + cid_tid_dict = dict() + + output_dir = FLAGS.output_dir + if not os.path.exists(output_dir): os.makedirs(output_dir) + + seqs = os.listdir(mtmct_dir) + seqs.sort() + + for seq in seqs: + fpath = os.path.join(mtmct_dir, seq) + if os.path.isfile(fpath) and _is_valid_video(fpath): + ext = seq.split('.')[-1] + seq = seq.split('.')[-2] + print('ffmpeg processing of video {}'.format(fpath)) + frames_path = video2frames(video_path=fpath, outpath=mtmct_dir, frame_rate=25) + fpath = os.path.join(mtmct_dir, seq) + + if os.path.isdir(fpath) == False: + print('{} is not a image folder.'.format(fpath)) + continue + + mot_features_dict = predict_mtmct_seq(detector, reid_model, + seq, output_dir) + + cid = int(re.sub('[a-z,A-Z]', "", seq)) + tid_data, mot_list_break = trajectory_fusion( + mot_features_dict, + cid, + cid_bias, + use_zone=use_zone, + zone_path=zone_path) + mot_list_breaks.append(mot_list_break) + # single seq process + for line in tid_data: + tracklet = tid_data[line] + tid = tracklet['tid'] + if (cid, tid) not in cid_tid_dict: + cid_tid_dict[(cid, tid)] = tracklet + + map_tid = sub_cluster( + cid_tid_dict, + scene_cluster, + use_ff=use_ff, + use_rerank=use_rerank, + use_camera=use_camera, + use_st_filter=use_st_filter) + + pred_mtmct_file = os.path.join(output_dir, 'mtmct_result.txt') + if use_camera: + gen_res(pred_mtmct_file, scene_cluster, map_tid, mot_list_breaks) + else: + gen_res( + pred_mtmct_file, + scene_cluster, + map_tid, + mot_list_breaks, + use_roi=use_roi, + roi_dir=roi_dir) + + if FLAGS.save_images: + carame_results, cid_tid_fid_res = get_mtmct_matching_results( + pred_mtmct_file) + + crops_dir = os.path.join(output_dir, 'mtmct_crops') + save_mtmct_crops( + 
cid_tid_fid_res, images_dir=mtmct_dir, crops_dir=crops_dir) + + save_dir = os.path.join(output_dir, 'mtmct_vis') + save_mtmct_vis_results( + carame_results, + images_dir=mtmct_dir, + save_dir=save_dir, + save_videos=FLAGS.save_images) + + # evalution metrics + data_root_gt = os.path.join(mtmct_dir, '..', 'gt', 'gt.txt') + if os.path.exists(data_root_gt): + print_mtmct_result(data_root_gt, pred_mtmct_file) + + +def main(): + pred_config = PredictConfig(FLAGS.model_dir) + detector_func = 'SDE_Detector' + if pred_config.arch == 'PicoDet': + detector_func = 'SDE_DetectorPicoDet' + + detector = eval(detector_func)(pred_config, + FLAGS.model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + batch_size=FLAGS.batch_size, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn) + + pred_config = PredictConfig(FLAGS.reid_model_dir) + reid_model = SDE_ReID( + pred_config, + FLAGS.reid_model_dir, + device=FLAGS.device, + run_mode=FLAGS.run_mode, + batch_size=FLAGS.reid_batch_size, + trt_min_shape=FLAGS.trt_min_shape, + trt_max_shape=FLAGS.trt_max_shape, + trt_opt_shape=FLAGS.trt_opt_shape, + trt_calib_mode=FLAGS.trt_calib_mode, + cpu_threads=FLAGS.cpu_threads, + enable_mkldnn=FLAGS.enable_mkldnn) + + # predict from video file or camera video stream + if FLAGS.video_file is not None or FLAGS.camera_id != -1: + predict_video(detector, reid_model, FLAGS.camera_id) + + elif FLAGS.mtmct_dir is not None: + mtmct_cfg_file = FLAGS.mtmct_cfg + with open(mtmct_cfg_file) as f: + mtmct_cfg = yaml.safe_load(f) + predict_mtmct(detector, reid_model, FLAGS.mtmct_dir, mtmct_cfg) + + else: + # predict from image + img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file) + predict_image(detector, reid_model, img_list) + + if not FLAGS.run_benchmark: + detector.det_times.info(average=True) + reid_model.det_times.info(average=True) + else: + mode = FLAGS.run_mode + det_model_dir = FLAGS.model_dir + det_model_info = { + 'model_name': det_model_dir.strip('/').split('/')[-1], + 'precision': mode.split('_')[-1] + } + bench_log(detector, img_list, det_model_info, name='Det') + + reid_model_dir = FLAGS.reid_model_dir + reid_model_info = { + 'model_name': reid_model_dir.strip('/').split('/')[-1], + 'precision': mode.split('_')[-1] + } + bench_log(reid_model, img_list, reid_model_info, name='ReID') + + +if __name__ == '__main__': + paddle.enable_static() + parser = argsparser() + FLAGS = parser.parse_args() + print_arguments(FLAGS) + FLAGS.device = FLAGS.device.upper() + assert FLAGS.device in ['CPU', 'GPU', 'XPU' + ], "device should be CPU, GPU or XPU" + + main() diff --git a/deploy/pptracking/python/mtmct_cfg.yml b/deploy/pptracking/python/mtmct_cfg.yml new file mode 100644 index 0000000000000000000000000000000000000000..72acb9d249634144df37be11d346675ed51f8cd4 --- /dev/null +++ b/deploy/pptracking/python/mtmct_cfg.yml @@ -0,0 +1,17 @@ +# config for MTMCT +MTMCT: True +cameras_bias: + c041: 0 + c042: 0 +# 1.zone releated parameters +use_zone: True +zone_path: dataset/mot/aic21mtmct_vehicle/S06/zone +# 2.tricks parameters, can be used for other mtmct dataset +use_ff: True +use_rerank: True +# 3.camera releated parameters +use_camera: True +use_st_filter: False +# 4.zone releated parameters +use_roi: True +roi_dir: dataset/mot/aic21mtmct_vehicle/S06 diff --git a/deploy/pptracking/python/picodet_postprocess.py b/deploy/pptracking/python/picodet_postprocess.py 
new file mode 100644 index 0000000000000000000000000000000000000000..7df13f8278d13c51179c5502987926dec637bec4 --- /dev/null +++ b/deploy/pptracking/python/picodet_postprocess.py @@ -0,0 +1,227 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from scipy.special import softmax + + +def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): + """ + Args: + box_scores (N, 5): boxes in corner-form and probabilities. + iou_threshold: intersection over union threshold. + top_k: keep top_k results. If k <= 0, keep all the results. + candidate_size: only consider the candidates with the highest scores. + Returns: + picked: a list of indexes of the kept boxes + """ + scores = box_scores[:, -1] + boxes = box_scores[:, :-1] + picked = [] + indexes = np.argsort(scores) + indexes = indexes[-candidate_size:] + while len(indexes) > 0: + current = indexes[-1] + picked.append(current) + if 0 < top_k == len(picked) or len(indexes) == 1: + break + current_box = boxes[current, :] + indexes = indexes[:-1] + rest_boxes = boxes[indexes, :] + iou = iou_of( + rest_boxes, + np.expand_dims( + current_box, axis=0), ) + indexes = indexes[iou <= iou_threshold] + + return box_scores[picked, :] + + +def iou_of(boxes0, boxes1, eps=1e-5): + """Return intersection-over-union (Jaccard index) of boxes. + Args: + boxes0 (N, 4): ground truth boxes. + boxes1 (N or 1, 4): predicted boxes. + eps: a small number to avoid 0 as denominator. + Returns: + iou (N): IoU values. + """ + overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) + overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) + + overlap_area = area_of(overlap_left_top, overlap_right_bottom) + area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) + area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) + return overlap_area / (area0 + area1 - overlap_area + eps) + + +def area_of(left_top, right_bottom): + """Compute the areas of rectangles given two corners. + Args: + left_top (N, 2): left top corner. + right_bottom (N, 2): right bottom corner. + Returns: + area (N): return the area. 
+ """ + hw = np.clip(right_bottom - left_top, 0.0, None) + return hw[..., 0] * hw[..., 1] + + +class PicoDetPostProcess(object): + """ + Args: + input_shape (int): network input image size + ori_shape (int): ori image shape of before padding + scale_factor (float): scale factor of ori image + enable_mkldnn (bool): whether to open MKLDNN + """ + + def __init__(self, + input_shape, + ori_shape, + scale_factor, + strides=[8, 16, 32, 64], + score_threshold=0.4, + nms_threshold=0.5, + nms_top_k=1000, + keep_top_k=100): + self.ori_shape = ori_shape + self.input_shape = input_shape + self.scale_factor = scale_factor + self.strides = strides + self.score_threshold = score_threshold + self.nms_threshold = nms_threshold + self.nms_top_k = nms_top_k + self.keep_top_k = keep_top_k + + def warp_boxes(self, boxes, ori_shape): + """Apply transform to boxes + """ + width, height = ori_shape[1], ori_shape[0] + n = len(boxes) + if n: + # warp points + xy = np.ones((n * 4, 3)) + xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape( + n * 4, 2) # x1y1, x2y2, x1y2, x2y1 + # xy = xy @ M.T # transform + xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + xy = np.concatenate( + (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + # clip boxes + xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) + xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) + return xy.astype(np.float32) + else: + return boxes + + def __call__(self, scores, raw_boxes): + batch_size = raw_boxes[0].shape[0] + reg_max = int(raw_boxes[0].shape[-1] / 4 - 1) + out_boxes_num = [] + out_boxes_list = [] + for batch_id in range(batch_size): + # generate centers + decode_boxes = [] + select_scores = [] + for stride, box_distribute, score in zip(self.strides, raw_boxes, + scores): + box_distribute = box_distribute[batch_id] + score = score[batch_id] + # centers + fm_h = self.input_shape[0] / stride + fm_w = self.input_shape[1] / stride + h_range = np.arange(fm_h) + w_range = np.arange(fm_w) + ww, hh = np.meshgrid(w_range, h_range) + ct_row = (hh.flatten() + 0.5) * stride + ct_col = (ww.flatten() + 0.5) * stride + center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1) + + # box distribution to distance + reg_range = np.arange(reg_max + 1) + box_distance = box_distribute.reshape((-1, reg_max + 1)) + box_distance = softmax(box_distance, axis=1) + box_distance = box_distance * np.expand_dims(reg_range, axis=0) + box_distance = np.sum(box_distance, axis=1).reshape((-1, 4)) + box_distance = box_distance * stride + + # top K candidate + topk_idx = np.argsort(score.max(axis=1))[::-1] + topk_idx = topk_idx[:self.nms_top_k] + center = center[topk_idx] + score = score[topk_idx] + box_distance = box_distance[topk_idx] + + # decode box + decode_box = center + [-1, -1, 1, 1] * box_distance + + select_scores.append(score) + decode_boxes.append(decode_box) + + # nms + bboxes = np.concatenate(decode_boxes, axis=0) + confidences = np.concatenate(select_scores, axis=0) + picked_box_probs = [] + picked_labels = [] + for class_index in range(0, confidences.shape[1]): + probs = confidences[:, class_index] + mask = probs > self.score_threshold + probs = probs[mask] + if probs.shape[0] == 0: + continue + subset_boxes = bboxes[mask, :] + box_probs = np.concatenate( + [subset_boxes, probs.reshape(-1, 1)], axis=1) + box_probs = hard_nms( + box_probs, + iou_threshold=self.nms_threshold, + top_k=self.keep_top_k, ) + picked_box_probs.append(box_probs) + picked_labels.extend([class_index] * 
box_probs.shape[0]) + + if len(picked_box_probs) == 0: + out_boxes_list.append(np.empty((0, 4))) + out_boxes_num.append(0) + + else: + picked_box_probs = np.concatenate(picked_box_probs) + + # resize output boxes + picked_box_probs[:, :4] = self.warp_boxes( + picked_box_probs[:, :4], self.ori_shape[batch_id]) + im_scale = np.concatenate([ + self.scale_factor[batch_id][::-1], + self.scale_factor[batch_id][::-1] + ]) + picked_box_probs[:, :4] /= im_scale + # clas score box + out_boxes_list.append( + np.concatenate( + [ + np.expand_dims( + np.array(picked_labels), + axis=-1), np.expand_dims( + picked_box_probs[:, 4], axis=-1), + picked_box_probs[:, :4] + ], + axis=1)) + out_boxes_num.append(len(picked_labels)) + + out_boxes_list = np.concatenate(out_boxes_list, axis=0) + out_boxes_num = np.asarray(out_boxes_num).astype(np.int32) + return out_boxes_list, out_boxes_num diff --git a/deploy/pptracking/python/preprocess.py b/deploy/pptracking/python/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..2df5df9c3c3dc0dcb90b0224bf0d8e022a47903e --- /dev/null +++ b/deploy/pptracking/python/preprocess.py @@ -0,0 +1,258 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import cv2 +import numpy as np + + +def decode_image(im_file, im_info): + """read rgb image + Args: + im_file (str|np.ndarray): input can be image path or np.ndarray + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + if isinstance(im_file, str): + with open(im_file, 'rb') as f: + im_read = f.read() + data = np.frombuffer(im_read, dtype='uint8') + im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + else: + im = im_file + im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32) + im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32) + return im, im_info + + +class Resize(object): + """resize image by target_size and max_size + Args: + target_size (int): the target size of image + keep_ratio (bool): whether keep_ratio or not, default true + interp (int): method of resize + """ + + def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR): + if isinstance(target_size, int): + target_size = [target_size, target_size] + self.target_size = target_size + self.keep_ratio = keep_ratio + self.interp = interp + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + assert len(self.target_size) == 2 + assert self.target_size[0] > 0 and self.target_size[1] > 0 + im_channel = im.shape[2] + im_scale_y, im_scale_x = self.generate_scale(im) + im = cv2.resize( + im, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp) + im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') + 
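# record the resized shape and per-axis scale factors so postprocessing can map predicted boxes back to the original image +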
im_info['scale_factor'] = np.array( + [im_scale_y, im_scale_x]).astype('float32') + return im, im_info + + def generate_scale(self, im): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + im_c = im.shape[2] + if self.keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(self.target_size) + target_size_max = np.max(self.target_size) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = self.target_size + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + +class NormalizeImage(object): + """normalize image + Args: + mean (list): im - mean + std (list): im / std + is_scale (bool): whether need im / 255 + is_channel_first (bool): if True: image shape is CHW, else: HWC + """ + + def __init__(self, mean, std, is_scale=True): + self.mean = mean + self.std = std + self.is_scale = is_scale + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.astype(np.float32, copy=False) + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + + if self.is_scale: + im = im / 255.0 + im -= mean + im /= std + return im, im_info + + +class Permute(object): + """permute image + Args: + to_bgr (bool): whether convert RGB to BGR + channel_first (bool): whether convert HWC to CHW + """ + + def __init__(self, ): + super(Permute, self).__init__() + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.transpose((2, 0, 1)).copy() + return im, im_info + + +class PadStride(object): + """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config + Args: + stride (bool): model with FPN need image shape % stride == 0 + """ + + def __init__(self, stride=0): + self.coarsest_stride = stride + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + coarsest_stride = self.coarsest_stride + if coarsest_stride <= 0: + return im, im_info + im_c, im_h, im_w = im.shape + pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) + pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) + padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = im + return padding_im, im_info + + +class LetterBoxResize(object): + def __init__(self, target_size): + """ + Resize image to target size, convert normalized xywh to pixel xyxy + format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]). + Args: + target_size (int|list): image target size. 
+ """ + super(LetterBoxResize, self).__init__() + if isinstance(target_size, int): + target_size = [target_size, target_size] + self.target_size = target_size + + def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)): + # letterbox: resize a rectangular image to a padded rectangular + shape = img.shape[:2] # [height, width] + ratio_h = float(height) / shape[0] + ratio_w = float(width) / shape[1] + ratio = min(ratio_h, ratio_w) + new_shape = (round(shape[1] * ratio), + round(shape[0] * ratio)) # [width, height] + padw = (width - new_shape[0]) / 2 + padh = (height - new_shape[1]) / 2 + top, bottom = round(padh - 0.1), round(padh + 0.1) + left, right = round(padw - 0.1), round(padw + 0.1) + + img = cv2.resize( + img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border + img = cv2.copyMakeBorder( + img, top, bottom, left, right, cv2.BORDER_CONSTANT, + value=color) # padded rectangular + return img, ratio, padw, padh + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + assert len(self.target_size) == 2 + assert self.target_size[0] > 0 and self.target_size[1] > 0 + height, width = self.target_size + h, w = im.shape[:2] + im, ratio, padw, padh = self.letterbox(im, height=height, width=width) + + new_shape = [round(h * ratio), round(w * ratio)] + im_info['im_shape'] = np.array(new_shape, dtype=np.float32) + im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32) + return im, im_info + + +def preprocess(im, preprocess_ops): + # process image by preprocess_ops + im_info = { + 'scale_factor': np.array( + [1., 1.], dtype=np.float32), + 'im_shape': None, + } + im, im_info = decode_image(im, im_info) + for operator in preprocess_ops: + im, im_info = operator(im, im_info) + return im, im_info diff --git a/deploy/pptracking/python/utils.py b/deploy/pptracking/python/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..241d5416434ffd733430cc111ffd7ae00c9d352d --- /dev/null +++ b/deploy/pptracking/python/utils.py @@ -0,0 +1,288 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import time +import os +import sys +import ast +import argparse + + +def argsparser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--model_dir", + type=str, + default=None, + help=("Directory include:'model.pdiparams', 'model.pdmodel', " + "'infer_cfg.yml', created by tools/export_model.py."), + required=True) + parser.add_argument( + "--image_file", type=str, default=None, help="Path of image file.") + parser.add_argument( + "--image_dir", + type=str, + default=None, + help="Dir of image file, `image_file` has a higher priority.") + parser.add_argument( + "--batch_size", type=int, default=1, help="batch_size for inference.") + parser.add_argument( + "--video_file", + type=str, + default=None, + help="Path of video file, `video_file` or `camera_id` has a highest priority." + ) + parser.add_argument( + "--camera_id", + type=int, + default=-1, + help="device id of camera to predict.") + parser.add_argument( + "--threshold", type=float, default=0.5, help="Threshold of score.") + parser.add_argument( + "--output_dir", + type=str, + default="output", + help="Directory of output visualization files.") + parser.add_argument( + "--run_mode", + type=str, + default='fluid', + help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)") + parser.add_argument( + "--device", + type=str, + default='cpu', + help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU." + ) + parser.add_argument( + "--run_benchmark", + type=ast.literal_eval, + default=False, + help="Whether to predict a image_file repeatedly for benchmark") + parser.add_argument( + "--enable_mkldnn", + type=ast.literal_eval, + default=False, + help="Whether use mkldnn with CPU.") + parser.add_argument( + "--cpu_threads", type=int, default=1, help="Num of threads with CPU.") + parser.add_argument( + "--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.") + parser.add_argument( + "--trt_max_shape", + type=int, + default=1280, + help="max_shape for TensorRT.") + parser.add_argument( + "--trt_opt_shape", + type=int, + default=640, + help="opt_shape for TensorRT.") + parser.add_argument( + "--trt_calib_mode", + type=bool, + default=False, + help="If the model is produced by TRT offline quantitative " + "calibration, trt_calib_mode need to set True.") + parser.add_argument( + '--save_images', + action='store_true', + help='Save visualization image results.') + parser.add_argument( + '--save_mot_txts', + action='store_true', + help='Save tracking results (txt).') + parser.add_argument( + '--scaled', + type=bool, + default=False, + help="Whether coords after detector outputs are scaled, False in JDE YOLOv3 " + "True in general detector.") + parser.add_argument( + "--reid_model_dir", + type=str, + default=None, + help=("Directory include:'model.pdiparams', 'model.pdmodel', " + "'infer_cfg.yml', created by tools/export_model.py.")) + parser.add_argument( + "--reid_batch_size", + type=int, + default=50, + help="max batch_size for reid model inference.") + parser.add_argument( + "--do_entrance_counting", + action='store_true', + help="Whether counting the numbers of identifiers entering " + "or getting out from the entrance. 
Note that only support one-class" + "counting, multi-class counting is coming soon.") + parser.add_argument( + "--secs_interval", + type=int, + default=2, + help="The seconds interval to count after tracking") + parser.add_argument( + "--draw_center_traj", + action='store_true', + help="Whether drawing the trajectory of center") + parser.add_argument( + "--mtmct_dir", + type=str, + default=None, + help="The MTMCT scene video folder.") + parser.add_argument( + "--mtmct_cfg", type=str, default=None, help="The MTMCT config.") + return parser + + +class Times(object): + def __init__(self): + self.time = 0. + # start time + self.st = 0. + # end time + self.et = 0. + + def start(self): + self.st = time.time() + + def end(self, repeats=1, accumulative=True): + self.et = time.time() + if accumulative: + self.time += (self.et - self.st) / repeats + else: + self.time = (self.et - self.st) / repeats + + def reset(self): + self.time = 0. + self.st = 0. + self.et = 0. + + def value(self): + return round(self.time, 4) + + +class Timer(Times): + def __init__(self): + super(Timer, self).__init__() + self.preprocess_time_s = Times() + self.inference_time_s = Times() + self.postprocess_time_s = Times() + self.img_num = 0 + + def info(self, average=False): + total_time = self.preprocess_time_s.value( + ) + self.inference_time_s.value() + self.postprocess_time_s.value() + total_time = round(total_time, 4) + print("------------------ Inference Time Info ----------------------") + print("total_time(ms): {}, img_num: {}".format(total_time * 1000, + self.img_num)) + preprocess_time = round( + self.preprocess_time_s.value() / max(1, self.img_num), + 4) if average else self.preprocess_time_s.value() + postprocess_time = round( + self.postprocess_time_s.value() / max(1, self.img_num), + 4) if average else self.postprocess_time_s.value() + inference_time = round(self.inference_time_s.value() / + max(1, self.img_num), + 4) if average else self.inference_time_s.value() + + average_latency = total_time / max(1, self.img_num) + qps = 0 + if total_time > 0: + qps = 1 / average_latency + print("average latency time(ms): {:.2f}, QPS: {:2f}".format( + average_latency * 1000, qps)) + print( + "preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}". + format(preprocess_time * 1000, inference_time * 1000, + postprocess_time * 1000)) + + def report(self, average=False): + dic = {} + dic['preprocess_time_s'] = round( + self.preprocess_time_s.value() / max(1, self.img_num), + 4) if average else self.preprocess_time_s.value() + dic['postprocess_time_s'] = round( + self.postprocess_time_s.value() / max(1, self.img_num), + 4) if average else self.postprocess_time_s.value() + dic['inference_time_s'] = round( + self.inference_time_s.value() / max(1, self.img_num), + 4) if average else self.inference_time_s.value() + dic['img_num'] = self.img_num + total_time = self.preprocess_time_s.value( + ) + self.inference_time_s.value() + self.postprocess_time_s.value() + dic['total_time_s'] = round(total_time, 4) + return dic + + +def get_current_memory_mb(): + """ + It is used to Obtain the memory usage of the CPU and GPU during the running of the program. + And this function Current program is time-consuming. + """ + import pynvml + import psutil + import GPUtil + gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0)) + + pid = os.getpid() + p = psutil.Process(pid) + info = p.memory_full_info() + cpu_mem = info.uss / 1024. / 1024. 
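+ # info.uss is the process's unique set size in bytes; dividing by 1024^2 converts it to MB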
+    gpu_mem = 0
+    gpu_percent = 0
+    gpus = GPUtil.getGPUs()
+    if gpu_id is not None and len(gpus) > 0:
+        gpu_percent = gpus[gpu_id].load
+        pynvml.nvmlInit()
+        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
+        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
+        gpu_mem = meminfo.used / 1024. / 1024.
+    return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
+
+
+def video2frames(video_path, outpath, frame_rate=25, **kargs):
+    def _dict2str(kargs):
+        cmd_str = ''
+        for k, v in kargs.items():
+            cmd_str += (' ' + str(k) + ' ' + str(v))
+        return cmd_str
+
+    ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
+    vid_name = os.path.basename(video_path).split('.')[0]
+    out_full_path = os.path.join(outpath, vid_name)
+
+    if not os.path.exists(out_full_path):
+        os.makedirs(out_full_path)
+
+    # output frame file name pattern
+    outformat = os.path.join(out_full_path, '%05d.jpg')
+
+    cmd = ffmpeg + [
+        ' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
+    ]
+    cmd = ''.join(cmd) + _dict2str(kargs)
+
+    if os.system(cmd) != 0:
+        raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
+
+    sys.stdout.flush()
+    return out_full_path
+
+
+def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', '.flv')):
+    return f.lower().endswith(extensions)
diff --git a/deploy/pptracking/python/visualize.py b/deploy/pptracking/python/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..4320b405e0ab86309293d182736af064825efb02
--- /dev/null
+++ b/deploy/pptracking/python/visualize.py
@@ -0,0 +1,294 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+
+import os
+import cv2
+import numpy as np
+from PIL import Image, ImageDraw
+from collections import deque
+
+
+def visualize_box_mask(im, results, labels, threshold=0.5):
+    """
+    Args:
+        im (str/np.ndarray): path of image, or np.ndarray read by cv2
+        results (dict): contains 'boxes': np.ndarray of shape [N, 6], N: number of boxes,
+                        each row is [class, score, x_min, y_min, x_max, y_max]
+        labels (list): labels: ['class1', ..., 'classn']
+        threshold (float): threshold of score
+    Returns:
+        im (PIL.Image.Image): visualized image
+    """
+    if isinstance(im, str):
+        im = Image.open(im).convert('RGB')
+    else:
+        im = Image.fromarray(im)
+    if 'boxes' in results and len(results['boxes']) > 0:
+        im = draw_box(im, results['boxes'], labels, threshold=threshold)
+    return im
+
+
+def get_color_map_list(num_classes):
+    """
+    Args:
+        num_classes (int): number of classes
+    Returns:
+        color_map (list): RGB color list
+    """
+    color_map = num_classes * [0, 0, 0]
+    for i in range(0, num_classes):
+        j = 0
+        lab = i
+        while lab:
+            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+            j += 1
+            lab >>= 3
+    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
+    return color_map
+
+
+def draw_box(im, np_boxes, labels, threshold=0.5):
+    """
+    Args:
+        im (PIL.Image.Image): PIL image
+        np_boxes (np.ndarray): shape [N, 6], N: number of boxes,
+                               each row is [class, score, x_min, y_min, x_max, y_max]
+        labels (list): labels: ['class1', ..., 'classn']
+        threshold (float): score threshold for drawing a box
+    Returns:
+        im (PIL.Image.Image): visualized image
+    """
+    draw_thickness = min(im.size) // 320
+    draw = ImageDraw.Draw(im)
+    clsid2color = {}
+    color_list = get_color_map_list(len(labels))
+    expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
+    np_boxes = np_boxes[expect_boxes, :]
+
+    for dt in np_boxes:
+        clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
+        if clsid not in clsid2color:
+            clsid2color[clsid] = color_list[clsid]
+        color = tuple(clsid2color[clsid])
+
+        if len(bbox) == 4:
+            xmin, ymin, xmax, ymax = bbox
+            print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
+                  'right_bottom:[{:.2f},{:.2f}]'.format(
+                      int(clsid), score, xmin, ymin, xmax, ymax))
+            # draw bbox
+            draw.line(
+                [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
+                 (xmin, ymin)],
+                width=draw_thickness,
+                fill=color)
+        elif len(bbox) == 8:
+            x1, y1, x2, y2, x3, y3, x4, y4 = bbox
+            draw.line(
+                [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
+                width=2,
+                fill=color)
+            xmin = min(x1, x2, x3, x4)
+            ymin = min(y1, y2, y3, y4)
+
+        # draw label
+        text = "{} {:.4f}".format(labels[clsid], score)
+        tw, th = draw.textsize(text)
+        draw.rectangle(
+            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
+        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+    return im
+
+
+def get_color(idx):
+    idx = idx * 3
+    color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
+    return color
+
+
+def plot_tracking(image,
+                  tlwhs,
+                  obj_ids,
+                  scores=None,
+                  frame_id=0,
+                  fps=0.,
+                  ids2names=[],
+                  do_entrance_counting=False,
+                  entrance=None):
+    im = np.ascontiguousarray(np.copy(image))
+    im_h, im_w = im.shape[:2]
+
+    text_scale = max(1, image.shape[1] / 1600.)
+    text_thickness = 2
+    line_thickness = max(1, int(image.shape[1] / 500.))
+
+    cv2.putText(
+        im,
+        'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
+        (0, int(15 * text_scale)),
+        cv2.FONT_HERSHEY_PLAIN,
+        text_scale, (0, 0, 255),
+        thickness=2)
+
+    for i, tlwh in enumerate(tlwhs):
+        x1, y1, w, h = tlwh
+        intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
+        obj_id = int(obj_ids[i])
+        id_text = '{}'.format(int(obj_id))
+        if ids2names != []:
+            assert len(
+                ids2names) == 1, "plot_tracking only supports a single class."
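+            # Prefix the track id with the class name, e.g. 'pedestrian_3'.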
+ id_text = '{}_'.format(ids2names[0]) + id_text + _line_thickness = 1 if obj_id <= 0 else line_thickness + color = get_color(abs(obj_id)) + cv2.rectangle( + im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) + cv2.putText( + im, + id_text, (intbox[0], intbox[1] - 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=text_thickness) + + if scores is not None: + text = '{:.2f}'.format(float(scores[i])) + cv2.putText( + im, + text, (intbox[0], intbox[1] + 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 255, 255), + thickness=text_thickness) + + if do_entrance_counting: + entrance_line = tuple(map(int, entrance)) + cv2.rectangle( + im, + entrance_line[0:2], + entrance_line[2:4], + color=(0, 255, 255), + thickness=line_thickness) + return im + + +def plot_tracking_dict(image, + num_classes, + tlwhs_dict, + obj_ids_dict, + scores_dict, + frame_id=0, + fps=0., + ids2names=[], + do_entrance_counting=False, + entrance=None, + records=None, + center_traj=None): + im = np.ascontiguousarray(np.copy(image)) + im_h, im_w = im.shape[:2] + + text_scale = max(1, image.shape[1] / 1600.) + text_thickness = 2 + line_thickness = max(1, int(image.shape[1] / 500.)) + + if num_classes == 1: + start = records[-1].find('Total') + end = records[-1].find('In') + cv2.putText( + im, + records[-1][start:end], (0, int(40 * text_scale)), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=2) + + if num_classes == 1 and do_entrance_counting: + entrance_line = tuple(map(int, entrance)) + cv2.rectangle( + im, + entrance_line[0:2], + entrance_line[2:4], + color=(0, 255, 255), + thickness=line_thickness) + # find start location for entrance counting data + start = records[-1].find('In') + cv2.putText( + im, + records[-1][start:-1], (0, int(60 * text_scale)), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=2) + + for cls_id in range(num_classes): + tlwhs = tlwhs_dict[cls_id] + obj_ids = obj_ids_dict[cls_id] + scores = scores_dict[cls_id] + cv2.putText( + im, + 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), + (0, int(15 * text_scale)), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=2) + + record_id = set() + for i, tlwh in enumerate(tlwhs): + x1, y1, w, h = tlwh + intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) + center = tuple(map(int, (x1 + w / 2., y1 + h / 2.))) + obj_id = int(obj_ids[i]) + if center_traj is not None: + record_id.add(obj_id) + if obj_id not in center_traj[cls_id]: + center_traj[cls_id][obj_id] = deque(maxlen=30) + center_traj[cls_id][obj_id].append(center) + + id_text = '{}'.format(int(obj_id)) + if ids2names != []: + id_text = '{}_{}'.format(ids2names[cls_id], id_text) + else: + id_text = 'class{}_{}'.format(cls_id, id_text) + + _line_thickness = 1 if obj_id <= 0 else line_thickness + color = get_color(abs(obj_id)) + cv2.rectangle( + im, + intbox[0:2], + intbox[2:4], + color=color, + thickness=line_thickness) + cv2.putText( + im, + id_text, (intbox[0], intbox[1] - 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=text_thickness) + + if scores is not None: + text = '{:.2f}'.format(float(scores[i])) + cv2.putText( + im, + text, (intbox[0], intbox[1] + 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 255, 255), + thickness=text_thickness) + if center_traj is not None: + for traj in center_traj: + for i in traj.keys(): + if i not in record_id: + continue + for point in traj[i]: + cv2.circle(im, point, 3, (0, 0, 255), -1) + return im
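+
+
+if __name__ == '__main__':
+    # Minimal usage sketch (illustrative only, not part of the tracking
+    # pipeline): draw one made-up box with plot_tracking() on a blank frame.
+    # The frame size, box, id, score and class name below are assumptions.
+    dummy_frame = np.zeros((608, 1088, 3), dtype=np.uint8)
+    tlwhs = [[100., 150., 60., 120.]]  # one box as [x, y, w, h]
+    obj_ids = [1]
+    scores = [0.92]
+    vis = plot_tracking(
+        dummy_frame,
+        tlwhs,
+        obj_ids,
+        scores=scores,
+        frame_id=0,
+        fps=25.,
+        ids2names=['pedestrian'])
+    cv2.imwrite('plot_tracking_demo.jpg', vis)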