Unverified Commit 76d3274a Authored by: W wangguanzhong Committed by: GitHub

add pptracking (#4661)

Parent 48761c4a
# Real-Time Tracking System PP-Tracking
PP-Tracking is the industry's first open-source real-time tracking system built on the PaddlePaddle deep learning framework. Targeting the pain points of real-world applications, PP-Tracking provides built-in capabilities and industrial applications including pedestrian and vehicle tracking, multi-target multi-camera tracking, multi-class tracking, small-object tracking, and flow counting, together with a visual development interface. The models integrate lightweight multi-object tracking, object detection, and ReID algorithms to further improve PP-Tracking's server-side deployment performance. Both Python and C++ deployment are supported, covering Linux and NVIDIA Jetson platforms.
<div width="1000" align="center">
<img src="../../docs/images/pptracking.png"/>
</div>
<div width="1000" align="center">
<img src="../../docs/images/pptracking-demo.gif"/>
<br>
Video sources: VisDrone2021 and BDD100K open-source datasets</div>
### 1. Quick Start
PP-Tracking provides a concise visual interface that delivers the various tracking functions without any development work. See the [PP-Tracking GUI user guide]() to get started quickly.
### 2. Algorithms
PP-Tracking integrates lightweight multi-object tracking, object detection, and ReID algorithms to improve the real-time performance of the tracking system. The multi-object tracking algorithm is optimized from FairMOT into a lightweight server-side model, and scenario-specific pretrained models are provided for different applications.
For model training and evaluation, see the [multi-object tracking quick start](../../configs/mot/README_cn.md#快速开始).
The pretrained and exported models PP-Tracking provides for each scenario are listed below:
| Scenario | Dataset | Accuracy (MOTA) | Inference speed on NX (FPS) | Config | Weights | Inference model |
| :---------:|:--------------- | :-------: | :------: | :------: |:---: | :---: |
| Pedestrian tracking | MOT17 | 65.3 | 23.9 | [config](../../configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.tar) |
| Small-object pedestrian tracking | VisDrone-pedestrian | 40.5 | 8.35 | [config](../../configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.tar) |
| Vehicle tracking | BDD100k-vehicle | 32.6 | 24.3 | [config](../../configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.tar) |
| Small-object vehicle tracking | VisDrone-vehicle | 39.8 | 22.8 | [config](../../configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.tar) |
| Multi-class tracking | BDD100k | - | 12.5 | [config]() | [download]() | [download](https://bj.bcebos.com/v1/paddledet/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100k_mcmot.tar) |
| Small-object multi-class tracking | VisDrone | 20.4 | 6.74 | [config](../../configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.tar) |
**Notes:**
1. Inference speed is measured with TensorRT FP16; the test environment is CUDA 10.2, JetPack 4.5.1, TensorRT 7.1.
2. For more tracking models, see the [multi-object tracking model zoo](../../configs/mot/README_cn.md#模型库).
The detection model uses the lightweight flagship model PP-PicoDet; see the [PP-PicoDet documentation](../../configs/picodet) for details.
The ReID model uses the ultra-lightweight backbone PP-LCNet; see the [PP-LCNet introduction](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models/PP-LCNet.md) for details.
### 3. Python Inference Deployment
For deploying PP-Tracking with Python inference, see the [PP-Tracking Python deployment guide](python/README.md).
### 4. C++ Inference Deployment
For deploying PP-Tracking with C++ inference, see the [PP-Tracking C++ deployment guide](cpp/README.md).
cmake_minimum_required(VERSION 3.0)
project(PaddleObjectDetector CXX C)
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use GPU." ON)
option(WITH_TENSORRT "Compile demo with TensorRT." OFF)
SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
SET(PADDLE_LIB_NAME "" CACHE STRING "libpaddle_inference")
SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
SET(CUDA_LIB "" CACHE PATH "Location of libraries")
SET(CUDNN_LIB "" CACHE PATH "Location of libraries")
SET(TENSORRT_INC_DIR "" CACHE PATH "Compile demo with TensorRT")
SET(TENSORRT_LIB_DIR "" CACHE PATH "Compile demo with TensorRT")
include(cmake/yaml-cpp.cmake)
include_directories("${CMAKE_SOURCE_DIR}/")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
set(SRCS src/main.cc src/preprocess_op.cc src/pipeline.cc src/jde_predictor.cc src/sde_predictor.cc src/tracker.cc src/trajectory.cc src/lapjv.cpp src/postprocess.cc)
macro(safe_set_static_flag)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endmacro()
if (WITH_MKL)
ADD_DEFINITIONS(-DUSE_MKL)
endif()
if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "")
message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_influence_dir")
endif()
message("PADDLE_DIR IS:" ${PADDLE_DIR})
if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
endif()
include_directories("${CMAKE_SOURCE_DIR}/")
include_directories("${PADDLE_DIR}/")
include_directories("${PADDLE_DIR}/third_party/install/protobuf/include")
include_directories("${PADDLE_DIR}/third_party/install/glog/include")
include_directories("${PADDLE_DIR}/third_party/install/gflags/include")
include_directories("${PADDLE_DIR}/third_party/install/xxhash/include")
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include")
include_directories("${PADDLE_DIR}/third_party/install/snappy/include")
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
endif()
include_directories("${PADDLE_DIR}/third_party/boost")
include_directories("${PADDLE_DIR}/third_party/eigen3")
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
link_directories("${PADDLE_DIR}/third_party/install/snappy/lib")
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
endif()
link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib")
link_directories("${PADDLE_DIR}/paddle/lib/")
link_directories("${CMAKE_CURRENT_BINARY_DIR}")
if (WIN32)
include_directories("${PADDLE_DIR}/paddle/fluid/inference")
include_directories("${PADDLE_DIR}/paddle/include")
link_directories("${PADDLE_DIR}/paddle/fluid/inference")
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
else ()
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH)
include_directories("${PADDLE_DIR}/paddle/include")
link_directories("${PADDLE_DIR}/paddle/lib")
endif ()
include_directories(${OpenCV_INCLUDE_DIRS})
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2 -fopenmp -std=c++11")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
# TODO let users define cuda lib path
if (WITH_GPU)
if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "")
message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64")
endif()
if (NOT WIN32)
if (NOT DEFINED CUDNN_LIB)
message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64")
endif()
endif(NOT WIN32)
endif()
if (NOT WIN32)
if (WITH_TENSORRT AND WITH_GPU)
include_directories("${TENSORRT_INC_DIR}/")
link_directories("${TENSORRT_LIB_DIR}/")
endif()
endif(NOT WIN32)
if (NOT WIN32)
set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph")
if(EXISTS ${NGRAPH_PATH})
include(GNUInstallDirs)
include_directories("${NGRAPH_PATH}/include")
link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}")
set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
endif()
if(WITH_MKL)
include_directories("${PADDLE_DIR}/third_party/install/mklml/include")
if (WIN32)
set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib
${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib)
else ()
set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib)
endif ()
set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn")
if(EXISTS ${MKLDNN_PATH})
include_directories("${MKLDNN_PATH}/include")
if (WIN32)
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
else ()
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
endif ()
endif()
else()
set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
if (WIN32)
if(EXISTS "${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(DEPS
${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS
${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
endif()
if (WIN32)
set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
message("PADDLE_LIB_NAME:" ${PADDLE_LIB_NAME})
message("DEPS:" $DEPS)
if (NOT WIN32)
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags protobuf z xxhash yaml-cpp
)
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
set(DEPS ${DEPS} snappystream)
endif()
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
set(DEPS ${DEPS} snappy)
endif()
else()
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags_static libprotobuf xxhash libyaml-cppmt)
set(DEPS ${DEPS} libcmt shlwapi)
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
set(DEPS ${DEPS} snappy)
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
set(DEPS ${DEPS} snappystream)
endif()
endif(NOT WIN32)
if(WITH_GPU)
if(NOT WIN32)
if (WITH_TENSORRT)
set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDNN_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
endif()
if (NOT WIN32)
set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
set(DEPS ${DEPS} ${EXTERNAL_LIB})
endif()
set(DEPS ${DEPS} ${OpenCV_LIBS})
add_executable(main ${SRCS})
ADD_DEPENDENCIES(main ext-yaml-cpp)
message("DEPS:" $DEPS)
target_link_libraries(main ${DEPS})
if (WIN32 AND WITH_MKL)
add_custom_command(TARGET main POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll
)
endif()
if (WIN32)
add_custom_command(TARGET main POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll
)
endif()
# C++ Inference Deployment
In PaddlePaddle, the inference engine and the training engine use different underlying optimizations. The inference engine uses AnalysisPredictor, which is optimized specifically for inference: it applies multiple graph-level optimizations to the model and removes unnecessary memory copies. For users with high performance requirements when deploying trained models, we provide inference scripts that are independent of PaddleDetection and easy to integrate. The current C++ deployment supports single-camera tracking based on FairMOT, including pedestrian flow statistics and entrance/exit counting; a configuration sketch follows the list below.
It consists of three main steps:
- Prepare the environment
- Export the inference model
- Run C++ inference
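Before walking through these steps, here is a minimal sketch of the AnalysisPredictor setup underlying this deployment. It condenses the calls used later in `src/jde_predictor.cc`; the helper name `BuildPredictor` and the paths are illustrative only:
```cpp
// Minimal sketch of the AnalysisPredictor setup (condensed from
// src/jde_predictor.cc); BuildPredictor and the paths are illustrative.
#include <memory>
#include <string>
#include "paddle_inference_api.h"

std::shared_ptr<paddle_infer::Predictor> BuildPredictor(
    const std::string& model_dir) {
  paddle_infer::Config config;
  config.SetModel(model_dir + "/model.pdmodel",
                  model_dir + "/model.pdiparams");
  config.EnableUseGpu(200, /*gpu_id=*/0);  // use config.DisableGpu() for CPU
  config.SwitchIrOptim(true);   // graph-level optimizations
  config.EnableMemoryOptim();   // reduce unnecessary memory copies
  return paddle_infer::CreatePredictor(config);
}
```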
## 1. Prepare the Environment
Requirements:
- GCC 8.2
- CUDA 10.1/10.2/11.1; cuDNN 7.6/8.1
- CMake 3.0+
- TensorRT 6/7
NVIDIA Jetson users should follow the [Jetson build guide](../../cpp/Jetson_build.md#jetson环境搭建) to complete the JetPack installation.
### 1. Download the code
```
git clone https://github.com/PaddlePaddle/PaddleDetection.git
# The C++ deployment code is independent of the rest of the repository
cd deploy/pptracking/cpp
```
### 2. Download or build the PaddlePaddle C++ inference library
Download the inference library that matches your environment from the [C++ inference library download list](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html).
After downloading and extracting, the `./paddle_inference` directory contains:
```
paddle_inference
├── paddle # core Paddle libraries and headers
|
├── third_party # third-party dependencies and headers
|
└── version.txt # version and build information
```
**Note:** If your environment differs from the officially provided ones (e.g., different CUDA, cuDNN, or TensorRT versions), if you need to modify the PaddlePaddle source code, or if you need a customized build, you can compile the inference library from source following this [guide](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html).
### 3. Build
The `cmake` build command lives in `scripts/build.sh`. Adjust its main parameters to your environment; they are described below:
```
# Whether to use GPU (i.e., whether to use CUDA)
WITH_GPU=ON
# Whether to use MKL or OpenBLAS; must be set to OFF on TX2
WITH_MKL=OFF
# Whether to integrate TensorRT (only effective when WITH_GPU=ON)
WITH_TENSORRT=ON
# TensorRT include path
TENSORRT_INC_DIR=/path/to/TensorRT/include
# TensorRT lib path
TENSORRT_LIB_DIR=/path/to/TensorRT/lib
# Paddle inference library path
PADDLE_DIR=/path/to/paddle_inference/
# Paddle inference library name
PADDLE_LIB_NAME=libpaddle_inference
# CUDA lib path
CUDA_LIB=/path/to/cuda/lib
# cuDNN lib path
CUDNN_LIB=/path/to/cudnn/lib
# OpenCV path
OPENCV_DIR=/path/to/opencv
```
After setting the main parameters, run the ```build.sh``` script:
```
sh ./scripts/build.sh
```
**Notes:**
1. On the `TX2` platform, `CUDA` and `CUDNN` must be installed via `JetPack`.
2. Prebuilt OpenCV downloads are provided for Linux and TX2; on other platforms, install [OpenCV](https://opencv.org/) yourself.
## 2. Export the Inference Model
Export the trained weights into the model format required by the inference library using ```tools/export_model.py``` in PaddleDetection:
```
python tools/export_model.py -c configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams
```
The inference model is exported to ```output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320``` by default and contains ```infer_cfg.yml```, ```model.pdiparams```, ```model.pdiparams.info```, and ```model.pdmodel```.
Exported models can also be downloaded directly from the [inference model list]().
## 3. C++ Inference
After completing the steps above, run inference with ```build/main```; the parameters are listed below:
| Parameter | Description |
| ---- | ---- |
| --track_model_dir | Path to the exported tracking inference model |
| --video_file | Path to the video file to run on |
| --device | Device to run on: one of `CPU/GPU/XPU`, default `CPU` |
| --gpu_id | GPU device id used for inference (default 0) |
| --run_mode | Mode when using GPU, one of (fluid/trt_fp32/trt_fp16/trt_int8); default fluid |
| --output_dir | Directory for output images, default output |
| --use_mkldnn | Whether to enable MKL-DNN acceleration for CPU inference |
| --cpu_threads | Number of CPU threads, default 1 |
| --do_entrance_counting | Whether to count entrance/exit flow, default false |
| --save_result | Whether to save tracking results |
Example 1:
```shell
# Run on CPU with video `test.mp4`; the model and the test video have been moved into the `build` directory
./main --track_model_dir=./fairmot_hrnetv2_w18_dlafpn_30e_576x320 --video_file=test.mp4
# The visualized prediction is saved by default to output/test.mp4 in the current directory
```
Example 2:
```shell
# Run on GPU with video `test.mp4`, enabling entrance/exit counting and saving tracking results; the model and the test video have been moved into the `build` directory
./main -video_file=test.mp4 -track_model_dir=./fairmot_dla34_30e_1088x608/ --device=gpu --do_entrance_counting=True --save_result=True
# The visualized prediction is saved by default to `output/test.mp4` in the current directory
# Tracking results are saved to `output/mot_output.txt`
# Counting results are saved to `output/flow_statistic.txt`
```
find_package(Git REQUIRED)
include(ExternalProject)
message("${CMAKE_BUILD_TYPE}")
ExternalProject_Add(
ext-yaml-cpp
URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
URL_MD5 9542d6de397d1fbd649ed468cb5850e6
CMAKE_ARGS
-DYAML_CPP_BUILD_TESTS=OFF
-DYAML_CPP_BUILD_TOOLS=OFF
-DYAML_CPP_INSTALL=OFF
-DYAML_CPP_BUILD_CONTRIB=OFF
-DMSVC_SHARED_RT=OFF
-DBUILD_SHARED_LIBS=OFF
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
# Disable install step
INSTALL_COMMAND ""
LOG_DOWNLOAD ON
LOG_BUILD 1
)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "yaml-cpp/yaml.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleDetection {
// Inference model configuration parser
class ConfigPaser {
public:
ConfigPaser() {}
~ConfigPaser() {}
bool load_config(const std::string& model_dir,
const std::string& cfg = "infer_cfg.yml") {
// Load as a YAML::Node
YAML::Node config;
config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
// Get runtime mode : fluid, trt_fp16, trt_fp32
if (config["mode"].IsDefined()) {
mode_ = config["mode"].as<std::string>();
} else {
std::cerr << "Please set mode, "
<< "support value : fluid/trt_fp16/trt_fp32." << std::endl;
return false;
}
// Get model arch: FairMot or YOLO/Picodet/LCNet for DeepSort
if (config["arch"].IsDefined()) {
arch_ = config["arch"].as<std::string>();
} else {
std::cerr << "Please set model arch,"
<< "support value : FairMot, YOLO, PicoDet, LCNet etc"
<< std::endl;
return false;
}
// Get min_subgraph_size for tensorrt
if (config["min_subgraph_size"].IsDefined()) {
min_subgraph_size_ = config["min_subgraph_size"].as<int>();
} else {
std::cerr << "Please set min_subgraph_size." << std::endl;
return false;
}
// Get draw_threshold for visualization
if (config["draw_threshold"].IsDefined()) {
draw_threshold_ = config["draw_threshold"].as<float>();
} else {
std::cerr << "Please set draw_threshold." << std::endl;
return false;
}
// Get Preprocess for preprocessing
if (config["Preprocess"].IsDefined()) {
preprocess_info_ = config["Preprocess"];
} else {
std::cerr << "Please set Preprocess." << std::endl;
return false;
}
// Get label_list for visualization
if (config["label_list"].IsDefined()) {
label_list_ = config["label_list"].as<std::vector<std::string>>();
} else {
std::cerr << "Please set label_list." << std::endl;
return false;
}
// Get use_dynamic_shape for TensorRT
if (config["use_dynamic_shape"].IsDefined()) {
use_dynamic_shape_ = config["use_dynamic_shape"].as<bool>();
} else {
std::cerr << "Please set use_dynamic_shape." << std::endl;
return false;
}
// Get conf_thresh for tracker
if (config["tracker"].IsDefined()) {
if (config["tracker"]["conf_thres"].IsDefined()) {
conf_thresh_ = config["tracker"]["conf_thres"].as<float>();
} else {
std::cerr << "Please set conf_thres in tracker." << std::endl;
return false;
}
}
// Get NMS for postprocess
if (config["NMS"].IsDefined()) {
nms_info_ = config["NMS"];
}
// Get fpn_stride in PicoDet
if (config["fpn_stride"].IsDefined()) {
fpn_stride_.clear();
for (auto item : config["fpn_stride"]) {
fpn_stride_.emplace_back(item.as<int>());
}
}
return true;
}
std::string mode_;
float draw_threshold_;
std::string arch_;
int min_subgraph_size_;
YAML::Node preprocess_info_;
YAML::Node nms_info_;
std::vector<std::string> label_list_;
std::vector<int> fpn_stride_;
bool use_dynamic_shape_;
float conf_thresh_;
};
} // namespace PaddleDetection
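As a quick orientation, here is a hypothetical usage sketch for `ConfigPaser` (not part of the shipped sources; the model directory is a placeholder):
```cpp
// Hypothetical ConfigPaser usage; the model directory is a placeholder.
#include <iostream>
#include "include/config_parser.h"

int main() {
  PaddleDetection::ConfigPaser config;
  // Parses <model_dir>/infer_cfg.yml produced by tools/export_model.py
  if (!config.load_config("./fairmot_hrnetv2_w18_dlafpn_30e_576x320")) {
    return -1;
  }
  std::cout << "arch: " << config.arch_ << ", mode: " << config.mode_
            << ", conf_thres: " << config.conf_thresh_ << std::endl;
  return 0;
}
```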
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/preprocess_op.h"
#include "include/utils.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
class JDEPredictor {
public:
explicit JDEPredictor(const std::string& device = "CPU",
const std::string& model_dir = "",
const double threshold = -1.,
const std::string& run_mode = "fluid",
const int gpu_id = 0,
const bool use_mkldnn = false,
const int cpu_threads = 1,
bool trt_calib_mode = false,
const int min_box_area = 200) {
this->device_ = device;
this->gpu_id_ = gpu_id;
this->use_mkldnn_ = use_mkldnn;
this->cpu_math_library_num_threads_ = cpu_threads;
this->trt_calib_mode_ = trt_calib_mode;
this->min_box_area_ = min_box_area;
config_.load_config(model_dir);
this->min_subgraph_size_ = config_.min_subgraph_size_;
preprocessor_.Init(config_.preprocess_info_);
LoadModel(model_dir, run_mode);
this->conf_thresh_ = config_.conf_thresh_;
}
// Load Paddle inference model
void LoadModel(const std::string& model_dir,
const std::string& run_mode = "fluid");
// Run predictor
void Predict(const std::vector<cv::Mat> imgs,
const double threshold = 0.5,
MOTResult* result = nullptr,
std::vector<double>* times = nullptr);
private:
std::string device_ = "CPU";
float threshold = 0.5;
int gpu_id_ = 0;
bool use_mkldnn_ = false;
int cpu_math_library_num_threads_ = 1;
int min_subgraph_size_ = 3;
bool trt_calib_mode_ = false;
// Preprocess image and copy data to input buffer
void Preprocess(const cv::Mat& image_mat);
// Postprocess result
void Postprocess(const cv::Mat dets, const cv::Mat emb, MOTResult* result);
std::shared_ptr<Predictor> predictor_;
Preprocessor preprocessor_;
ImageBlob inputs_;
std::vector<float> bbox_data_;
std::vector<float> emb_data_;
double threshold_;
ConfigPaser config_;
float min_box_area_;
float conf_thresh_;
};
} // namespace PaddleDetection
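A hypothetical calling sketch for `JDEPredictor` (the model path and input frame are placeholders; `times` accumulates preprocess, inference, and postprocess latencies in milliseconds, matching the bookkeeping in `src/jde_predictor.cc`):
```cpp
// Hypothetical JDEPredictor usage; model path and frame are placeholders.
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/jde_predictor.h"

void TrackOneFrame(const cv::Mat& frame) {
  PaddleDetection::JDEPredictor predictor(
      "GPU", "./fairmot_hrnetv2_w18_dlafpn_30e_576x320", 0.5, "trt_fp16");
  PaddleDetection::MOTResult result;
  std::vector<double> times(3, 0.0);  // preprocess / inference / postprocess
  predictor.Predict({frame}, /*threshold=*/0.5, &result, &times);
  for (const auto& track : result) {
    // track.ids, track.score, track.rects.{left, top, right, bottom}
  }
}
```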
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/gatagat/lap/blob/master/lap/lapjv.h
// The copyright of gatagat/lap is as follows:
// MIT License
#ifndef DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_
#define DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_
#define LARGE 1000000
#if !defined TRUE
#define TRUE 1
#endif
#if !defined FALSE
#define FALSE 0
#endif
#define NEW(x, t, n) \
if ((x = reinterpret_cast<t *>(malloc(sizeof(t) * (n)))) == 0) { \
return -1; \
}
#define FREE(x) \
if (x != 0) { \
free(x); \
x = 0; \
}
#define SWAP_INDICES(a, b) \
{ \
int_t _temp_index = a; \
a = b; \
b = _temp_index; \
}
#include <opencv2/opencv.hpp>
namespace PaddleDetection {
typedef signed int int_t;
typedef unsigned int uint_t;
typedef double cost_t;
typedef char boolean;
typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t;
int lapjv_internal(const cv::Mat &cost,
const bool extend_cost,
const float cost_limit,
int *x,
int *y);
} // namespace PaddleDetection
#endif // DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_
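A small hypothetical example of `lapjv_internal` on a 2x2 cost matrix; `x[i]` receives the column assigned to row `i` and `y[j]` the row assigned to column `j`:
```cpp
// Hypothetical lapjv_internal call on a tiny dense cost matrix.
#include <opencv2/opencv.hpp>
#include "include/lapjv.h"

void TinyAssignment() {
  float data[4] = {1.0f, 2.0f,
                   3.0f, 0.5f};
  cv::Mat cost(2, 2, CV_32F, data);
  int x[2], y[2];
  PaddleDetection::lapjv_internal(cost, /*extend_cost=*/false,
                                  /*cost_limit=*/LARGE, x, y);
  // Expected: row 0 -> col 0, row 1 -> col 1 (total cost 1.5)
}
```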
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_
#define DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_
#include <glog/logging.h>
#include <math.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#elif LINUX
#include <stdarg.h>
#include <sys/stat.h>
#endif
#include "include/jde_predictor.h"
#include "include/sde_predictor.h"
namespace PaddleDetection {
class Pipeline {
public:
explicit Pipeline(const std::string& device,
const double threshold,
const std::string& output_dir,
const std::string& run_mode = "fluid",
const int gpu_id = 0,
const bool use_mkldnn = false,
const int cpu_threads = 1,
const bool trt_calib_mode = false,
const bool do_entrance_counting = false,
const bool save_result = false,
const std::string& scene = "pedestrian",
const bool tiny_obj = false,
const bool is_mtmct = false,
const int secs_interval = 10,
const std::string track_model_dir = "",
const std::string det_model_dir = "",
const std::string reid_model_dir = "") {
std::vector<std::string> input;
this->input_ = input;
this->device_ = device;
this->threshold_ = threshold;
this->output_dir_ = output_dir;
this->run_mode_ = run_mode;
this->gpu_id_ = gpu_id;
this->use_mkldnn_ = use_mkldnn;
this->cpu_threads_ = cpu_threads;
this->trt_calib_mode_ = trt_calib_mode;
this->do_entrance_counting_ = do_entrance_counting;
this->secs_interval_ = secs_interval;
this->save_result_ = save_result;
SelectModel(scene,
tiny_obj,
is_mtmct,
track_model_dir,
det_model_dir,
reid_model_dir);
InitPredictor();
}
// Set input, it must execute before Run()
void SetInput(const std::string& input_video);
void ClearInput();
// Run pipeline in video
void Run();
void PredictMOT(const std::string& video_path);
void PredictMTMCT(const std::vector<std::string> video_inputs);
// Run pipeline in stream
void RunMOTStream(const cv::Mat img,
const int frame_id,
const int video_fps,
const Rect entrance,
cv::Mat out_img,
std::vector<std::string>* records,
std::set<int>* count_set,
std::set<int>* interval_count_set,
std::vector<int>* in_count_list,
std::vector<int>* out_count_list,
std::map<int, std::vector<float>>* prev_center,
std::vector<std::string>* flow_records);
void RunMTMCTStream(const std::vector<cv::Mat> imgs,
std::vector<std::string>* records);
void PrintBenchmarkLog(const std::vector<double> det_time, const int img_num);
private:
// Select model according to scenes, it must execute before Run()
void SelectModel(const std::string& scene = "pedestrian",
const bool tiny_obj = false,
const bool is_mtmct = false,
const std::string track_model_dir = "",
const std::string det_model_dir = "",
const std::string reid_model_dir = "");
void InitPredictor();
std::shared_ptr<PaddleDetection::JDEPredictor> jde_sct_;
std::shared_ptr<PaddleDetection::SDEPredictor> sde_sct_;
std::vector<std::string> input_;
std::vector<cv::Mat> stream_;
std::string device_;
double threshold_;
std::string output_dir_;
std::string track_model_dir_;
std::string det_model_dir_;
std::string reid_model_dir_;
std::string run_mode_ = "fluid";
int gpu_id_ = 0;
bool use_mkldnn_ = false;
int cpu_threads_ = 1;
bool trt_calib_mode_ = false;
bool do_entrance_counting_ = false;
bool save_result_ = false;
int secs_interval_ = 10;
};
} // namespace PaddleDetection
#endif // DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_
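A hypothetical end-to-end sketch using `Pipeline`; the constructor arguments follow the declaration above, and the video path and model directory are placeholders:
```cpp
// Hypothetical Pipeline usage; video path and model directory are placeholders.
#include "include/pipeline.h"

int main() {
  PaddleDetection::Pipeline pipeline(
      "GPU",         // device
      0.5,           // threshold
      "output",      // output_dir
      "fluid",       // run_mode
      0,             // gpu_id
      false,         // use_mkldnn
      1,             // cpu_threads
      false,         // trt_calib_mode
      true,          // do_entrance_counting
      true,          // save_result
      "pedestrian",  // scene
      false,         // tiny_obj
      false,         // is_mtmct
      10,            // secs_interval
      "./fairmot_hrnetv2_w18_dlafpn_30e_576x320");  // track_model_dir
  pipeline.SetInput("test.mp4");  // must be set before Run()
  pipeline.Run();
  return 0;
}
```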
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <ctime>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/utils.h"
namespace PaddleDetection {
// Generate visualization color
cv::Scalar GetColor(int idx);
// Visualize Tracking Results
cv::Mat VisualizeTrackResult(const cv::Mat& img,
const MOTResult& results,
const float fps,
const int frame_id);
// Pedestrian/Vehicle Counting
void FlowStatistic(const MOTResult& results,
const int frame_id,
const int secs_interval,
const bool do_entrance_counting,
const int video_fps,
const Rect entrance,
std::set<int>* id_set,
std::set<int>* interval_id_set,
std::vector<int>* in_id_list,
std::vector<int>* out_id_list,
std::map<int, std::vector<float>>* prev_center,
std::vector<std::string>* records);
// Save Tracking Results
void SaveMOTResult(const MOTResult& results,
const int frame_id,
std::vector<std::string>* records);
} // namespace PaddleDetection
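A hypothetical per-frame sketch combining these helpers; `include/postprocess.h` is an assumed header name (the build lists `src/postprocess.cc`), and the entrance line coordinates are placeholders. Counting state must persist across frames:
```cpp
// Hypothetical per-frame use of the counting and visualization helpers;
// "include/postprocess.h" is an assumed header name for these declarations.
#include <map>
#include <set>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/postprocess.h"

void HandleFrame(const cv::Mat& frame,
                 const PaddleDetection::MOTResult& results,
                 int frame_id, int video_fps) {
  // Counting state persists across frames (members in real code).
  static std::set<int> id_set, interval_id_set;
  static std::vector<int> in_id_list, out_id_list;
  static std::map<int, std::vector<float>> prev_center;
  static std::vector<std::string> records;
  PaddleDetection::Rect entrance = {0.f, 400.f, 1920.f, 400.f};  // placeholder
  PaddleDetection::FlowStatistic(results, frame_id, /*secs_interval=*/10,
                                 /*do_entrance_counting=*/true, video_fps,
                                 entrance, &id_set, &interval_id_set,
                                 &in_id_list, &out_id_list, &prev_center,
                                 &records);
  cv::Mat vis = PaddleDetection::VisualizeTrackResult(
      frame, results, static_cast<float>(video_fps), frame_id);
  (void)vis;  // write to a video file in real code
}
```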
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/jde_predictor.h"
#include "include/preprocess_op.h"
#include "include/sde_predictor.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
class Predictor {
public:
explicit Predictor(const std::string& device = "CPU",
const std::string& track_model_dir = "",
const std::string& det_model_dir = "",
const std::string& reid_model_dir = "",
const double threshold = -1.,
const std::string& run_mode = "fluid",
const int gpu_id = 0,
const bool use_mkldnn = false,
const int cpu_threads = 1,
bool trt_calib_mode = false,
const int min_box_area = 200) {
if (track_model_dir.empty() && det_model_dir.empty()) {
throw "Predictor must receive track_model or det_model!";
}
if (!track_model_dir.empty() && !det_model_dir.empty()) {
throw "Predictor only receive one of track_model or det_model!";
}
if (!track_model_dir.empty()) {
jde_sct_ =
std::make_shared<PaddleDetection::JDEPredictor>(device,
track_model_dir,
threshold,
run_mode,
gpu_id,
use_mkldnn,
cpu_threads,
trt_calib_mode,
min_box_area);
use_jde_ = true;
}
if (!det_model_dir.empty()) {
sde_sct_ = std::make_shared<PaddleDetection::SDEPredictor>(device,
det_model_dir,
reid_model_dir,
threshold,
run_mode,
gpu_id,
use_mkldnn,
cpu_threads,
trt_calib_mode,
min_box_area);
use_jde_ = false;
}
}
// Run predictor
void Predict(const std::vector<cv::Mat> imgs,
const double threshold = 0.5,
MOTResult* result = nullptr,
std::vector<double>* times = nullptr);
private:
std::shared_ptr<PaddleDetection::JDEPredictor> jde_sct_;
std::shared_ptr<PaddleDetection::SDEPredictor> sde_sct_;
bool use_jde_ = true;
};
} // namespace PaddleDetection
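The wrapper chooses JDE mode (a single FairMOT model) or SDE mode (separate detector and ReID models) based on which directories are non-empty. A hypothetical construction sketch, with `include/predictor.h` as an assumed header name and placeholder model directories:
```cpp
// Hypothetical Predictor construction; "include/predictor.h" is an assumed
// header name and the model directories are placeholders.
#include "include/predictor.h"

void BuildPredictors() {
  // JDE mode: one FairMOT model does detection and embedding jointly.
  PaddleDetection::Predictor jde("GPU", "./fairmot_model", "", "");
  // SDE mode: a detector (e.g. PP-PicoDet) plus a ReID model (e.g. PP-LCNet).
  PaddleDetection::Predictor sde("GPU", "", "./picodet_model", "./pplcnet_reid");
}
```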
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <yaml-cpp/yaml.h>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
namespace PaddleDetection {
// Object for storing all preprocessed data
class ImageBlob {
public:
// image width and height
std::vector<float> im_shape_;
// Buffer for image data after preprocessing
std::vector<float> im_data_;
// in net data shape(after pad)
std::vector<float> in_net_shape_;
// Evaluation image width and height
// std::vector<float> eval_im_size_f_;
// Scale factor for image size to origin image size
std::vector<float> scale_factor_;
};
// Abstraction of preprocessing operation class
class PreprocessOp {
public:
virtual void Init(const YAML::Node& item) = 0;
virtual void Run(cv::Mat* im, ImageBlob* data) = 0;
};
class InitInfo : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {}
virtual void Run(cv::Mat* im, ImageBlob* data);
};
class NormalizeImage : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
mean_ = item["mean"].as<std::vector<float>>();
scale_ = item["std"].as<std::vector<float>>();
is_scale_ = item["is_scale"].as<bool>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
// CHW or HWC
std::vector<float> mean_;
std::vector<float> scale_;
bool is_scale_;
};
class Permute : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {}
virtual void Run(cv::Mat* im, ImageBlob* data);
};
class Resize : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
interp_ = item["interp"].as<int>();
keep_ratio_ = item["keep_ratio"].as<bool>();
target_size_ = item["target_size"].as<std::vector<int>>();
}
// Compute best resize scale for x-dimension, y-dimension
std::pair<float, float> GenerateScale(const cv::Mat& im);
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
int interp_;
bool keep_ratio_;
std::vector<int> target_size_;
std::vector<int> in_net_shape_;
};
class LetterBoxResize : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
target_size_ = item["target_size"].as<std::vector<int>>();
}
float GenerateScale(const cv::Mat& im);
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<int> target_size_;
std::vector<int> in_net_shape_;
};
// Models with FPN need input shape % stride == 0
class PadStride : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
stride_ = item["stride"].as<int>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
int stride_;
};
class Preprocessor {
public:
void Init(const YAML::Node& config_node) {
// initialize image info at first
ops_["InitInfo"] = std::make_shared<InitInfo>();
for (const auto& item : config_node) {
auto op_name = item["type"].as<std::string>();
ops_[op_name] = CreateOp(op_name);
ops_[op_name]->Init(item);
}
}
std::shared_ptr<PreprocessOp> CreateOp(const std::string& name) {
if (name == "Resize") {
return std::make_shared<Resize>();
} else if (name == "LetterBoxResize") {
return std::make_shared<LetterBoxResize>();
} else if (name == "Permute") {
return std::make_shared<Permute>();
} else if (name == "NormalizeImage") {
return std::make_shared<NormalizeImage>();
} else if (name == "PadStride") {
// use PadStride instead of PadBatch
return std::make_shared<PadStride>();
}
std::cerr << "can not find function of OP: " << name
<< " and return: nullptr" << std::endl;
return nullptr;
}
void Run(cv::Mat* im, ImageBlob* data);
public:
static const std::vector<std::string> RUN_ORDER;
private:
std::unordered_map<std::string, std::shared_ptr<PreprocessOp>> ops_;
};
} // namespace PaddleDetection
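A hypothetical sketch of driving `Preprocessor` directly from a YAML snippet shaped like the `Preprocess` section of an exported `infer_cfg.yml`:
```cpp
// Hypothetical direct use of Preprocessor; the YAML mirrors the shape of the
// Preprocess section in an exported infer_cfg.yml.
#include <opencv2/opencv.hpp>
#include <yaml-cpp/yaml.h>
#include "include/preprocess_op.h"

void PreprocessOneImage(cv::Mat* im) {
  YAML::Node ops = YAML::Load(
      "[{type: LetterBoxResize, target_size: [320, 576]},"
      " {type: NormalizeImage, mean: [0, 0, 0], std: [1, 1, 1], is_scale: true},"
      " {type: Permute}]");
  PaddleDetection::Preprocessor preprocessor;
  preprocessor.Init(ops);
  PaddleDetection::ImageBlob blob;
  preprocessor.Run(im, &blob);  // blob.im_data_ holds the CHW float input
}
```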
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/preprocess_op.h"
#include "include/utils.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
class SDEPredictor {
public:
explicit SDEPredictor(const std::string& device,
const std::string& det_model_dir = "",
const std::string& reid_model_dir = "",
const double threshold = -1.,
const std::string& run_mode = "fluid",
const int gpu_id = 0,
const bool use_mkldnn = false,
const int cpu_threads = 1,
bool trt_calib_mode = false,
const int min_box_area = 200) {
this->device_ = device;
this->gpu_id_ = gpu_id;
this->use_mkldnn_ = use_mkldnn;
this->cpu_math_library_num_threads_ = cpu_threads;
this->trt_calib_mode_ = trt_calib_mode;
this->min_box_area_ = min_box_area;
det_config_.load_config(det_model_dir);
this->min_subgraph_size_ = det_config_.min_subgraph_size_;
det_preprocessor_.Init(det_config_.preprocess_info_);
reid_config_.load_config(reid_model_dir);
reid_preprocessor_.Init(reid_config_.preprocess_info_);
LoadModel(det_model_dir, reid_model_dir, run_mode);
this->conf_thresh_ = det_config_.conf_thresh_;
}
// Load Paddle inference model
void LoadModel(const std::string& det_model_dir,
const std::string& reid_model_dir,
const std::string& run_mode = "fluid");
// Run predictor
void Predict(const std::vector<cv::Mat> imgs,
const double threshold = 0.5,
MOTResult* result = nullptr,
std::vector<double>* times = nullptr);
private:
std::string device_ = "CPU";
float threshold = 0.5;
int gpu_id_ = 0;
bool use_mkldnn_ = false;
int cpu_math_library_num_threads_ = 1;
int min_subgraph_size_ = 3;
bool trt_calib_mode_ = false;
// Preprocess image and copy data to input buffer
void Preprocess(const cv::Mat& image_mat);
// Postprocess result
void Postprocess(const cv::Mat dets, const cv::Mat emb, MOTResult* result);
std::shared_ptr<Predictor> det_predictor_;
std::shared_ptr<Predictor> reid_predictor_;
Preprocessor det_preprocessor_;
Preprocessor reid_preprocessor_;
ImageBlob inputs_;
std::vector<float> bbox_data_;
std::vector<float> emb_data_;
double threshold_;
ConfigPaser det_config_;
ConfigPaser reid_config_;
float min_box_area_ = 200;
float conf_thresh_;
};
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.h
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#pragma once
#include <map>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/trajectory.h"
namespace PaddleDetection {
typedef std::map<int, int> Match;
typedef std::map<int, int>::iterator MatchIterator;
struct Track {
int id;
float score;
cv::Vec4f ltrb;
};
class JDETracker {
public:
static JDETracker *instance(void);
virtual bool update(const cv::Mat &dets,
const cv::Mat &emb,
std::vector<Track> *tracks);
private:
JDETracker(void);
virtual ~JDETracker(void) {}
cv::Mat motion_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b);
void linear_assignment(const cv::Mat &cost,
float cost_limit,
Match *matches,
std::vector<int> *mismatch_row,
std::vector<int> *mismatch_col);
void remove_duplicate_trajectory(TrajectoryPool *a,
TrajectoryPool *b,
float iou_thresh = 0.15f);
private:
static JDETracker *me;
int timestamp;
TrajectoryPool tracked_trajectories;
TrajectoryPool lost_trajectories;
TrajectoryPool removed_trajectories;
int max_lost_time;
float lambda;
float det_thresh;
};
} // namespace PaddleDetection
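A hypothetical sketch of feeding the singleton tracker, as `JDEPredictor::Postprocess` does; `dets` is an Nx6 float matrix whose first four columns are the box and whose fifth is the score, and `emb` holds one embedding row per detection:
```cpp
// Hypothetical direct use of the singleton JDETracker; in the real pipeline
// dets and emb come from the FairMOT outputs (see src/jde_predictor.cc).
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/tracker.h"

void UpdateTracker(const cv::Mat& dets, const cv::Mat& emb) {
  std::vector<PaddleDetection::Track> tracks;
  PaddleDetection::JDETracker::instance()->update(dets, emb, &tracks);
  for (const auto& t : tracks) {
    // t.id, t.score, t.ltrb = (left, top, right, bottom)
  }
}
```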
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.h
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#pragma once
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "opencv2/video/tracking.hpp"
namespace PaddleDetection {
typedef enum { New = 0, Tracked = 1, Lost = 2, Removed = 3 } TrajectoryState;
class Trajectory;
typedef std::vector<Trajectory> TrajectoryPool;
typedef std::vector<Trajectory>::iterator TrajectoryPoolIterator;
typedef std::vector<Trajectory *> TrajectoryPtrPool;
typedef std::vector<Trajectory *>::iterator TrajectoryPtrPoolIterator;
class TKalmanFilter : public cv::KalmanFilter {
public:
TKalmanFilter(void);
virtual ~TKalmanFilter(void) {}
virtual void init(const cv::Mat &measurement);
virtual const cv::Mat &predict();
virtual const cv::Mat &correct(const cv::Mat &measurement);
virtual void project(cv::Mat *mean, cv::Mat *covariance) const;
private:
float std_weight_position;
float std_weight_velocity;
};
inline TKalmanFilter::TKalmanFilter(void) : cv::KalmanFilter(8, 4) {
cv::KalmanFilter::transitionMatrix = cv::Mat::eye(8, 8, CV_32F);
for (int i = 0; i < 4; ++i)
cv::KalmanFilter::transitionMatrix.at<float>(i, i + 4) = 1;
cv::KalmanFilter::measurementMatrix = cv::Mat::eye(4, 8, CV_32F);
std_weight_position = 1 / 20.f;
std_weight_velocity = 1 / 160.f;
}
class Trajectory : public TKalmanFilter {
public:
Trajectory();
Trajectory(const cv::Vec4f &ltrb, float score, const cv::Mat &embedding);
Trajectory(const Trajectory &other);
Trajectory &operator=(const Trajectory &rhs);
virtual ~Trajectory(void) {}
static int next_id();
virtual const cv::Mat &predict(void);
virtual void update(Trajectory *traj,
int timestamp,
bool update_embedding = true);
virtual void activate(int timestamp);
virtual void reactivate(Trajectory *traj, int timestamp, bool newid = false);
virtual void mark_lost(void);
virtual void mark_removed(void);
friend TrajectoryPool operator+(const TrajectoryPool &a,
const TrajectoryPool &b);
friend TrajectoryPool operator+(const TrajectoryPool &a,
const TrajectoryPtrPool &b);
friend TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT
const TrajectoryPtrPool &b);
friend TrajectoryPool operator-(const TrajectoryPool &a,
const TrajectoryPool &b);
friend TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT
const TrajectoryPool &b);
friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a,
TrajectoryPool *b);
friend TrajectoryPtrPool operator-(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend cv::Mat embedding_distance(const TrajectoryPool &a,
const TrajectoryPool &b);
friend cv::Mat embedding_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend cv::Mat embedding_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPool &a,
const TrajectoryPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b);
friend cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b);
friend cv::Mat iou_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend cv::Mat iou_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b);
private:
void update_embedding(const cv::Mat &embedding);
public:
TrajectoryState state;
cv::Vec4f ltrb;
cv::Mat smooth_embedding;
int id;
bool is_activated;
int timestamp;
int starttime;
float score;
private:
static int count;
cv::Vec4f xyah;
cv::Mat current_embedding;
float eta;
int length;
};
inline cv::Vec4f ltrb2xyah(const cv::Vec4f &ltrb) {
cv::Vec4f xyah;
xyah[0] = (ltrb[0] + ltrb[2]) * 0.5f;
xyah[1] = (ltrb[1] + ltrb[3]) * 0.5f;
xyah[3] = ltrb[3] - ltrb[1];
xyah[2] = (ltrb[2] - ltrb[0]) / xyah[3];
return xyah;
}
inline Trajectory::Trajectory()
: state(New),
ltrb(cv::Vec4f()),
smooth_embedding(cv::Mat()),
id(0),
is_activated(false),
timestamp(0),
starttime(0),
score(0),
eta(0.9),
length(0) {}
inline Trajectory::Trajectory(const cv::Vec4f &ltrb_,
float score_,
const cv::Mat &embedding)
: state(New),
ltrb(ltrb_),
smooth_embedding(cv::Mat()),
id(0),
is_activated(false),
timestamp(0),
starttime(0),
score(score_),
eta(0.9),
length(0) {
xyah = ltrb2xyah(ltrb);
update_embedding(embedding);
}
inline Trajectory::Trajectory(const Trajectory &other)
: state(other.state),
ltrb(other.ltrb),
id(other.id),
is_activated(other.is_activated),
timestamp(other.timestamp),
starttime(other.starttime),
xyah(other.xyah),
score(other.score),
eta(other.eta),
length(other.length) {
other.smooth_embedding.copyTo(smooth_embedding);
other.current_embedding.copyTo(current_embedding);
// copy state in KalmanFilter
other.statePre.copyTo(cv::KalmanFilter::statePre);
other.statePost.copyTo(cv::KalmanFilter::statePost);
other.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre);
other.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost);
}
inline Trajectory &Trajectory::operator=(const Trajectory &rhs) {
this->state = rhs.state;
this->ltrb = rhs.ltrb;
rhs.smooth_embedding.copyTo(this->smooth_embedding);
this->id = rhs.id;
this->is_activated = rhs.is_activated;
this->timestamp = rhs.timestamp;
this->starttime = rhs.starttime;
this->xyah = rhs.xyah;
this->score = rhs.score;
rhs.current_embedding.copyTo(this->current_embedding);
this->eta = rhs.eta;
this->length = rhs.length;
// copy state in KalmanFilter
rhs.statePre.copyTo(cv::KalmanFilter::statePre);
rhs.statePost.copyTo(cv::KalmanFilter::statePost);
rhs.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre);
rhs.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost);
return *this;
}
inline int Trajectory::next_id() {
++count;
return count;
}
inline void Trajectory::mark_lost(void) { state = Lost; }
inline void Trajectory::mark_removed(void) { state = Removed; }
} // namespace PaddleDetection
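A worked example of the `ltrb2xyah` conversion defined above: for a box with corners (0, 0) and (50, 100), the center is (25, 50), the height is 100, and the aspect ratio (width/height) is 0.5.
```cpp
// Worked example of ltrb2xyah for a box with corners (0, 0) and (50, 100).
cv::Vec4f ltrb(0.f, 0.f, 50.f, 100.f);
cv::Vec4f xyah = PaddleDetection::ltrb2xyah(ltrb);
// xyah = (25, 50, 0.5, 100): center x, center y, width/height, height
```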
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <ctime>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "include/tracker.h"
namespace PaddleDetection {
struct Rect {
float left;
float top;
float right;
float bottom;
};
struct MOTTrack {
int ids;
float score;
Rect rects;
int class_id = -1;
};
typedef std::vector<MOTTrack> MOTResult;
} // namespace PaddleDetection
# Whether to use GPU (i.e., whether to use CUDA)
WITH_GPU=OFF
# Whether to use MKL or OpenBLAS; must be set to OFF on TX2
WITH_MKL=ON
# Whether to integrate TensorRT (only effective when WITH_GPU=ON)
WITH_TENSORRT=OFF
# Name of the Paddle inference lib; it differs across platforms and library versions, so check the lib name under the `paddle_inference/lib/` folder of the downloaded inference library
PADDLE_LIB_NAME=libpaddle_inference
# TensorRT include path
TENSORRT_INC_DIR=/path/to/tensorrt/include
# TensorRT lib path
TENSORRT_LIB_DIR=/path/to/tensorrt/lib
# Paddle inference library path
PADDLE_DIR=/path/to/paddle_inference
# CUDA lib path
CUDA_LIB=/path/to/cuda/lib
# cuDNN lib path
CUDNN_LIB=/path/to/cudnn/lib
MACHINE_TYPE=`uname -m`
echo "MACHINE_TYPE: "${MACHINE_TYPE}
if [ "$MACHINE_TYPE" = "x86_64" ]
then
echo "set OPENCV_DIR for x86_64"
# On Linux, download the prebuilt OpenCV with the following commands
mkdir -p $(pwd)/deps && cd $(pwd)/deps
wget -c https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz
tar -xvf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz && cd ..
# set OPENCV_DIR
OPENCV_DIR=$(pwd)/deps/opencv-3.4.16_gcc8.2_ffmpeg
elif [ "$MACHINE_TYPE" = "aarch64" ]
then
echo "set OPENCV_DIR for aarch64"
# On TX2, download the prebuilt OpenCV with the following commands
mkdir -p $(pwd)/deps && cd $(pwd)/deps
wget -c https://bj.bcebos.com/v1/paddledet/data/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz
tar -xvf TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz && cd ..
# set OPENCV_DIR
OPENCV_DIR=$(pwd)/deps/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0/
else
echo "Please set OPENCV_DIR manually"
fi
echo "OPENCV_DIR: "$OPENCV_DIR
# No changes needed below this line
rm -rf build
mkdir -p build
cd build
cmake .. \
-DWITH_GPU=${WITH_GPU} \
-DWITH_MKL=${WITH_MKL} \
-DWITH_TENSORRT=${WITH_TENSORRT} \
-DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \
-DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
-DPADDLE_DIR=${PADDLE_DIR} \
-DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
-DCUDA_LIB=${CUDA_LIB} \
-DCUDNN_LIB=${CUDNN_LIB} \
-DOPENCV_DIR=${OPENCV_DIR} \
-DPADDLE_LIB_NAME=${PADDLE_LIB_NAME}
make
echo "make finished!"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/jde_predictor.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
// Load Model and create model predictor
void JDEPredictor::LoadModel(const std::string& model_dir,
const std::string& run_mode) {
paddle_infer::Config config;
std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
config.SetModel(prog_file, params_file);
if (this->device_ == "GPU") {
config.EnableUseGpu(200, this->gpu_id_);
config.SwitchIrOptim(true);
// use tensorrt
if (run_mode != "fluid") {
auto precision = paddle_infer::Config::Precision::kFloat32;
if (run_mode == "trt_fp32") {
precision = paddle_infer::Config::Precision::kFloat32;
} else if (run_mode == "trt_fp16") {
precision = paddle_infer::Config::Precision::kHalf;
} else if (run_mode == "trt_int8") {
precision = paddle_infer::Config::Precision::kInt8;
} else {
printf(
"run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'");
}
// set tensorrt
config.EnableTensorRtEngine(1 << 30,
1,
this->min_subgraph_size_,
precision,
false,
this->trt_calib_mode_);
}
} else if (this->device_ == "XPU") {
config.EnableXpu(10 * 1024 * 1024);
} else {
config.DisableGpu();
if (this->use_mkldnn_) {
config.EnableMKLDNN();
// cache 10 different shapes for mkldnn to avoid memory leak
config.SetMkldnnCacheCapacity(10);
}
config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
}
config.SwitchUseFeedFetchOps(false);
config.SwitchIrOptim(true);
config.DisableGlogInfo();
// Memory optimization
config.EnableMemoryOptim();
predictor_ = std::move(CreatePredictor(config));
}
void FilterDets(const float conf_thresh,
const cv::Mat dets,
std::vector<int>* index) {
for (int i = 0; i < dets.rows; ++i) {
float score = *dets.ptr<float>(i, 4);
if (score > conf_thresh) {
index->push_back(i);
}
}
}
void JDEPredictor::Preprocess(const cv::Mat& ori_im) {
// Clone the image : keep the original mat for postprocess
cv::Mat im = ori_im.clone();
preprocessor_.Run(&im, &inputs_);
}
void JDEPredictor::Postprocess(const cv::Mat dets,
const cv::Mat emb,
MOTResult* result) {
result->clear();
std::vector<Track> tracks;
std::vector<int> valid;
FilterDets(conf_thresh_, dets, &valid);
cv::Mat new_dets, new_emb;
for (int i = 0; i < valid.size(); ++i) {
new_dets.push_back(dets.row(valid[i]));
new_emb.push_back(emb.row(valid[i]));
}
JDETracker::instance()->update(new_dets, new_emb, &tracks);
if (tracks.size() == 0) {
MOTTrack mot_track;
Rect ret = {*dets.ptr<float>(0, 0),
*dets.ptr<float>(0, 1),
*dets.ptr<float>(0, 2),
*dets.ptr<float>(0, 3)};
mot_track.ids = 1;
mot_track.score = *dets.ptr<float>(0, 4);
mot_track.rects = ret;
result->push_back(mot_track);
} else {
std::vector<Track>::iterator titer;
for (titer = tracks.begin(); titer != tracks.end(); ++titer) {
if (titer->score < threshold_) {
continue;
} else {
float w = titer->ltrb[2] - titer->ltrb[0];
float h = titer->ltrb[3] - titer->ltrb[1];
bool vertical = w / h > 1.6;
float area = w * h;
if (area > min_box_area_ && !vertical) {
MOTTrack mot_track;
Rect ret = {
titer->ltrb[0], titer->ltrb[1], titer->ltrb[2], titer->ltrb[3]};
mot_track.rects = ret;
mot_track.score = titer->score;
mot_track.ids = titer->id;
result->push_back(mot_track);
}
}
}
}
}
void JDEPredictor::Predict(const std::vector<cv::Mat> imgs,
const double threshold,
MOTResult* result,
std::vector<double>* times) {
auto preprocess_start = std::chrono::steady_clock::now();
int batch_size = imgs.size();
// in_data_batch
std::vector<float> in_data_all;
std::vector<float> im_shape_all(batch_size * 2);
std::vector<float> scale_factor_all(batch_size * 2);
// Preprocess image
for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
cv::Mat im = imgs.at(bs_idx);
Preprocess(im);
im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
in_data_all.insert(
in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
}
// Prepare input tensor
auto input_names = predictor_->GetInputNames();
for (const auto& tensor_name : input_names) {
auto in_tensor = predictor_->GetInputHandle(tensor_name);
if (tensor_name == "image") {
int rh = inputs_.in_net_shape_[0];
int rw = inputs_.in_net_shape_[1];
in_tensor->Reshape({batch_size, 3, rh, rw});
in_tensor->CopyFromCpu(in_data_all.data());
} else if (tensor_name == "im_shape") {
in_tensor->Reshape({batch_size, 2});
in_tensor->CopyFromCpu(im_shape_all.data());
} else if (tensor_name == "scale_factor") {
in_tensor->Reshape({batch_size, 2});
in_tensor->CopyFromCpu(scale_factor_all.data());
}
}
auto preprocess_end = std::chrono::steady_clock::now();
std::vector<int> bbox_shape;
std::vector<int> emb_shape;
// Run predictor
auto inference_start = std::chrono::steady_clock::now();
predictor_->Run();
// Get output tensor
auto output_names = predictor_->GetOutputNames();
auto bbox_tensor = predictor_->GetOutputHandle(output_names[0]);
bbox_shape = bbox_tensor->shape();
auto emb_tensor = predictor_->GetOutputHandle(output_names[1]);
emb_shape = emb_tensor->shape();
// Calculate bbox length
int bbox_size = 1;
for (int j = 0; j < bbox_shape.size(); ++j) {
bbox_size *= bbox_shape[j];
}
// Calculate emb length
int emb_size = 1;
for (int j = 0; j < emb_shape.size(); ++j) {
emb_size *= emb_shape[j];
}
bbox_data_.resize(bbox_size);
bbox_tensor->CopyToCpu(bbox_data_.data());
emb_data_.resize(emb_size);
emb_tensor->CopyToCpu(emb_data_.data());
auto inference_end = std::chrono::steady_clock::now();
// Postprocessing result
auto postprocess_start = std::chrono::steady_clock::now();
result->clear();
cv::Mat dets(bbox_shape[0], 6, CV_32FC1, bbox_data_.data());
cv::Mat emb(bbox_shape[0], emb_shape[1], CV_32FC1, emb_data_.data());
Postprocess(dets, emb, result);
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff =
preprocess_end - preprocess_start;
(*times)[0] += static_cast<double>(preprocess_diff.count() * 1000);
std::chrono::duration<float> inference_diff = inference_end - inference_start;
(*times)[1] += static_cast<double>(inference_diff.count() * 1000);
std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start;
(*times)[2] += static_cast<double>(postprocess_diff.count() * 1000);
}
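// Usage sketch (hypothetical, not part of the build): running the JDE
// predictor on a single frame. The constructor arguments mirror the
// Pipeline::InitPredictor call elsewhere in this PR; the model path is
// illustrative.
//
//   PaddleDetection::JDEPredictor predictor(
//       "GPU", "./jde_model_dir", 0.5, "fluid", 0, false, 1, false);
//   cv::Mat frame = cv::imread("frame.jpg");
//   PaddleDetection::MOTResult result;
//   std::vector<double> times(3, 0.);
//   predictor.Predict({frame}, 0.5, &result, &times);
//   // result now holds one MOTTrack (ids, score, rects) per tracked object.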
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/gatagat/lap/blob/master/lap/lapjv.cpp
// The copyright of gatagat/lap is as follows:
// MIT License
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "include/lapjv.h"
namespace PaddleDetection {
/** Column-reduction and reduction transfer for a dense cost matrix.
*/
int _ccrrt_dense(
const int n, float *cost[], int *free_rows, int *x, int *y, float *v) {
int n_free_rows;
bool *unique;
for (int i = 0; i < n; i++) {
x[i] = -1;
v[i] = LARGE;
y[i] = 0;
}
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
const float c = cost[i][j];
if (c < v[j]) {
v[j] = c;
y[j] = i;
}
}
}
NEW(unique, bool, n);
memset(unique, TRUE, n);
{
int j = n;
do {
j--;
const int i = y[j];
if (x[i] < 0) {
x[i] = j;
} else {
unique[i] = FALSE;
y[j] = -1;
}
} while (j > 0);
}
n_free_rows = 0;
for (int i = 0; i < n; i++) {
if (x[i] < 0) {
free_rows[n_free_rows++] = i;
} else if (unique[i]) {
const int j = x[i];
float min = LARGE;
for (int j2 = 0; j2 < n; j2++) {
if (j2 == static_cast<int>(j)) {
continue;
}
const float c = cost[i][j2] - v[j2];
if (c < min) {
min = c;
}
}
v[j] -= min;
}
}
FREE(unique);
return n_free_rows;
}
/** Augmenting row reduction for a dense cost matrix.
*/
int _carr_dense(const int n,
float *cost[],
const int n_free_rows,
int *free_rows,
int *x,
int *y,
float *v) {
int current = 0;
int new_free_rows = 0;
int rr_cnt = 0;
while (current < n_free_rows) {
int i0;
int j1, j2;
float v1, v2, v1_new;
bool v1_lowers;
rr_cnt++;
const int free_i = free_rows[current++];
j1 = 0;
v1 = cost[free_i][0] - v[0];
j2 = -1;
v2 = LARGE;
for (int j = 1; j < n; j++) {
const float c = cost[free_i][j] - v[j];
if (c < v2) {
if (c >= v1) {
v2 = c;
j2 = j;
} else {
v2 = v1;
v1 = c;
j2 = j1;
j1 = j;
}
}
}
i0 = y[j1];
v1_new = v[j1] - (v2 - v1);
v1_lowers = v1_new < v[j1];
if (rr_cnt < current * n) {
if (v1_lowers) {
v[j1] = v1_new;
} else if (i0 >= 0 && j2 >= 0) {
j1 = j2;
i0 = y[j2];
}
if (i0 >= 0) {
if (v1_lowers) {
free_rows[--current] = i0;
} else {
free_rows[new_free_rows++] = i0;
}
}
} else {
if (i0 >= 0) {
free_rows[new_free_rows++] = i0;
}
}
x[free_i] = j1;
y[j1] = free_i;
}
return new_free_rows;
}
/** Find columns with minimum d[j] and put them on the SCAN list.
*/
int _find_dense(const int n, int lo, float *d, int *cols, int *y) {
int hi = lo + 1;
float mind = d[cols[lo]];
for (int k = hi; k < n; k++) {
int j = cols[k];
if (d[j] <= mind) {
if (d[j] < mind) {
hi = lo;
mind = d[j];
}
cols[k] = cols[hi];
cols[hi++] = j;
}
}
return hi;
}
// Scan all columns in TODO starting from arbitrary column in SCAN
// and try to decrease d of the TODO columns using the SCAN column.
int _scan_dense(const int n,
float *cost[],
int *plo,
int *phi,
float *d,
int *cols,
int *pred,
int *y,
float *v) {
int lo = *plo;
int hi = *phi;
float h, cred_ij;
while (lo != hi) {
int j = cols[lo++];
const int i = y[j];
const float mind = d[j];
h = cost[i][j] - v[j] - mind;
// For all columns in TODO
for (int k = hi; k < n; k++) {
j = cols[k];
cred_ij = cost[i][j] - v[j] - h;
if (cred_ij < d[j]) {
d[j] = cred_ij;
pred[j] = i;
if (cred_ij == mind) {
if (y[j] < 0) {
return j;
}
cols[k] = cols[hi];
cols[hi++] = j;
}
}
}
}
*plo = lo;
*phi = hi;
return -1;
}
/** Single iteration of modified Dijkstra shortest path algorithm as explained
* in the JV paper.
*
* This is a dense matrix version.
*
* \return The closest free column index.
*/
int find_path_dense(const int n,
float *cost[],
const int start_i,
int *y,
float *v,
int *pred) {
int lo = 0, hi = 0;
int final_j = -1;
int n_ready = 0;
int *cols;
float *d;
NEW(cols, int, n);
NEW(d, float, n);
for (int i = 0; i < n; i++) {
cols[i] = i;
pred[i] = start_i;
d[i] = cost[start_i][i] - v[i];
}
while (final_j == -1) {
// No columns left on the SCAN list.
if (lo == hi) {
n_ready = lo;
hi = _find_dense(n, lo, d, cols, y);
for (int k = lo; k < hi; k++) {
const int j = cols[k];
if (y[j] < 0) {
final_j = j;
}
}
}
if (final_j == -1) {
final_j = _scan_dense(n, cost, &lo, &hi, d, cols, pred, y, v);
}
}
{
const float mind = d[cols[lo]];
for (int k = 0; k < n_ready; k++) {
const int j = cols[k];
v[j] += d[j] - mind;
}
}
FREE(cols);
FREE(d);
return final_j;
}
/** Augment for a dense cost matrix.
*/
int _ca_dense(const int n,
float *cost[],
const int n_free_rows,
int *free_rows,
int *x,
int *y,
float *v) {
int *pred;
NEW(pred, int, n);
for (int *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) {
int i = -1, j;
int k = 0;
j = find_path_dense(n, cost, *pfree_i, y, v, pred);
while (i != *pfree_i) {
i = pred[j];
y[j] = i;
SWAP_INDICES(j, x[i]);
k++;
}
}
FREE(pred);
return 0;
}
/** Solve the dense linear assignment problem (LAP).
 */
int lapjv_internal(const cv::Mat &cost,
const bool extend_cost,
const float cost_limit,
int *x,
int *y) {
int n_rows = cost.rows;
int n_cols = cost.cols;
int n;
if (n_rows == n_cols) {
n = n_rows;
} else if (!extend_cost) {
throw std::invalid_argument(
"Square cost array expected. If cost is intentionally non-square, pass "
"extend_cost=True.");
}
// Get extend cost
if (extend_cost || cost_limit < LARGE) {
n = n_rows + n_cols;
}
cv::Mat cost_expand(n, n, CV_32F);
float expand_value;
if (cost_limit < LARGE) {
expand_value = cost_limit / 2;
} else {
double max_v;
minMaxLoc(cost, nullptr, &max_v);
expand_value = static_cast<float>(max_v) + 1.;
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
cost_expand.at<float>(i, j) = expand_value;
if (i >= n_rows && j >= n_cols) {
cost_expand.at<float>(i, j) = 0;
} else if (i < n_rows && j < n_cols) {
cost_expand.at<float>(i, j) = cost.at<float>(i, j);
}
}
}
// Convert Mat to pointer array
float **cost_ptr;
NEW(cost_ptr, float *, n);
for (int i = 0; i < n; ++i) {
NEW(cost_ptr[i], float, n);
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
cost_ptr[i][j] = cost_expand.at<float>(i, j);
}
}
int ret;
int *free_rows;
float *v;
int *x_c;
int *y_c;
NEW(free_rows, int, n);
NEW(v, float, n);
NEW(x_c, int, n);
NEW(y_c, int, n);
ret = _ccrrt_dense(n, cost_ptr, free_rows, x_c, y_c, v);
int i = 0;
while (ret > 0 && i < 2) {
ret = _carr_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v);
i++;
}
if (ret > 0) {
ret = _ca_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v);
}
FREE(v);
FREE(free_rows);
for (int i = 0; i < n; ++i) {
FREE(cost_ptr[i]);
}
FREE(cost_ptr);
if (ret != 0) {
if (ret == -1) {
throw "Out of memory.";
}
throw "Unknown error (lapjv_internal)";
}
// Get output of x, y, opt
for (int i = 0; i < n; ++i) {
if (i < n_rows) {
x[i] = x_c[i];
if (x[i] >= n_cols) {
x[i] = -1;
}
}
if (i < n_cols) {
y[i] = y_c[i];
if (y[i] >= n_rows) {
y[i] = -1;
}
}
}
FREE(x_c);
FREE(y_c);
return ret;
}
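// Usage sketch (hypothetical): solving a small square assignment problem.
// LARGE is the sentinel from include/lapjv.h; passing it as cost_limit
// disables the cost-limit extension.
//
//   cv::Mat cost = (cv::Mat_<float>(3, 3) << 4, 1, 3,
//                                            2, 0, 5,
//                                            3, 2, 2);
//   int x[3], y[3];
//   lapjv_internal(cost, /*extend_cost=*/false, LARGE, x, y);
//   // x[i] is the column assigned to row i, y[j] the row assigned to
//   // column j; the optimal assignment here is x = {1, 0, 2} (cost 5).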
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <math.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#else
#include <stdarg.h>
#include <sys/stat.h>
#endif
#include <gflags/gflags.h>
#include "include/pipeline.h"
DEFINE_string(video_file, "", "Path of input video.");
DEFINE_string(video_other_file,
"",
"Path of other input video used for MTMCT.");
DEFINE_string(device,
              "CPU",
              "Choose the device you want to run on; it can be: CPU/GPU/XPU. "
              "Default is CPU.");
DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode,
              "fluid",
              "Mode of running (fluid/trt_fp32/trt_fp16/trt_int8).");
DEFINE_int32(gpu_id, 0, "Device id of the GPU to execute on.");
DEFINE_bool(use_mkldnn, false, "Whether to use MKLDNN on CPU.");
DEFINE_int32(cpu_threads, 1, "Number of CPU threads.");
DEFINE_bool(trt_calib_mode,
            false,
            "If the model is produced by TRT offline quantitative "
            "calibration, trt_calib_mode needs to be set to True.");
DEFINE_bool(tiny_obj, false, "Whether to track tiny objects.");
DEFINE_bool(do_entrance_counting,
            false,
            "Whether to count the number of objects entering "
            "or leaving through the entrance.");
DEFINE_int32(secs_interval,
             10,
             "The interval in seconds for counting after tracking.");
DEFINE_bool(save_result, false, "Whether to save results after tracking.");
DEFINE_string(
    scene,
    "",
    "Scene of the tracking system; it can be: pedestrian/vehicle/multiclass.");
DEFINE_bool(is_mtmct,
            false,
            "Whether to use multi-target multi-camera tracking.");
DEFINE_string(track_model_dir, "", "Path of tracking model");
DEFINE_string(det_model_dir, "", "Path of detection model");
DEFINE_string(reid_model_dir, "", "Path of reid model");
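// Example invocations (paths are illustrative):
//   ./main --video_file=./demo.mp4 --scene=pedestrian --device=GPU \
//          --run_mode=trt_fp16 --save_result
// or, with an explicit model directory instead of a preset scene:
//   ./main --video_file=./demo.mp4 --track_model_dir=./fairmot_model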
static std::string DirName(const std::string& filepath) {
auto pos = filepath.rfind(OS_PATH_SEP);
if (pos == std::string::npos) {
return "";
}
return filepath.substr(0, pos);
}
static bool PathExists(const std::string& path) {
#ifdef _WIN32
struct _stat buffer;
return (_stat(path.c_str(), &buffer) == 0);
#else
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0);
#endif // !_WIN32
}
static void MkDir(const std::string& path) {
if (PathExists(path)) return;
int ret = 0;
#ifdef _WIN32
ret = _mkdir(path.c_str());
#else
ret = mkdir(path.c_str(), 0755);
#endif // !_WIN32
if (ret != 0) {
std::string path_error(path);
path_error += " mkdir failed!";
throw std::runtime_error(path_error);
}
}
static void MkDirs(const std::string& path) {
if (path.empty()) return;
if (PathExists(path)) return;
MkDirs(DirName(path));
MkDir(path);
}
int main(int argc, char** argv) {
// Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);
bool has_model_dir =
!(FLAGS_track_model_dir.empty() && FLAGS_det_model_dir.empty() &&
FLAGS_reid_model_dir.empty());
if (FLAGS_video_file.empty() || (FLAGS_scene.empty() && !has_model_dir)) {
LOG(ERROR) << "Usage: \n"
<< "1. ./main -video_file=/PATH/TO/INPUT/IMAGE/ "
<< "-scene=pedestrian/vehicle/multiclass\n"
<< "2. ./main -video_file=/PATH/TO/INPUT/IMAGE/ "
<< "-track_model_dir=/PATH/TO/MODEL_DIR" << std::endl;
return -1;
}
if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" ||
FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
LOG(ERROR)
<< "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
return -1;
}
transform(FLAGS_device.begin(),
FLAGS_device.end(),
FLAGS_device.begin(),
::toupper);
if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" ||
FLAGS_device == "XPU")) {
LOG(ERROR) << "device should be 'CPU', 'GPU' or 'XPU'.";
return -1;
}
if (!PathExists(FLAGS_output_dir)) {
MkDirs(FLAGS_output_dir);
}
PaddleDetection::Pipeline pipeline(FLAGS_device,
FLAGS_threshold,
FLAGS_output_dir,
FLAGS_run_mode,
FLAGS_gpu_id,
FLAGS_use_mkldnn,
FLAGS_cpu_threads,
FLAGS_trt_calib_mode,
FLAGS_do_entrance_counting,
FLAGS_save_result,
FLAGS_scene,
FLAGS_tiny_obj,
FLAGS_is_mtmct,
FLAGS_secs_interval,
FLAGS_track_model_dir,
FLAGS_det_model_dir,
FLAGS_reid_model_dir);
pipeline.SetInput(FLAGS_video_file);
if (!FLAGS_video_other_file.empty()) {
pipeline.SetInput(FLAGS_video_other_file);
}
pipeline.Run();
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include <iostream>
#include <string>
#include "include/pipeline.h"
#include "include/postprocess.h"
#include "include/predictor.h"
namespace PaddleDetection {
void Pipeline::SetInput(const std::string& input_video) {
input_.push_back(input_video);
}
void Pipeline::ClearInput() {
input_.clear();
stream_.clear();
}
void Pipeline::SelectModel(const std::string& scene,
const bool tiny_obj,
const bool is_mtmct,
const std::string track_model_dir,
const std::string det_model_dir,
const std::string reid_model_dir) {
// model_dir has higher priority
if (!track_model_dir.empty()) {
track_model_dir_ = track_model_dir;
return;
}
if (!det_model_dir.empty() && !reid_model_dir.empty()) {
det_model_dir_ = det_model_dir;
reid_model_dir_ = reid_model_dir;
return;
}
  // Single-camera model, based on FairMOT
if (scene == "pedestrian") {
if (tiny_obj) {
track_model_dir_ = "../pedestrian_track_tiny";
} else {
track_model_dir_ = "../pedestrian_track";
}
} else if (scene != "vehicle") {
if (tiny_obj) {
track_model_dir_ = "../vehicle_track_tiny";
} else {
track_model_dir_ = "../vehicle_track";
}
} else if (scene == "multiclass") {
if (tiny_obj) {
track_model_dir_ = "../multiclass_track_tiny";
} else {
track_model_dir_ = "../multiclass_track";
}
}
// Multi-camera model, based on PicoDet & LCNet
if (is_mtmct && scene == "pedestrian") {
det_model_dir_ = "../pedestrian_det";
reid_model_dir_ = "../pedestrian_reid";
} else if (is_mtmct && scene == "vehicle") {
det_model_dir_ = "../vehicle_det";
reid_model_dir_ = "../vehicle_reid";
} else if (is_mtmct && scene == "multiclass") {
throw "Multi-camera tracking is not supported in multiclass scene now.";
}
}
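// For example, with the defaults wired above: scene="vehicle" and
// tiny_obj=true resolve track_model_dir_ to "../vehicle_track_tiny", while
// scene="pedestrian" with is_mtmct=true additionally selects
// "../pedestrian_det" and "../pedestrian_reid".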
void Pipeline::InitPredictor() {
if (track_model_dir_.empty() && det_model_dir_.empty()) {
throw "Predictor must receive track_model or det_model!";
}
if (!track_model_dir_.empty()) {
jde_sct_ = std::make_shared<PaddleDetection::JDEPredictor>(device_,
track_model_dir_,
threshold_,
run_mode_,
gpu_id_,
use_mkldnn_,
cpu_threads_,
trt_calib_mode_);
}
if (!det_model_dir_.empty()) {
sde_sct_ = std::make_shared<PaddleDetection::SDEPredictor>(device_,
det_model_dir_,
reid_model_dir_,
threshold_,
run_mode_,
gpu_id_,
use_mkldnn_,
cpu_threads_,
trt_calib_mode_);
}
}
void Pipeline::Run() {
if (track_model_dir_.empty() && det_model_dir_.empty()) {
LOG(ERROR) << "Pipeline must use SelectModel before Run";
return;
}
if (input_.size() == 0) {
LOG(ERROR) << "Pipeline must use SetInput before Run";
return;
}
if (!track_model_dir_.empty()) {
// single camera
    if (input_.size() > 1) {
      throw "Single camera tracking expects exactly one input video.";
    }
PredictMOT(input_[0]);
} else {
// multi cameras
    if (input_.size() != 2) {
      throw "Multi-camera tracking expects exactly two input videos.";
    }
PredictMTMCT(input_);
}
}
void Pipeline::PredictMOT(const std::string& video_path) {
// Open video
cv::VideoCapture capture;
capture.open(video_path.c_str());
if (!capture.isOpened()) {
printf("can not open video : %s\n", video_path.c_str());
return;
}
// Get Video info : resolution, fps
int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
LOG(INFO) << "----------------------- Input info -----------------------";
LOG(INFO) << "video_width: " << video_width;
LOG(INFO) << "video_height: " << video_height;
LOG(INFO) << "input fps: " << video_fps;
// Create VideoWriter for output
cv::VideoWriter video_out;
std::string video_out_path = output_dir_ + OS_PATH_SEP + "mot_output.mp4";
int fcc = cv::VideoWriter::fourcc('m', 'p', '4', 'v');
video_out.open(video_out_path.c_str(),
fcc, // 0x00000021,
video_fps,
cv::Size(video_width, video_height),
true);
if (!video_out.isOpened()) {
printf("create video writer failed!\n");
return;
}
PaddleDetection::MOTResult result;
std::vector<double> det_times(3);
std::set<int> id_set;
std::set<int> interval_id_set;
std::vector<int> in_id_list;
std::vector<int> out_id_list;
std::map<int, std::vector<float>> prev_center;
Rect entrance = {0,
static_cast<float>(video_height) / 2,
static_cast<float>(video_width),
static_cast<float>(video_height) / 2};
double times;
double total_time;
// Capture all frames and do inference
cv::Mat frame;
int frame_id = 0;
std::vector<std::string> records;
std::vector<std::string> flow_records;
records.push_back("result format: frame_id, track_id, x1, y1, w, h\n");
LOG(INFO) << "------------------- Predict info ------------------------";
while (capture.read(frame)) {
if (frame.empty()) {
break;
}
std::vector<cv::Mat> imgs;
imgs.push_back(frame);
jde_sct_->Predict(imgs, threshold_, &result, &det_times);
frame_id += 1;
total_time = std::accumulate(det_times.begin(), det_times.end(), 0.);
times = total_time / frame_id;
LOG(INFO) << "frame_id: " << frame_id
<< " predict time(s): " << total_time / 1000;
cv::Mat out_img = PaddleDetection::VisualizeTrackResult(
frame, result, 1000. / times, frame_id);
// TODO(qianhui): the entrance line can be set by users
PaddleDetection::FlowStatistic(result,
frame_id,
secs_interval_,
do_entrance_counting_,
video_fps,
entrance,
&id_set,
&interval_id_set,
&in_id_list,
&out_id_list,
&prev_center,
&flow_records);
if (save_result_) {
PaddleDetection::SaveMOTResult(result, frame_id, &records);
}
// Draw the entrance line
if (do_entrance_counting_) {
float line_thickness = std::max(1, static_cast<int>(video_width / 500.));
cv::Point pt1 = cv::Point(entrance.left, entrance.top);
cv::Point pt2 = cv::Point(entrance.right, entrance.bottom);
cv::line(out_img, pt1, pt2, cv::Scalar(0, 255, 255), line_thickness);
}
video_out.write(out_img);
}
capture.release();
video_out.release();
PrintBenchmarkLog(det_times, frame_id);
LOG(INFO) << "-------------------- Final Output info -------------------";
LOG(INFO) << "Total frame: " << frame_id;
LOG(INFO) << "Visualized output saved as " << video_out_path.c_str();
if (save_result_) {
FILE* fp;
std::string result_output_path =
output_dir_ + OS_PATH_SEP + "mot_output.txt";
if ((fp = fopen(result_output_path.c_str(), "w+")) == NULL) {
printf("Open %s error.\n", result_output_path.c_str());
return;
}
    for (size_t l = 0; l < records.size(); ++l) {
      fprintf(fp, "%s", records[l].c_str());
    }
fclose(fp);
LOG(INFO) << "txt result output saved as " << result_output_path.c_str();
result_output_path = output_dir_ + OS_PATH_SEP + "flow_statistic.txt";
if ((fp = fopen(result_output_path.c_str(), "w+")) == NULL) {
printf("Open %s error.\n", result_output_path);
return;
}
    for (size_t l = 0; l < flow_records.size(); ++l) {
      fprintf(fp, "%s", flow_records[l].c_str());
    }
fclose(fp);
LOG(INFO) << "txt flow statistic saved as " << result_output_path.c_str();
}
}
void Pipeline::PredictMTMCT(const std::vector<std::string> video_path) {
throw "Not Implement!";
}
void Pipeline::RunMOTStream(const cv::Mat img,
const int frame_id,
const int video_fps,
const Rect entrance,
cv::Mat out_img,
std::vector<std::string>* records,
std::set<int>* id_set,
std::set<int>* interval_id_set,
std::vector<int>* in_id_list,
std::vector<int>* out_id_list,
std::map<int, std::vector<float>>* prev_center,
std::vector<std::string>* flow_records) {
PaddleDetection::MOTResult result;
std::vector<double> det_times(3);
double times;
double total_time;
LOG(INFO) << "------------------- Predict info ------------------------";
std::vector<cv::Mat> imgs;
imgs.push_back(img);
jde_sct_->Predict(imgs, threshold_, &result, &det_times);
total_time = std::accumulate(det_times.begin(), det_times.end(), 0.);
times = total_time / frame_id;
LOG(INFO) << "frame_id: " << frame_id
<< " predict time(s): " << total_time / 1000;
out_img = PaddleDetection::VisualizeTrackResult(
img, result, 1000. / times, frame_id);
// Count total number
// Count in & out number
PaddleDetection::FlowStatistic(result,
frame_id,
secs_interval_,
do_entrance_counting_,
video_fps,
entrance,
id_set,
interval_id_set,
in_id_list,
out_id_list,
prev_center,
flow_records);
PrintBenchmarkLog(det_times, frame_id);
if (save_result_) {
PaddleDetection::SaveMOTResult(result, frame_id, records);
}
}
void Pipeline::RunMTMCTStream(const std::vector<cv::Mat> imgs,
std::vector<std::string>* records) {
throw "Not Implement!";
}
void Pipeline::PrintBenchmarkLog(const std::vector<double> det_time,
const int img_num) {
LOG(INFO) << "----------------------- Config info -----------------------";
LOG(INFO) << "runtime_device: " << device_;
LOG(INFO) << "ir_optim: "
<< "True";
LOG(INFO) << "enable_memory_optim: "
<< "True";
int has_trt = run_mode_.find("trt");
if (has_trt >= 0) {
LOG(INFO) << "enable_tensorrt: "
<< "True";
std::string precision = run_mode_.substr(4, 8);
LOG(INFO) << "precision: " << precision;
} else {
LOG(INFO) << "enable_tensorrt: "
<< "False";
LOG(INFO) << "precision: "
<< "fp32";
}
LOG(INFO) << "enable_mkldnn: " << (use_mkldnn_ ? "True" : "False");
LOG(INFO) << "cpu_math_library_num_threads: " << cpu_threads_;
LOG(INFO) << "----------------------- Perf info ------------------------";
LOG(INFO) << "Total number of predicted data: " << img_num
<< " and total time spent(s): "
<< std::accumulate(det_time.begin(), det_time.end(), 0.) / 1000;
int num = std::max(1, img_num);
LOG(INFO) << "preproce_time(ms): " << det_time[0] / num
<< ", inference_time(ms): " << det_time[1] / num
<< ", postprocess_time(ms): " << det_time[2] / num;
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include <iostream>
#include "include/postprocess.h"
namespace PaddleDetection {
cv::Scalar GetColor(int idx) {
idx = idx * 3;
cv::Scalar color =
cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255);
return color;
}
cv::Mat VisualizeTrackResult(const cv::Mat& img,
const MOTResult& results,
const float fps,
const int frame_id) {
cv::Mat vis_img = img.clone();
int im_h = img.rows;
int im_w = img.cols;
float text_scale = std::max(1, static_cast<int>(im_w / 1600.));
float text_thickness = 2.;
float line_thickness = std::max(1, static_cast<int>(im_w / 500.));
std::ostringstream oss;
oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
oss << "frame: " << frame_id << " ";
oss << "fps: " << fps << " ";
oss << "num: " << results.size();
std::string text = oss.str();
cv::Point origin;
origin.x = 0;
origin.y = static_cast<int>(15 * text_scale);
  cv::putText(vis_img,
              text,
              origin,
              cv::FONT_HERSHEY_PLAIN,
              text_scale,
              cv::Scalar(0, 0, 255),
              2);
for (int i = 0; i < results.size(); ++i) {
const int obj_id = results[i].ids;
const float score = results[i].score;
cv::Scalar color = GetColor(obj_id);
cv::Point pt1 = cv::Point(results[i].rects.left, results[i].rects.top);
cv::Point pt2 = cv::Point(results[i].rects.right, results[i].rects.bottom);
cv::Point id_pt =
cv::Point(results[i].rects.left, results[i].rects.top + 10);
cv::Point score_pt =
cv::Point(results[i].rects.left, results[i].rects.top - 10);
cv::rectangle(vis_img, pt1, pt2, color, line_thickness);
std::ostringstream idoss;
idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
idoss << obj_id;
std::string id_text = idoss.str();
cv::putText(vis_img,
id_text,
id_pt,
cv::FONT_HERSHEY_PLAIN,
text_scale,
cv::Scalar(0, 255, 255),
text_thickness);
std::ostringstream soss;
soss << std::setiosflags(std::ios::fixed) << std::setprecision(2);
soss << score;
std::string score_text = soss.str();
cv::putText(vis_img,
score_text,
score_pt,
cv::FONT_HERSHEY_PLAIN,
text_scale,
cv::Scalar(0, 255, 255),
text_thickness);
}
return vis_img;
}
void FlowStatistic(const MOTResult& results,
const int frame_id,
const int secs_interval,
const bool do_entrance_counting,
const int video_fps,
const Rect entrance,
std::set<int>* id_set,
std::set<int>* interval_id_set,
std::vector<int>* in_id_list,
std::vector<int>* out_id_list,
std::map<int, std::vector<float>>* prev_center,
std::vector<std::string>* records) {
if (frame_id == 0) interval_id_set->clear();
if (do_entrance_counting) {
    // Count in and out numbers:
    // Use the horizontal center line as the entrance, just for simplicity.
    // If a person was above the horizontal center line at the previous frame
    // and is below the line at the current frame, the in count is increased
    // by one.
    // If a person was below the horizontal center line at the previous frame
    // and is above the line at the current frame, the out count is increased
    // by one.
    // TODO(qianhui): if the entrance is not the horizontal center line,
    // the counting method should be optimized.
float entrance_y = entrance.top;
for (const auto& result : results) {
float center_x = (result.rects.left + result.rects.right) / 2;
float center_y = (result.rects.top + result.rects.bottom) / 2;
int ids = result.ids;
std::map<int, std::vector<float>>::iterator iter;
iter = prev_center->find(ids);
if (iter != prev_center->end()) {
if (iter->second[1] <= entrance_y && center_y > entrance_y) {
in_id_list->push_back(ids);
}
if (iter->second[1] >= entrance_y && center_y < entrance_y) {
out_id_list->push_back(ids);
}
(*prev_center)[ids][0] = center_x;
(*prev_center)[ids][1] = center_y;
} else {
prev_center->insert(
std::pair<int, std::vector<float>>(ids, {center_x, center_y}));
}
}
}
  // Count the total number, and the number within a user-defined interval
for (const auto& result : results) {
id_set->insert(result.ids);
interval_id_set->insert(result.ids);
}
std::ostringstream os;
os << "Frame id: " << frame_id << ", Total count: " << id_set->size();
if (do_entrance_counting) {
os << ", In count: " << in_id_list->size()
<< ", Out count: " << out_id_list->size();
}
// Reset counting at the interval beginning
int curr_interval_count = -1;
if (frame_id % video_fps == 0 && frame_id / video_fps % secs_interval == 0) {
curr_interval_count = interval_id_set->size();
os << ", Count during " << secs_interval
<< " secs: " << curr_interval_count;
interval_id_set->clear();
}
os << "\n";
std::string record = os.str();
records->push_back(record);
LOG(INFO) << record;
}
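// Worked example of the crossing rule above: PredictMOT places the
// entrance line at half the frame height, so for a 720p video
// entrance_y = 360. An id whose previous center was at y = 350 (above the
// line) and whose current center is at y = 370 (below it) is appended to
// in_id_list; the reverse crossing is counted in out_id_list.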
void SaveMOTResult(const MOTResult& results,
const int frame_id,
std::vector<std::string>* records) {
// result format: frame_id, track_id, x1, y1, w, h
std::string record;
for (int i = 0; i < results.size(); ++i) {
MOTTrack mot_track = results[i];
int ids = mot_track.ids;
float score = mot_track.score;
Rect rects = mot_track.rects;
float x1 = rects.left;
float y1 = rects.top;
float x2 = rects.right;
float y2 = rects.bottom;
float w = x2 - x1;
float h = y2 - y1;
if (w == 0 || h == 0) {
continue;
}
std::ostringstream os;
os << frame_id << " " << ids << "" << x1 << " " << y1 << " " << w << " "
<< h << "\n";
record = os.str();
records->push_back(record);
}
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/predictor.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
void Predictor::Predict(const std::vector<cv::Mat> imgs,
const double threshold,
MOTResult* result,
std::vector<double>* times) {
if (use_jde_) {
jde_sct_->Predict(imgs, threshold, result, times);
} else {
sde_sct_->Predict(imgs, threshold, result, times);
}
}
} // namespace PaddleDetection
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <thread>
#include <vector>
#include "include/preprocess_op.h"
namespace PaddleDetection {
void InitInfo::Run(cv::Mat* im, ImageBlob* data) {
data->im_shape_ = {static_cast<float>(im->rows),
static_cast<float>(im->cols)};
data->scale_factor_ = {1., 1.};
data->in_net_shape_ = {static_cast<float>(im->rows),
static_cast<float>(im->cols)};
}
void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) {
double e = 1.0;
if (is_scale_) {
e /= 255.0;
}
(*im).convertTo(*im, CV_32FC3, e);
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
}
}
}
void Permute::Run(cv::Mat* im, ImageBlob* data) {
(*im).convertTo(*im, CV_32FC3);
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
(data->im_data_).resize(rc * rh * rw);
float* base = (data->im_data_).data();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
}
}
void Resize::Run(cv::Mat* im, ImageBlob* data) {
auto resize_scale = GenerateScale(*im);
data->im_shape_ = {static_cast<float>(im->cols * resize_scale.first),
static_cast<float>(im->rows * resize_scale.second)};
data->in_net_shape_ = {static_cast<float>(im->cols * resize_scale.first),
static_cast<float>(im->rows * resize_scale.second)};
cv::resize(
*im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_);
data->im_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
data->scale_factor_ = {
resize_scale.second, resize_scale.first,
};
}
std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) {
std::pair<float, float> resize_scale;
int origin_w = im.cols;
int origin_h = im.rows;
if (keep_ratio_) {
int im_size_max = std::max(origin_w, origin_h);
int im_size_min = std::min(origin_w, origin_h);
int target_size_max =
*std::max_element(target_size_.begin(), target_size_.end());
int target_size_min =
*std::min_element(target_size_.begin(), target_size_.end());
float scale_min =
static_cast<float>(target_size_min) / static_cast<float>(im_size_min);
float scale_max =
static_cast<float>(target_size_max) / static_cast<float>(im_size_max);
float scale_ratio = std::min(scale_min, scale_max);
resize_scale = {scale_ratio, scale_ratio};
} else {
resize_scale.first =
static_cast<float>(target_size_[1]) / static_cast<float>(origin_w);
resize_scale.second =
static_cast<float>(target_size_[0]) / static_cast<float>(origin_h);
}
return resize_scale;
}
void LetterBoxResize::Run(cv::Mat* im, ImageBlob* data) {
float resize_scale = GenerateScale(*im);
int new_shape_w = std::round(im->cols * resize_scale);
int new_shape_h = std::round(im->rows * resize_scale);
data->im_shape_ = {static_cast<float>(new_shape_h),
static_cast<float>(new_shape_w)};
float padw = (target_size_[1] - new_shape_w) / 2.;
float padh = (target_size_[0] - new_shape_h) / 2.;
int top = std::round(padh - 0.1);
int bottom = std::round(padh + 0.1);
int left = std::round(padw - 0.1);
int right = std::round(padw + 0.1);
cv::resize(
*im, *im, cv::Size(new_shape_w, new_shape_h), 0, 0, cv::INTER_AREA);
data->in_net_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
cv::copyMakeBorder(*im,
*im,
top,
bottom,
left,
right,
cv::BORDER_CONSTANT,
cv::Scalar(127.5));
data->in_net_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
data->scale_factor_ = {
resize_scale, resize_scale,
};
}
float LetterBoxResize::GenerateScale(const cv::Mat& im) {
int origin_w = im.cols;
int origin_h = im.rows;
int target_h = target_size_[0];
int target_w = target_size_[1];
float ratio_h = static_cast<float>(target_h) / static_cast<float>(origin_h);
float ratio_w = static_cast<float>(target_w) / static_cast<float>(origin_w);
float resize_scale = std::min(ratio_h, ratio_w);
return resize_scale;
}
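// Worked example: for a 1280 x 720 (w x h) frame and target_size_ =
// {608, 1088} (h, w), GenerateScale returns min(608 / 720, 1088 / 1280)
// = 0.8444, so Run resizes to 1081 x 608 and pads 3 / 4 pixels on the
// left / right with gray (127.5) borders to reach 1088 x 608.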
void PadStride::Run(cv::Mat* im, ImageBlob* data) {
if (stride_ <= 0) {
return;
}
int rc = im->channels();
int rh = im->rows;
int rw = im->cols;
int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
cv::copyMakeBorder(
*im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0));
data->in_net_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
}
// Preprocessor op running order
const std::vector<std::string> Preprocessor::RUN_ORDER = {"InitInfo",
"Resize",
"LetterBoxResize",
"NormalizeImage",
"PadStride",
"Permute"};
void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
for (const auto& name : RUN_ORDER) {
if (ops_.find(name) != ops_.end()) {
ops_[name]->Run(im, data);
}
}
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/sde_predictor.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
// Load Model and create model predictor
void SDEPredictor::LoadModel(const std::string& det_model_dir,
                             const std::string& reid_model_dir,
                             const std::string& run_mode) {
  throw "Not implemented";
}
void SDEPredictor::Preprocess(const cv::Mat& ori_im) {
  throw "Not implemented";
}
void SDEPredictor::Postprocess(const cv::Mat dets,
                               const cv::Mat emb,
                               MOTResult* result) {
  throw "Not implemented";
}
void SDEPredictor::Predict(const std::vector<cv::Mat> imgs,
                           const double threshold,
                           MOTResult* result,
                           std::vector<double>* times) {
  throw "Not implemented";
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.cpp
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#include <limits.h>
#include <stdio.h>
#include <algorithm>
#include <map>
#include "include/lapjv.h"
#include "include/tracker.h"
#define mat2vec4f(m) \
cv::Vec4f(*m.ptr<float>(0, 0), \
*m.ptr<float>(0, 1), \
*m.ptr<float>(0, 2), \
*m.ptr<float>(0, 3))
namespace PaddleDetection {
static std::map<int, float> chi2inv95 = {{1, 3.841459f},
{2, 5.991465f},
{3, 7.814728f},
{4, 9.487729f},
{5, 11.070498f},
{6, 12.591587f},
{7, 14.067140f},
{8, 15.507313f},
{9, 16.918978f}};
JDETracker *JDETracker::me = new JDETracker;
JDETracker *JDETracker::instance(void) { return me; }
JDETracker::JDETracker(void)
: timestamp(0), max_lost_time(30), lambda(0.98f), det_thresh(0.3f) {}
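// Matching cascade, following the JDE/FairMOT association scheme:
// 1) match activated tracks (plus lost ones) against detections with the
//    fused embedding/motion cost, gated by the Kalman state (limit 0.7);
// 2) match the remaining Tracked-state trajectories by IoU (limit 0.5);
// 3) match unconfirmed trajectories by IoU (limit 0.7);
// 4) detections still unmatched spawn new trajectories if their score
//    reaches det_thresh; lost tracks older than max_lost_time are removed.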
bool JDETracker::update(const cv::Mat &dets,
const cv::Mat &emb,
std::vector<Track> *tracks) {
++timestamp;
TrajectoryPool candidates(dets.rows);
for (int i = 0; i < dets.rows; ++i) {
float score = *dets.ptr<float>(i, 4);
const cv::Mat &ltrb_ = dets(cv::Rect(0, i, 4, 1));
cv::Vec4f ltrb = mat2vec4f(ltrb_);
const cv::Mat &embedding = emb(cv::Rect(0, i, emb.cols, 1));
candidates[i] = Trajectory(ltrb, score, embedding);
}
TrajectoryPtrPool tracked_trajectories;
TrajectoryPtrPool unconfirmed_trajectories;
for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) {
if (this->tracked_trajectories[i].is_activated)
tracked_trajectories.push_back(&this->tracked_trajectories[i]);
else
unconfirmed_trajectories.push_back(&this->tracked_trajectories[i]);
}
TrajectoryPtrPool trajectory_pool =
tracked_trajectories + &(this->lost_trajectories);
for (size_t i = 0; i < trajectory_pool.size(); ++i)
trajectory_pool[i]->predict();
Match matches;
std::vector<int> mismatch_row;
std::vector<int> mismatch_col;
cv::Mat cost = motion_distance(trajectory_pool, candidates);
linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col);
MatchIterator miter;
TrajectoryPtrPool activated_trajectories;
TrajectoryPtrPool retrieved_trajectories;
for (miter = matches.begin(); miter != matches.end(); miter++) {
Trajectory *pt = trajectory_pool[miter->first];
Trajectory &ct = candidates[miter->second];
if (pt->state == Tracked) {
pt->update(&ct, timestamp);
activated_trajectories.push_back(pt);
} else {
pt->reactivate(&ct, timestamp);
retrieved_trajectories.push_back(pt);
}
}
TrajectoryPtrPool next_candidates(mismatch_col.size());
for (size_t i = 0; i < mismatch_col.size(); ++i)
next_candidates[i] = &candidates[mismatch_col[i]];
TrajectoryPtrPool next_trajectory_pool;
for (size_t i = 0; i < mismatch_row.size(); ++i) {
int j = mismatch_row[i];
if (trajectory_pool[j]->state == Tracked)
next_trajectory_pool.push_back(trajectory_pool[j]);
}
cost = iou_distance(next_trajectory_pool, next_candidates);
linear_assignment(cost, 0.5f, &matches, &mismatch_row, &mismatch_col);
for (miter = matches.begin(); miter != matches.end(); miter++) {
Trajectory *pt = next_trajectory_pool[miter->first];
Trajectory *ct = next_candidates[miter->second];
if (pt->state == Tracked) {
pt->update(ct, timestamp);
activated_trajectories.push_back(pt);
} else {
pt->reactivate(ct, timestamp);
retrieved_trajectories.push_back(pt);
}
}
TrajectoryPtrPool lost_trajectories;
for (size_t i = 0; i < mismatch_row.size(); ++i) {
Trajectory *pt = next_trajectory_pool[mismatch_row[i]];
if (pt->state != Lost) {
pt->mark_lost();
lost_trajectories.push_back(pt);
}
}
TrajectoryPtrPool nnext_candidates(mismatch_col.size());
for (size_t i = 0; i < mismatch_col.size(); ++i)
nnext_candidates[i] = next_candidates[mismatch_col[i]];
cost = iou_distance(unconfirmed_trajectories, nnext_candidates);
linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col);
for (miter = matches.begin(); miter != matches.end(); miter++) {
unconfirmed_trajectories[miter->first]->update(
nnext_candidates[miter->second], timestamp);
activated_trajectories.push_back(unconfirmed_trajectories[miter->first]);
}
TrajectoryPtrPool removed_trajectories;
for (size_t i = 0; i < mismatch_row.size(); ++i) {
unconfirmed_trajectories[mismatch_row[i]]->mark_removed();
removed_trajectories.push_back(unconfirmed_trajectories[mismatch_row[i]]);
}
for (size_t i = 0; i < mismatch_col.size(); ++i) {
if (nnext_candidates[mismatch_col[i]]->score < det_thresh) continue;
nnext_candidates[mismatch_col[i]]->activate(timestamp);
activated_trajectories.push_back(nnext_candidates[mismatch_col[i]]);
}
for (size_t i = 0; i < this->lost_trajectories.size(); ++i) {
Trajectory &lt = this->lost_trajectories[i];
if (timestamp - lt.timestamp > max_lost_time) {
lt.mark_removed();
removed_trajectories.push_back(&lt);
}
}
TrajectoryPoolIterator piter;
for (piter = this->tracked_trajectories.begin();
piter != this->tracked_trajectories.end();) {
if (piter->state != Tracked)
piter = this->tracked_trajectories.erase(piter);
else
++piter;
}
this->tracked_trajectories += activated_trajectories;
this->tracked_trajectories += retrieved_trajectories;
this->lost_trajectories -= this->tracked_trajectories;
this->lost_trajectories += lost_trajectories;
this->lost_trajectories -= this->removed_trajectories;
this->removed_trajectories += removed_trajectories;
remove_duplicate_trajectory(&this->tracked_trajectories,
&this->lost_trajectories);
tracks->clear();
for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) {
if (this->tracked_trajectories[i].is_activated) {
Track track = {.id = this->tracked_trajectories[i].id,
.score = this->tracked_trajectories[i].score,
.ltrb = this->tracked_trajectories[i].ltrb};
tracks->push_back(track);
}
}
return 0;
}
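// Fused cost: lambda * embedding_distance + (1 - lambda) * mahalanobis
// distance. Entries whose squared Mahalanobis distance exceeds the
// chi-square 95% quantile for 4 degrees of freedom (chi2inv95[4] = 9.4877)
// are set to FLT_MAX so that gated pairs can never be matched.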
cv::Mat JDETracker::motion_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b) {
if (0 == a.size() || 0 == b.size())
return cv::Mat(a.size(), b.size(), CV_32F);
cv::Mat edists = embedding_distance(a, b);
cv::Mat mdists = mahalanobis_distance(a, b);
cv::Mat fdists = lambda * edists + (1 - lambda) * mdists;
const float gate_thresh = chi2inv95[4];
for (int i = 0; i < fdists.rows; ++i) {
for (int j = 0; j < fdists.cols; ++j) {
if (*mdists.ptr<float>(i, j) > gate_thresh)
*fdists.ptr<float>(i, j) = FLT_MAX;
}
}
return fdists;
}
void JDETracker::linear_assignment(const cv::Mat &cost,
float cost_limit,
Match *matches,
std::vector<int> *mismatch_row,
std::vector<int> *mismatch_col) {
matches->clear();
mismatch_row->clear();
mismatch_col->clear();
if (cost.empty()) {
for (int i = 0; i < cost.rows; ++i) mismatch_row->push_back(i);
for (int i = 0; i < cost.cols; ++i) mismatch_col->push_back(i);
return;
}
cv::Mat x(cost.rows, 1, CV_32S);
cv::Mat y(cost.cols, 1, CV_32S);
lapjv_internal(cost,
true,
cost_limit,
reinterpret_cast<int *>(x.data),
reinterpret_cast<int *>(y.data));
for (int i = 0; i < x.rows; ++i) {
int j = *x.ptr<int>(i);
if (j >= 0)
matches->insert({i, j});
else
mismatch_row->push_back(i);
}
for (int i = 0; i < y.rows; ++i) {
int j = *y.ptr<int>(i);
if (j < 0) mismatch_col->push_back(i);
}
return;
}
void JDETracker::remove_duplicate_trajectory(TrajectoryPool *a,
TrajectoryPool *b,
float iou_thresh) {
if (a->size() == 0 || b->size() == 0) return;
cv::Mat dist = iou_distance(*a, *b);
cv::Mat mask = dist < iou_thresh;
std::vector<cv::Point> idx;
cv::findNonZero(mask, idx);
std::vector<int> da;
std::vector<int> db;
for (size_t i = 0; i < idx.size(); ++i) {
int ta = (*a)[idx[i].y].timestamp - (*a)[idx[i].y].starttime;
int tb = (*b)[idx[i].x].timestamp - (*b)[idx[i].x].starttime;
if (ta > tb)
db.push_back(idx[i].x);
else
da.push_back(idx[i].y);
}
int id = 0;
TrajectoryPoolIterator piter;
for (piter = a->begin(); piter != a->end();) {
std::vector<int>::iterator iter = find(da.begin(), da.end(), id++);
if (iter != da.end())
piter = a->erase(piter);
else
++piter;
}
id = 0;
for (piter = b->begin(); piter != b->end();) {
std::vector<int>::iterator iter = find(db.begin(), db.end(), id++);
if (iter != db.end())
piter = b->erase(piter);
else
++piter;
}
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.cpp
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#include "include/trajectory.h"
#include <algorithm>
namespace PaddleDetection {
void TKalmanFilter::init(const cv::Mat &measurement) {
measurement.copyTo(statePost(cv::Rect(0, 0, 1, 4)));
statePost(cv::Rect(0, 4, 1, 4)).setTo(0);
statePost.copyTo(statePre);
float varpos = 2 * std_weight_position * (*measurement.ptr<float>(3));
varpos *= varpos;
float varvel = 10 * std_weight_velocity * (*measurement.ptr<float>(3));
varvel *= varvel;
errorCovPost.setTo(0);
*errorCovPost.ptr<float>(0, 0) = varpos;
*errorCovPost.ptr<float>(1, 1) = varpos;
*errorCovPost.ptr<float>(2, 2) = 1e-4f;
*errorCovPost.ptr<float>(3, 3) = varpos;
*errorCovPost.ptr<float>(4, 4) = varvel;
*errorCovPost.ptr<float>(5, 5) = varvel;
*errorCovPost.ptr<float>(6, 6) = 1e-10f;
*errorCovPost.ptr<float>(7, 7) = varvel;
errorCovPost.copyTo(errorCovPre);
}
const cv::Mat &TKalmanFilter::predict() {
float varpos = std_weight_position * (*statePre.ptr<float>(3));
varpos *= varpos;
float varvel = std_weight_velocity * (*statePre.ptr<float>(3));
varvel *= varvel;
processNoiseCov.setTo(0);
*processNoiseCov.ptr<float>(0, 0) = varpos;
*processNoiseCov.ptr<float>(1, 1) = varpos;
*processNoiseCov.ptr<float>(2, 2) = 1e-4f;
*processNoiseCov.ptr<float>(3, 3) = varpos;
*processNoiseCov.ptr<float>(4, 4) = varvel;
*processNoiseCov.ptr<float>(5, 5) = varvel;
*processNoiseCov.ptr<float>(6, 6) = 1e-10f;
*processNoiseCov.ptr<float>(7, 7) = varvel;
return cv::KalmanFilter::predict();
}
const cv::Mat &TKalmanFilter::correct(const cv::Mat &measurement) {
float varpos = std_weight_position * (*measurement.ptr<float>(3));
varpos *= varpos;
measurementNoiseCov.setTo(0);
*measurementNoiseCov.ptr<float>(0, 0) = varpos;
*measurementNoiseCov.ptr<float>(1, 1) = varpos;
*measurementNoiseCov.ptr<float>(2, 2) = 1e-2f;
*measurementNoiseCov.ptr<float>(3, 3) = varpos;
return cv::KalmanFilter::correct(measurement);
}
void TKalmanFilter::project(cv::Mat *mean, cv::Mat *covariance) const {
float varpos = std_weight_position * (*statePost.ptr<float>(3));
varpos *= varpos;
cv::Mat measurementNoiseCov_ = cv::Mat::eye(4, 4, CV_32F);
*measurementNoiseCov_.ptr<float>(0, 0) = varpos;
*measurementNoiseCov_.ptr<float>(1, 1) = varpos;
*measurementNoiseCov_.ptr<float>(2, 2) = 1e-2f;
*measurementNoiseCov_.ptr<float>(3, 3) = varpos;
*mean = measurementMatrix * statePost;
cv::Mat temp = measurementMatrix * errorCovPost;
gemm(temp,
measurementMatrix,
1,
measurementNoiseCov_,
1,
*covariance,
cv::GEMM_2_T);
}
int Trajectory::count = 0;
const cv::Mat &Trajectory::predict(void) {
if (state != Tracked) *cv::KalmanFilter::statePost.ptr<float>(7) = 0;
return TKalmanFilter::predict();
}
void Trajectory::update(Trajectory *traj,
int timestamp_,
bool update_embedding_) {
timestamp = timestamp_;
++length;
ltrb = traj->ltrb;
xyah = traj->xyah;
TKalmanFilter::correct(cv::Mat(traj->xyah));
state = Tracked;
is_activated = true;
score = traj->score;
if (update_embedding_) update_embedding(traj->current_embedding);
}
void Trajectory::activate(int timestamp_) {
id = next_id();
TKalmanFilter::init(cv::Mat(xyah));
length = 0;
state = Tracked;
if (timestamp_ == 1) {
is_activated = true;
}
timestamp = timestamp_;
starttime = timestamp_;
}
void Trajectory::reactivate(Trajectory *traj, int timestamp_, bool newid) {
TKalmanFilter::correct(cv::Mat(traj->xyah));
update_embedding(traj->current_embedding);
length = 0;
state = Tracked;
is_activated = true;
timestamp = timestamp_;
if (newid) id = next_id();
}
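// Embedding smoothing: an exponential moving average over L2-normalized
// embeddings, smooth = eta * smooth + (1 - eta) * current, renormalized
// after each update (eta is a member declared in include/trajectory.h).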
void Trajectory::update_embedding(const cv::Mat &embedding) {
current_embedding = embedding / cv::norm(embedding);
if (smooth_embedding.empty()) {
smooth_embedding = current_embedding;
} else {
smooth_embedding = eta * smooth_embedding + (1 - eta) * current_embedding;
}
smooth_embedding = smooth_embedding / cv::norm(smooth_embedding);
}
TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPool &b) {
TrajectoryPool sum;
sum.insert(sum.end(), a.begin(), a.end());
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id;
for (size_t i = 0; i < b.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), b[i].id);
if (iter == ids.end()) {
sum.push_back(b[i]);
ids.push_back(b[i].id);
}
}
return sum;
}
TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPtrPool &b) {
TrajectoryPool sum;
sum.insert(sum.end(), a.begin(), a.end());
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id;
for (size_t i = 0; i < b.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), b[i]->id);
if (iter == ids.end()) {
sum.push_back(*b[i]);
ids.push_back(b[i]->id);
}
}
return sum;
}
TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT
const TrajectoryPtrPool &b) {
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id;
for (size_t i = 0; i < b.size(); ++i) {
if (b[i]->smooth_embedding.empty()) continue;
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), b[i]->id);
if (iter == ids.end()) {
a.push_back(*b[i]);
ids.push_back(b[i]->id);
}
}
return a;
}
TrajectoryPool operator-(const TrajectoryPool &a, const TrajectoryPool &b) {
TrajectoryPool dif;
std::vector<int> ids(b.size());
for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i].id;
for (size_t i = 0; i < a.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), a[i].id);
if (iter == ids.end()) dif.push_back(a[i]);
}
return dif;
}
TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT
const TrajectoryPool &b) {
std::vector<int> ids(b.size());
for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i].id;
TrajectoryPoolIterator piter;
for (piter = a.begin(); piter != a.end();) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), piter->id);
if (iter == ids.end())
++piter;
else
piter = a.erase(piter);
}
return a;
}
TrajectoryPtrPool operator+(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b) {
TrajectoryPtrPool sum;
sum.insert(sum.end(), a.begin(), a.end());
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i]->id;
for (size_t i = 0; i < b.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), b[i]->id);
if (iter == ids.end()) {
sum.push_back(b[i]);
ids.push_back(b[i]->id);
}
}
return sum;
}
TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, TrajectoryPool *b) {
TrajectoryPtrPool sum;
sum.insert(sum.end(), a.begin(), a.end());
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i]->id;
for (size_t i = 0; i < b->size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), (*b)[i].id);
if (iter == ids.end()) {
sum.push_back(&(*b)[i]);
ids.push_back((*b)[i].id);
}
}
return sum;
}
TrajectoryPtrPool operator-(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b) {
TrajectoryPtrPool dif;
std::vector<int> ids(b.size());
for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i]->id;
for (size_t i = 0; i < a.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), a[i]->id);
if (iter == ids.end()) dif.push_back(a[i]);
}
return dif;
}
cv::Mat embedding_distance(const TrajectoryPool &a, const TrajectoryPool &b) {
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
cv::Mat u = a[i].smooth_embedding;
cv::Mat v = b[j].smooth_embedding;
double uv = u.dot(v);
double uu = u.dot(u);
double vv = v.dot(v);
double dist = std::abs(1. - uv / std::sqrt(uu * vv));
// double dist = cv::norm(a[i].smooth_embedding, b[j].smooth_embedding,
// cv::NORM_L2);
distsi[j] = static_cast<float>(std::max(std::min(dist, 2.), 0.));
}
}
return dists;
}
cv::Mat embedding_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b) {
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
// double dist = cv::norm(a[i]->smooth_embedding, b[j]->smooth_embedding,
// cv::NORM_L2);
// distsi[j] = static_cast<float>(dist);
cv::Mat u = a[i]->smooth_embedding;
cv::Mat v = b[j]->smooth_embedding;
double uv = u.dot(v);
double uu = u.dot(u);
double vv = v.dot(v);
double dist = std::abs(1. - uv / std::sqrt(uu * vv));
distsi[j] = static_cast<float>(std::max(std::min(dist, 2.), 0.));
}
}
return dists;
}
cv::Mat embedding_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b) {
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
// double dist = cv::norm(a[i]->smooth_embedding, b[j].smooth_embedding,
// cv::NORM_L2);
// distsi[j] = static_cast<float>(dist);
cv::Mat u = a[i]->smooth_embedding;
cv::Mat v = b[j].smooth_embedding;
double uv = u.dot(v);
double uu = u.dot(u);
double vv = v.dot(v);
double dist = std::abs(1. - uv / std::sqrt(uu * vv));
distsi[j] = static_cast<float>(std::max(std::min(dist, 2.), 0.));
}
}
return dists;
}
cv::Mat mahalanobis_distance(const TrajectoryPool &a, const TrajectoryPool &b) {
std::vector<cv::Mat> means(a.size());
std::vector<cv::Mat> icovariances(a.size());
for (size_t i = 0; i < a.size(); ++i) {
cv::Mat covariance;
a[i].project(&means[i], &covariance);
cv::invert(covariance, icovariances[i]);
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Mat x(b[j].xyah);
float dist =
static_cast<float>(cv::Mahalanobis(x, means[i], icovariances[i]));
distsi[j] = dist * dist;
}
}
return dists;
}
cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b) {
std::vector<cv::Mat> means(a.size());
std::vector<cv::Mat> icovariances(a.size());
for (size_t i = 0; i < a.size(); ++i) {
cv::Mat covariance;
a[i]->project(&means[i], &covariance);
cv::invert(covariance, icovariances[i]);
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Mat x(b[j]->xyah);
float dist =
static_cast<float>(cv::Mahalanobis(x, means[i], icovariances[i]));
distsi[j] = dist * dist;
}
}
return dists;
}
cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b) {
std::vector<cv::Mat> means(a.size());
std::vector<cv::Mat> icovariances(a.size());
for (size_t i = 0; i < a.size(); ++i) {
cv::Mat covariance;
a[i]->project(&means[i], &covariance);
cv::invert(covariance, icovariances[i]);
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Mat x(b[j].xyah);
float dist =
static_cast<float>(cv::Mahalanobis(x, means[i], icovariances[i]));
distsi[j] = dist * dist;
}
}
return dists;
}
static inline float calc_inter_area(const cv::Vec4f &a, const cv::Vec4f &b) {
if (a[2] < b[0] || a[0] > b[2] || a[3] < b[1] || a[1] > b[3]) return 0.f;
float w = std::min(a[2], b[2]) - std::max(a[0], b[0]);
float h = std::min(a[3], b[3]) - std::max(a[1], b[1]);
return w * h;
}
cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b) {
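// Jaccard distance: 1 - IoU for every trajectory pair.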
std::vector<float> areaa(a.size());
for (size_t i = 0; i < a.size(); ++i) {
float w = a[i].ltrb[2] - a[i].ltrb[0];
float h = a[i].ltrb[3] - a[i].ltrb[1];
areaa[i] = w * h;
}
std::vector<float> areab(b.size());
for (size_t j = 0; j < b.size(); ++j) {
float w = b[j].ltrb[2] - b[j].ltrb[0];
float h = b[j].ltrb[3] - b[j].ltrb[1];
areab[j] = w * h;
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
const cv::Vec4f &boxa = a[i].ltrb;
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Vec4f &boxb = b[j].ltrb;
float inters = calc_inter_area(boxa, boxb);
distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters);
}
}
return dists;
}
cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b) {
std::vector<float> areaa(a.size());
for (size_t i = 0; i < a.size(); ++i) {
float w = a[i]->ltrb[2] - a[i]->ltrb[0];
float h = a[i]->ltrb[3] - a[i]->ltrb[1];
areaa[i] = w * h;
}
std::vector<float> areab(b.size());
for (size_t j = 0; j < b.size(); ++j) {
float w = b[j]->ltrb[2] - b[j]->ltrb[0];
float h = b[j]->ltrb[3] - b[j]->ltrb[1];
areab[j] = w * h;
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
const cv::Vec4f &boxa = a[i]->ltrb;
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Vec4f &boxb = b[j]->ltrb;
float inters = calc_inter_area(boxa, boxb);
distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters);
}
}
return dists;
}
cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b) {
std::vector<float> areaa(a.size());
for (size_t i = 0; i < a.size(); ++i) {
float w = a[i]->ltrb[2] - a[i]->ltrb[0];
float h = a[i]->ltrb[3] - a[i]->ltrb[1];
areaa[i] = w * h;
}
std::vector<float> areab(b.size());
for (size_t j = 0; j < b.size(); ++j) {
float w = b[j].ltrb[2] - b[j].ltrb[0];
float h = b[j].ltrb[3] - b[j].ltrb[1];
areab[j] = w * h;
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
const cv::Vec4f &boxa = a[i]->ltrb;
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Vec4f &boxb = b[j].ltrb;
float inters = calc_inter_area(boxa, boxb);
distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters);
}
}
return dists;
}
} // namespace PaddleDetection
# Python-side Inference Deployment
In PaddlePaddle, the inference engine and the training engine use different underlying optimizations. The inference engine is AnalysisPredictor, a Python interface to the [C++ inference library](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html) that is optimized specifically for deployment: it applies a series of graph-level optimizations to the model and removes unnecessary memory copies. For users with strict performance requirements when deploying trained models, we provide inference scripts that are independent of PaddleDetection and can be integrated directly.
Deployment consists of two steps:
- Export the inference model
- Run inference with Python
During training, PaddleDetection keeps both the forward network and the optimizer parameters; for deployment only the forward parameters are needed. See [Export Model](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/deploy/EXPORT_MODEL.md) for details.
The exported directory contains four files: `infer_cfg.yml`, `model.pdiparams`, `model.pdiparams.info`, and `model.pdmodel`.
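As a sketch, for the FairMOT model exported in section 1.1 below, the output directory would look like this (the folder name follows the config name):
```
output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320/
├── infer_cfg.yml          # preprocessing and architecture config
├── model.pdmodel          # network structure
├── model.pdiparams        # forward parameters
└── model.pdiparams.info   # parameter metadata
```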
## 1. Exporting and Running the FairMOT Model
### 1.1 Export the inference model
```bash
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams
```
### 1.2 Run Python inference with the exported model
```bash
python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320 --video_file={your video name}.mp4 --device=GPU --save_mot_txts
```
**Note:**
- Tracking models take a video as input; single-image inference is not supported. By default the visualized tracking result is saved as a video; add `--save_mot_txts` to also save the tracking results as txt files, or `--save_images` to save the visualized frames as images.
- To export and run a multi-class or vehicle FairMOT model, simply switch to the corresponding config and weights. For example:
```
job_name=mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone
model_type=mot/mcfairmot
config=configs/${model_type}/${job_name}.yml
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=https://paddledet.bj.bcebos.com/models/mot/${job_name}.pdparams
python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/${job_name} --video_file={your video name}.mp4 --device=GPU --save_mot_txts
```
## 2. Exporting and Running the DeepSORT Model
### 2.1 Export the inference models
Step 1: export the detection model
```bash
# Export the JDE YOLOv3 pedestrian detection model
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/jde_yolov3_darknet53_30e_1088x608_mix.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams
# Or export the PPYOLOv2 pedestrian detection model
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/ppyolov2_r50vd_dcn_365e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/ppyolov2_r50vd_dcn_365e_640x640_mot17half.pdparams
```
Step 2: export the ReID model
```bash
# Export the PCB Pyramid ReID model
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/reid/deepsort_pcb_pyramid_r101.yml -o reid_weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams
# Or export the PPLCNet ReID model
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/reid/deepsort_pplcnet.yml -o reid_weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams
```
### 2.2 Run Python inference with the exported models
```bash
# Use the exported JDE YOLOv3 pedestrian detection model with the PCB Pyramid ReID model
python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/jde_yolov3_darknet53_30e_1088x608_mix/ --reid_model_dir=output_inference/deepsort_pcb_pyramid_r101/ --video_file={your video name}.mp4 --device=GPU --save_mot_txts
# Or use the exported PPYOLOv2 pedestrian detection model with the PPLCNet ReID model
python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/ppyolov2_r50vd_dcn_365e_640x640_mot17half/ --reid_model_dir=output_inference/deepsort_pplcnet/ --video_file={your video name}.mp4 --device=GPU --scaled=True --save_mot_txts
```
**Note:**
- Tracking models take a video as input; single-image inference is not supported. By default the visualized tracking result is saved as a video; add `--save_mot_txts` (one txt per video) or `--save_images` to save the visualized frames as images.
- `--scaled` indicates whether the coordinates in the model output have already been scaled back to the original image: set it to False for the JDE YOLOv3 detector and to True for general-purpose detection models.
## 3. Exporting and Running the Cross-camera (MTMCT) Tracking Model
### 3.1 Export the inference models
Step 1: download the exported detection model
```bash
wget https://paddledet.bj.bcebos.com/models/mot/deepsort/picodet_l_640_aic21mtmct_vehicle.tar
tar -xvf picodet_l_640_aic21mtmct_vehicle.tar
```
Step 2: download the exported ReID model
```bash
wget https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet_vehicle.tar
tar -xvf deepsort_pplcnet_vehicle.tar
```
### 3.2 Run Python inference with the exported models
```bash
# Use the exported PicoDet vehicle detection model with the PPLCNet vehicle ReID model
python deploy/pptracking/python/mot_sde_infer.py --model_dir=picodet_l_640_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --mtmct_dir={your mtmct scene video folder} --mtmct_cfg=mtmct_cfg --device=GPU --scaled=True --save_mot_txts --save_images
```
**Note:**
- Tracking models take a video as input; single-image inference is not supported. By default the visualized tracking result is saved as a video; add `--save_mot_txts` (one txt per video) or `--save_images` to save the visualized frames as images.
- `--scaled` indicates whether the coordinates in the model output have already been scaled back to the original image: set it to False for the JDE YOLOv3 detector and to True for general-purpose detection models.
- `--mtmct_dir` is the folder for one MTMCT scene; it contains one image sub-folder per camera of that scene, and there must be at least two such sub-folders. A sketch of such a layout is shown below.
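As an illustration (the folder names here are hypothetical), a scene directory passed to `--mtmct_dir` could look like:
```
mtmct_scene_01/
├── camera_1/        # frames captured by the first camera
│   ├── 00001.jpg
│   └── ...
└── camera_2/        # frames captured by the second camera
    ├── 00001.jpg
    └── ...
```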
## Argument Description:
| Argument | Required | Description |
|-------|-------|----------|
| --model_dir | Yes | Path of the exported model described above |
| --image_file | Option | Image to predict |
| --image_dir | Option | Directory of images to predict |
| --video_file | Option | Video to predict |
| --camera_id | Option | ID of the camera used for prediction, default -1 (do not use a camera; valid values are 0 to number of cameras - 1). During prediction, press `q` in the visualization window to quit and write the result to output/output.mp4 |
| --device | Option | Runtime device, one of `CPU/GPU/XPU`, default `CPU` |
| --run_mode | Option | When using GPU, default is fluid; options are (fluid/trt_fp32/trt_fp16/trt_int8) |
| --batch_size | Option | Batch size for prediction, effective when `image_dir` is set, default 1 |
| --threshold | Option | Score threshold for predictions, default 0.5 |
| --output_dir | Option | Root directory for visualized results, default output/ |
| --run_benchmark | Option | Whether to run the benchmark; requires `--image_file` or `--image_dir`, default False |
| --enable_mkldnn | Option | Whether to enable MKLDNN acceleration for CPU inference, default False |
| --cpu_threads | Option | Number of CPU threads, default 1 |
| --trt_calib_mode | Option | Whether TensorRT calibration is enabled, default False. Set to True when using TensorRT int8, and to False for models quantized with PaddleSlim |
| --do_entrance_counting | Option | Whether to count entrance/exit traffic, default False |
| --draw_center_traj | Option | Whether to draw tracking trajectories, default False |
| --mtmct_dir | Option | Image folder for MTMCT cross-camera tracking, default None |
| --mtmct_cfg | Option | Config file for MTMCT cross-camera tracking, default None |
Notes:
- Argument precedence: `camera_id` > `video_file` > `image_dir` > `image_file`
- run_mode: fluid runs AnalysisPredictor at float32 precision; the other values run AnalysisPredictor with the corresponding TensorRT precision (see the example after this list).
- If the installed PaddlePaddle does not support TensorRT inference, you need to build it from source; see the [inference library build guide](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)
- If --run_benchmark is set to True, install the dependencies first: `pip install pynvml psutil GPUtil`
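As a sketch (the video file name is a placeholder, and a TensorRT-enabled PaddlePaddle build is assumed), running the FairMOT pipeline with TensorRT FP16 would look like:
```bash
# Requires --device=GPU; trt_fp16 trades precision for higher throughput
python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320 --video_file=test.mp4 --device=GPU --run_mode=trt_fp16 --save_mot_txts
```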
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import logging
import paddle
import paddle.inference as paddle_infer
from pathlib import Path
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_PATH_ROOT = f"{CUR_DIR}/../../output"
class PaddleInferBenchmark(object):
def __init__(self,
config,
model_info: dict={},
data_info: dict={},
perf_info: dict={},
resource_info: dict={},
**kwargs):
"""
Construct PaddleInferBenchmark Class to format logs.
args:
config(paddle.inference.Config): paddle inference config
model_info(dict): basic model info
{'model_name': 'resnet50',
'precision': 'fp32'}
data_info(dict): input data info
{'batch_size': 1,
'shape': '3,224,224',
'data_num': 1000}
perf_info(dict): performance result
{'preprocess_time_s': 1.0,
'inference_time_s': 2.0,
'postprocess_time_s': 1.0,
'total_time_s': 4.0}
resource_info(dict):
cpu and gpu resources
{'cpu_rss': 100,
'gpu_rss': 100,
'gpu_util': 60}
"""
# PaddleInferBenchmark Log Version
self.log_version = "1.0.3"
# Paddle Version
self.paddle_version = paddle.__version__
self.paddle_commit = paddle.__git_commit__
paddle_infer_info = paddle_infer.get_version()
self.paddle_branch = paddle_infer_info.strip().split(': ')[-1]
# model info
self.model_info = model_info
# data info
self.data_info = data_info
# perf info
self.perf_info = perf_info
try:
# required value
self.model_name = model_info['model_name']
self.precision = model_info['precision']
self.batch_size = data_info['batch_size']
self.shape = data_info['shape']
self.data_num = data_info['data_num']
self.inference_time_s = round(perf_info['inference_time_s'], 4)
except (KeyError, TypeError):
self.print_help()
raise ValueError(
"Set argument wrong, please check input argument and its type")
self.preprocess_time_s = perf_info.get('preprocess_time_s', 0)
self.postprocess_time_s = perf_info.get('postprocess_time_s', 0)
self.total_time_s = perf_info.get('total_time_s', 0)
self.inference_time_s_90 = perf_info.get("inference_time_s_90", "")
self.inference_time_s_99 = perf_info.get("inference_time_s_99", "")
self.succ_rate = perf_info.get("succ_rate", "")
self.qps = perf_info.get("qps", "")
# conf info
self.config_status = self.parse_config(config)
# mem info
if isinstance(resource_info, dict):
self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0))
self.cpu_vms_mb = int(resource_info.get('cpu_vms_mb', 0))
self.cpu_shared_mb = int(resource_info.get('cpu_shared_mb', 0))
self.cpu_dirty_mb = int(resource_info.get('cpu_dirty_mb', 0))
self.cpu_util = round(resource_info.get('cpu_util', 0), 2)
self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0))
self.gpu_util = round(resource_info.get('gpu_util', 0), 2)
self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2)
else:
self.cpu_rss_mb = 0
self.cpu_vms_mb = 0
self.cpu_shared_mb = 0
self.cpu_dirty_mb = 0
self.cpu_util = 0
self.gpu_rss_mb = 0
self.gpu_util = 0
self.gpu_mem_util = 0
# init benchmark logger
self.benchmark_logger()
def benchmark_logger(self):
"""
benchmark logger
"""
# remove other logging handler
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
# Init logger
FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log"
Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True)
logging.basicConfig(
level=logging.INFO,
format=FORMAT,
handlers=[
logging.FileHandler(
filename=log_output, mode='w'),
logging.StreamHandler(),
])
self.logger = logging.getLogger(__name__)
self.logger.info(
f"Paddle Inference benchmark log will be saved to {log_output}")
def parse_config(self, config) -> dict:
"""
parse paddle predictor config
args:
config(paddle.inference.Config): paddle inference config
return:
config_status(dict): dict style config info
"""
if isinstance(config, paddle_infer.Config):
config_status = {}
config_status['runtime_device'] = "gpu" if config.use_gpu(
) else "cpu"
config_status['ir_optim'] = config.ir_optim()
config_status['enable_tensorrt'] = config.tensorrt_engine_enabled()
config_status['precision'] = self.precision
config_status['enable_mkldnn'] = config.mkldnn_enabled()
config_status[
'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads(
)
elif isinstance(config, dict):
config_status = {}
config_status['runtime_device'] = config.get('runtime_device', "")
config_status['ir_optim'] = config.get('ir_optim', "")
config_status['enable_tensorrt'] = config.get('enable_tensorrt', "")
config_status['precision'] = config.get('precision', "")
config_status['enable_mkldnn'] = config.get('enable_mkldnn', "")
config_status['cpu_math_library_num_threads'] = config.get(
'cpu_math_library_num_threads', "")
else:
self.print_help()
raise ValueError(
"Set argument config wrong, please check input argument and its type"
)
return config_status
def report(self, identifier=None):
"""
print log report
args:
identifier(string): identify log
"""
if identifier:
identifier = f"[{identifier}]"
else:
identifier = ""
self.logger.info("\n")
self.logger.info(
"---------------------- Paddle info ----------------------")
self.logger.info(f"{identifier} paddle_version: {self.paddle_version}")
self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}")
self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}")
self.logger.info(f"{identifier} log_api_version: {self.log_version}")
self.logger.info(
"----------------------- Conf info -----------------------")
self.logger.info(
f"{identifier} runtime_device: {self.config_status['runtime_device']}"
)
self.logger.info(
f"{identifier} ir_optim: {self.config_status['ir_optim']}")
self.logger.info(f"{identifier} enable_memory_optim: {True}")
self.logger.info(
f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}"
)
self.logger.info(
f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}")
self.logger.info(
f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}"
)
self.logger.info(
"----------------------- Model info ----------------------")
self.logger.info(f"{identifier} model_name: {self.model_name}")
self.logger.info(f"{identifier} precision: {self.precision}")
self.logger.info(
"----------------------- Data info -----------------------")
self.logger.info(f"{identifier} batch_size: {self.batch_size}")
self.logger.info(f"{identifier} input_shape: {self.shape}")
self.logger.info(f"{identifier} data_num: {self.data_num}")
self.logger.info(
"----------------------- Perf info -----------------------")
self.logger.info(
f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms: {self.cpu_vms_mb}, cpu_shared_mb: {self.cpu_shared_mb}, cpu_dirty_mb: {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%"
)
self.logger.info(
f"{identifier} gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%"
)
self.logger.info(
f"{identifier} total time spent(s): {self.total_time_s}")
self.logger.info(
f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, inference_time(ms): {round(self.inference_time_s*1000, 1)}, postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}"
)
if self.inference_time_s_90:
self.logger.info(
f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}"
)
if self.qps:
self.logger.info(f"{identifier} QPS: {self.qps}")
def print_help(self):
"""
print function help
"""
print("""Usage:
==== Print inference benchmark logs. ====
config = paddle.inference.Config()
model_info = {'model_name': 'resnet50',
'precision': 'fp32'}
data_info = {'batch_size': 1,
'shape': '3,224,224',
'data_num': 1000}
perf_info = {'preprocess_time_s': 1.0,
'inference_time_s': 2.0,
'postprocess_time_s': 1.0,
'total_time_s': 4.0}
resource_info = {'cpu_rss_mb': 100,
'gpu_rss_mb': 100,
'gpu_util': 60}
log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info)
log('Test')
""")
def __call__(self, identifier=None):
"""
__call__
args:
identifier(string): identify log
"""
self.report(identifier)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import glob
from functools import reduce
import cv2
import numpy as np
import math
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from benchmark_utils import PaddleInferBenchmark
from picodet_postprocess import PicoDetPostProcess
from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize
from visualize import visualize_box_mask
from utils import argsparser, Timer, get_current_memory_mb
# Global dictionary
SUPPORT_MODELS = {
'YOLO',
'PicoDet',
'JDE',
'FairMOT',
'DeepSORT',
}
class Detector(object):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): batch size for inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantization
calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
self.pred_config = pred_config
self.predictor, self.config = load_predictor(
model_dir,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
def preprocess(self, image_list):
preprocess_ops = []
for op_info in self.pred_config.preprocess_infos:
new_op_info = op_info.copy()
op_type = new_op_info.pop('type')
preprocess_ops.append(eval(op_type)(**new_op_info))
input_im_lst = []
input_im_info_lst = []
for im_path in image_list:
im, im_info = preprocess(im_path, preprocess_ops)
input_im_lst.append(im)
input_im_info_lst.append(im_info)
inputs = create_inputs(input_im_lst, input_im_info_lst)
return inputs
def postprocess(self,
np_boxes,
np_masks,
inputs,
np_boxes_num,
threshold=0.5):
# postprocess output of predictor
results = {}
results['boxes'] = np_boxes
results['boxes_num'] = np_boxes_num
if np_masks is not None:
results['masks'] = np_masks
return results
def predict(self, image_list, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image_list (list): list of image
threshold (float): score threshold for predicted boxes
Returns:
results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of boxes,
matrix element: [class, score, x_min, y_min, x_max, y_max]
MaskRCNN's results include 'masks': np.ndarray:
shape: [N, im_h, im_w]
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_list)
self.det_times.preprocess_time_s.end()
np_boxes, np_masks = None, None
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
if self.pred_config.mask:
masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
boxes_num = self.predictor.get_output_handle(output_names[1])
np_boxes_num = boxes_num.copy_to_cpu()
if self.pred_config.mask:
masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start()
results = []
if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
print('[WARNING] No object detected.')
results = {'boxes': np.zeros([0, 6]), 'boxes_num': [0]}
else:
results = self.postprocess(
np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += len(image_list)
return results
def get_timer(self):
return self.det_times
class DetectorPicoDet(Detector):
"""
Args:
config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): batch size for inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantization
calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
self.pred_config = pred_config
self.predictor, self.config = load_predictor(
model_dir,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
def predict(self, image, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image (str/np.ndarray): path of image/ np.ndarray read by cv2
threshold (float): score threshold for predicted boxes
Returns:
results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of boxes,
matrix element: [class, score, x_min, y_min, x_max, y_max]
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
np_score_list, np_boxes_list = [], []
for i in range(warmup):
self.predictor.run()
np_score_list.clear()
np_boxes_list.clear()
output_names = self.predictor.get_output_names()
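# The head emits two tensors per FPN level: the first num_outs outputs are
# class scores, the next num_outs outputs are the matching box regressions.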
num_outs = int(len(output_names) / 2)
for out_idx in range(num_outs):
np_score_list.append(
self.predictor.get_output_handle(output_names[out_idx])
.copy_to_cpu())
np_boxes_list.append(
self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu())
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
np_score_list.clear()
np_boxes_list.clear()
output_names = self.predictor.get_output_names()
num_outs = int(len(output_names) / 2)
for out_idx in range(num_outs):
np_score_list.append(
self.predictor.get_output_handle(output_names[out_idx])
.copy_to_cpu())
np_boxes_list.append(
self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu())
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.img_num += 1
self.det_times.postprocess_time_s.start()
self.postprocess = PicoDetPostProcess(
inputs['image'].shape[2:],
inputs['im_shape'],
inputs['scale_factor'],
strides=self.pred_config.fpn_stride,
nms_threshold=self.pred_config.nms['nms_threshold'])
np_boxes, np_boxes_num = self.postprocess(np_score_list, np_boxes_list)
self.det_times.postprocess_time_s.end()
return dict(boxes=np_boxes, boxes_num=np_boxes_num)
def create_inputs(imgs, im_info):
"""generate input for different model type
Args:
imgs (list(numpy)): list of images (np.ndarray)
im_info (list(dict)): list of image info
Returns:
inputs (dict): input of model
"""
inputs = {}
im_shape = []
scale_factor = []
if len(imgs) == 1:
inputs['image'] = np.array((imgs[0], )).astype('float32')
inputs['im_shape'] = np.array(
(im_info[0]['im_shape'], )).astype('float32')
inputs['scale_factor'] = np.array(
(im_info[0]['scale_factor'], )).astype('float32')
return inputs
for e in im_info:
im_shape.append(np.array((e['im_shape'], )).astype('float32'))
scale_factor.append(np.array((e['scale_factor'], )).astype('float32'))
inputs['im_shape'] = np.concatenate(im_shape, axis=0)
inputs['scale_factor'] = np.concatenate(scale_factor, axis=0)
imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs]
max_shape_h = max([e[0] for e in imgs_shape])
max_shape_w = max([e[1] for e in imgs_shape])
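# Pad every image in the batch to the max height/width so they can be
# stacked into a single contiguous tensor.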
padding_imgs = []
for img in imgs:
im_c, im_h, im_w = img.shape[:]
padding_im = np.zeros(
(im_c, max_shape_h, max_shape_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = img
padding_imgs.append(padding_im)
inputs['image'] = np.stack(padding_imgs, axis=0)
return inputs
class PredictConfig():
"""set config of preprocess, postprocess and visualize
Args:
model_dir (str): root path of model.yml
"""
def __init__(self, model_dir):
# parsing Yaml config for Preprocess
deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
with open(deploy_file) as f:
yml_conf = yaml.safe_load(f)
self.check_model(yml_conf)
self.arch = yml_conf['arch']
self.preprocess_infos = yml_conf['Preprocess']
self.min_subgraph_size = yml_conf['min_subgraph_size']
self.labels = yml_conf['label_list']
self.mask = False
self.use_dynamic_shape = yml_conf['use_dynamic_shape']
if 'mask' in yml_conf:
self.mask = yml_conf['mask']
self.tracker = None
if 'tracker' in yml_conf:
self.tracker = yml_conf['tracker']
if 'NMS' in yml_conf:
self.nms = yml_conf['NMS']
if 'fpn_stride' in yml_conf:
self.fpn_stride = yml_conf['fpn_stride']
self.print_config()
def check_model(self, yml_conf):
"""
Raises:
ValueError: loaded model not in supported model type
"""
for support_model in SUPPORT_MODELS:
if support_model in yml_conf['arch']:
return True
raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[
'arch'], SUPPORT_MODELS))
def print_config(self):
print('----------- Model Configuration -----------')
print('%s: %s' % ('Model Arch', self.arch))
print('%s: ' % ('Transform Order'))
for op_info in self.preprocess_infos:
print('--%s: %s' % ('transform op', op_info['type']))
print('--------------------------------------------')
def load_predictor(model_dir,
run_mode='fluid',
batch_size=1,
device='CPU',
min_subgraph_size=3,
use_dynamic_shape=False,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantization
calibration, trt_calib_mode needs to be set to True
Returns:
predictor (PaddlePredictor): AnalysisPredictor
Raises:
ValueError: predict by TensorRT need device == 'GPU'.
"""
if device != 'GPU' and run_mode != 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
.format(run_mode, device))
config = Config(
os.path.join(model_dir, 'model.pdmodel'),
os.path.join(model_dir, 'model.pdiparams'))
if device == 'GPU':
# initial GPU memory(M), device ID
config.enable_use_gpu(200, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_xpu(10 * 1024 * 1024)
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
if enable_mkldnn:
try:
# cache 10 different shapes for mkldnn to avoid memory leak
config.set_mkldnn_cache_capacity(10)
config.enable_mkldnn()
except Exception as e:
print(
"The current environment does not support `mkldnn`, so disable mkldnn."
)
pass
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(
workspace_size=1 << 10,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=trt_calib_mode)
if use_dynamic_shape:
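# Register min/opt/max input shapes so TensorRT can build a dynamic-shape
# engine covering this resolution range.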
min_input_shape = {
'image': [batch_size, 3, trt_min_shape, trt_min_shape]
}
max_input_shape = {
'image': [batch_size, 3, trt_max_shape, trt_max_shape]
}
opt_input_shape = {
'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
}
config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
opt_input_shape)
print('trt set dynamic shape done!')
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = create_predictor(config)
return predictor, config
def get_test_images(infer_dir, infer_img):
"""
Get image path list in TEST mode
"""
assert infer_img is not None or infer_dir is not None, \
"--infer_img or --infer_dir should be set"
assert infer_img is None or os.path.isfile(infer_img), \
"{} is not a file".format(infer_img)
assert infer_dir is None or os.path.isdir(infer_dir), \
"{} is not a directory".format(infer_dir)
# infer_img has a higher priority
if infer_img and os.path.isfile(infer_img):
return [infer_img]
images = set()
infer_dir = os.path.abspath(infer_dir)
assert os.path.isdir(infer_dir), \
"infer_dir {} is not a directory".format(infer_dir)
exts = ['jpg', 'jpeg', 'png', 'bmp']
exts += [ext.upper() for ext in exts]
for ext in exts:
images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
images = list(images)
assert len(images) > 0, "no image found in {}".format(infer_dir)
print("Found {} inference images in total.".format(len(images)))
return images
def visualize(image_list, results, labels, output_dir='output/', threshold=0.5):
# visualize the predict result
start_idx = 0
for idx, image_file in enumerate(image_list):
im_bboxes_num = results['boxes_num'][idx]
im_results = {}
if 'boxes' in results:
im_results['boxes'] = results['boxes'][start_idx:start_idx +
im_bboxes_num, :]
if 'label' in results:
im_results['label'] = results['label'][start_idx:start_idx +
im_bboxes_num]
if 'score' in results:
im_results['score'] = results['score'][start_idx:start_idx +
im_bboxes_num]
start_idx += im_bboxes_num
im = visualize_box_mask(
image_file, im_results, labels, threshold=threshold)
img_name = os.path.split(image_file)[-1]
if not os.path.exists(output_dir):
os.makedirs(output_dir)
out_path = os.path.join(output_dir, img_name)
im.save(out_path, quality=95)
print("save result to: " + out_path)
def print_arguments(args):
print('----------- Running Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------')
def predict_image(detector, image_list, batch_size=1):
batch_loop_cnt = math.ceil(float(len(image_list)) / batch_size)
for i in range(batch_loop_cnt):
start_index = i * batch_size
end_index = min((i + 1) * batch_size, len(image_list))
batch_image_list = image_list[start_index:end_index]
if FLAGS.run_benchmark:
detector.predict(
batch_image_list, FLAGS.threshold, warmup=10, repeats=10)
cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm
detector.gpu_mem += gm
detector.gpu_util += gu
print('Test iter {}'.format(i))
else:
results = detector.predict(batch_image_list, FLAGS.threshold)
visualize(
batch_image_list,
results,
detector.pred_config.labels,
output_dir=FLAGS.output_dir,
threshold=FLAGS.threshold)
def predict_video(detector, camera_id):
video_out_name = 'output.mp4'
if camera_id != -1:
capture = cv2.VideoCapture(camera_id)
else:
capture = cv2.VideoCapture(FLAGS.video_file)
video_out_name = os.path.split(FLAGS.video_file)[-1]
# Get Video info : resolution, fps, frame count
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("fps: %d, frame_count: %d" % (fps, frame_count))
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_out_name)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
index = 1
while True:
ret, frame = capture.read()
if not ret:
break
print('detect frame: %d' % (index))
index += 1
results = detector.predict([frame], FLAGS.threshold)
im = visualize_box_mask(
frame,
results,
detector.pred_config.labels,
threshold=FLAGS.threshold)
im = np.array(im)
writer.write(im)
if camera_id != -1:
cv2.imshow('Mask Detection', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
writer.release()
def main():
pred_config = PredictConfig(FLAGS.model_dir)
detector_func = 'Detector'
if pred_config.arch == 'PicoDet':
detector_func = 'DetectorPicoDet'
detector = eval(detector_func)(pred_config,
FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
predict_video(detector, FLAGS.camera_id)
else:
# predict from image
if FLAGS.image_dir is None and FLAGS.image_file is not None:
assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None"
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
predict_image(detector, img_list, FLAGS.batch_size)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
else:
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
model_dir = FLAGS.model_dir
mode = FLAGS.run_mode
model_info = {
'model_name': model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
data_info = {
'batch_size': FLAGS.batch_size,
'shape': "dynamic_shape",
'data_num': perf_info['img_num']
}
det_log = PaddleInferBenchmark(detector.config, model_info,
data_info, perf_info, mems)
det_log('Det')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import matching
from . import tracker
from . import motion
from . import utils
from . import mtmct
from .matching import *
from .tracker import *
from .motion import *
from .utils import *
from .mtmct import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_matching
from . import deepsort_matching
from .jde_matching import *
from .deepsort_matching import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/tree/master/deep_sort
"""
import numpy as np
from scipy.optimize import linear_sum_assignment
from ..motion import kalman_filter
INFTY_COST = 1e+5
__all__ = [
'iou_1toN',
'iou_cost',
'_nn_euclidean_distance',
'_nn_cosine_distance',
'NearestNeighborDistanceMetric',
'min_cost_matching',
'matching_cascade',
'gate_cost_matrix',
]
def iou_1toN(bbox, candidates):
"""
Compute intersection over union (IoU) of one box against N candidates.
Args:
bbox (ndarray): A bounding box in format `(top left x, top left y, width, height)`.
candidates (ndarray): A matrix of candidate bounding boxes (one per row) in the
same format as `bbox`.
Returns:
ious (ndarray): The intersection over union in [0, 1] between the `bbox`
and each candidate. A higher score means a larger fraction of the
`bbox` is occluded by the candidate.
"""
bbox_tl = bbox[:2]
bbox_br = bbox[:2] + bbox[2:]
candidates_tl = candidates[:, :2]
candidates_br = candidates[:, :2] + candidates[:, 2:]
tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
wh = np.maximum(0., br - tl)
area_intersection = wh.prod(axis=1)
area_bbox = bbox[2:].prod()
area_candidates = candidates[:, 2:].prod(axis=1)
ious = area_intersection / (area_bbox + area_candidates - area_intersection)
return ious
def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
"""
IoU distance metric.
Args:
tracks (list[Track]): A list of tracks.
detections (list[Detection]): A list of detections.
track_indices (Optional[list[int]]): A list of indices to tracks that
should be matched. Defaults to all `tracks`.
detection_indices (Optional[list[int]]): A list of indices to detections
that should be matched. Defaults to all `detections`.
Returns:
cost_matrix (ndarray): A cost matrix of shape len(track_indices),
len(detection_indices) where entry (i, j) is
`1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
for row, track_idx in enumerate(track_indices):
if tracks[track_idx].time_since_update > 1:
cost_matrix[row, :] = 1e+5
continue
bbox = tracks[track_idx].to_tlwh()
candidates = np.asarray([detections[i].tlwh for i in detection_indices])
cost_matrix[row, :] = 1. - iou_1toN(bbox, candidates)
return cost_matrix
def _nn_euclidean_distance(s, q):
"""
Compute pair-wise squared (Euclidean) distance between points in `s` and `q`.
Args:
s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M.
q (ndarray): Query points: an LxM matrix of L samples of dimensionality M.
Returns:
distances (ndarray): A vector of length L that contains for each entry in `q` the
smallest squared Euclidean distance to a sample in `s`.
"""
s, q = np.asarray(s), np.asarray(q)
if len(s) == 0 or len(q) == 0:
return np.zeros((len(s), len(q)))
s2, q2 = np.square(s).sum(axis=1), np.square(q).sum(axis=1)
distances = -2. * np.dot(s, q.T) + s2[:, None] + q2[None, :]
distances = np.clip(distances, 0., float(np.inf))
return np.maximum(0.0, distances.min(axis=0))
def _nn_cosine_distance(s, q):
"""
Compute pair-wise cosine distance between points in `s` and `q`.
Args:
s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M.
q (ndarray): Query points: an LxM matrix of L samples of dimensionality M.
Returns:
distances (ndarray): A vector of length L that contains for each entry in `q` the
smallest cosine distance to a sample in `s`.
"""
s = np.asarray(s) / np.linalg.norm(s, axis=1, keepdims=True)
q = np.asarray(q) / np.linalg.norm(q, axis=1, keepdims=True)
distances = 1. - np.dot(s, q.T)
return distances.min(axis=0)
class NearestNeighborDistanceMetric(object):
"""
A nearest neighbor distance metric that, for each target, returns
the closest distance to any sample that has been observed so far.
Args:
metric (str): Either "euclidean" or "cosine".
matching_threshold (float): The matching threshold. Samples with larger
distance are considered an invalid match.
budget (Optional[int]): If not None, fix samples per class to at most
this number. Removes the oldest samples when the budget is reached.
Attributes:
samples (Dict[int -> List[ndarray]]): A dictionary that maps from target
identities to the list of samples that have been observed so far.
"""
def __init__(self, metric, matching_threshold, budget=None):
if metric == "euclidean":
self._metric = _nn_euclidean_distance
elif metric == "cosine":
self._metric = _nn_cosine_distance
else:
raise ValueError(
"Invalid metric; must be either 'euclidean' or 'cosine'")
self.matching_threshold = matching_threshold
self.budget = budget
self.samples = {}
def partial_fit(self, features, targets, active_targets):
"""
Update the distance metric with new data.
Args:
features (ndarray): An NxM matrix of N features of dimensionality M.
targets (ndarray): An integer array of associated target identities.
active_targets (List[int]): A list of targets that are currently
present in the scene.
"""
for feature, target in zip(features, targets):
self.samples.setdefault(target, []).append(feature)
if self.budget is not None:
self.samples[target] = self.samples[target][-self.budget:]
self.samples = {k: self.samples[k] for k in active_targets}
def distance(self, features, targets):
"""
Compute distance between features and targets.
Args:
features (ndarray): An NxM matrix of N features of dimensionality M.
targets (list[int]): A list of targets to match the given `features` against.
Returns:
cost_matrix (ndarray): a cost matrix of shape len(targets), len(features),
where element (i, j) contains the closest squared distance between
`targets[i]` and `features[j]`.
"""
cost_matrix = np.zeros((len(targets), len(features)))
for i, target in enumerate(targets):
cost_matrix[i, :] = self._metric(self.samples[target], features)
return cost_matrix
def min_cost_matching(distance_metric,
max_distance,
tracks,
detections,
track_indices=None,
detection_indices=None):
"""
Solve linear assignment problem.
Args:
distance_metric :
Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
The distance metric is given a list of tracks and detections as
well as a list of N track indices and M detection indices. The
metric should return the NxM dimensional cost matrix, where element
(i, j) is the association cost between the i-th track in the given
track indices and the j-th detection in the given detection_indices.
max_distance (float): Gating threshold. Associations with cost larger
than this value are disregarded.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (list[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
Returns:
A tuple (List[(int, int)], List[int], List[int]) with the following
three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
if len(detection_indices) == 0 or len(track_indices) == 0:
return [], track_indices, detection_indices # Nothing to match.
cost_matrix = distance_metric(tracks, detections, track_indices,
detection_indices)
cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
indices = linear_sum_assignment(cost_matrix)
matches, unmatched_tracks, unmatched_detections = [], [], []
for col, detection_idx in enumerate(detection_indices):
if col not in indices[1]:
unmatched_detections.append(detection_idx)
for row, track_idx in enumerate(track_indices):
if row not in indices[0]:
unmatched_tracks.append(track_idx)
for row, col in zip(indices[0], indices[1]):
track_idx = track_indices[row]
detection_idx = detection_indices[col]
if cost_matrix[row, col] > max_distance:
unmatched_tracks.append(track_idx)
unmatched_detections.append(detection_idx)
else:
matches.append((track_idx, detection_idx))
return matches, unmatched_tracks, unmatched_detections
def matching_cascade(distance_metric,
max_distance,
cascade_depth,
tracks,
detections,
track_indices=None,
detection_indices=None):
"""
Run matching cascade.
Args:
distance_metric :
Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
The distance metric is given a list of tracks and detections as
well as a list of N track indices and M detection indices. The
metric should return the NxM dimensional cost matrix, where element
(i, j) is the association cost between the i-th track in the given
track indices and the j-th detection in the given detection_indices.
max_distance (float): Gating threshold. Associations with cost larger
than this value are disregarded.
cascade_depth (int): The cascade depth, should be set to the maximum
track age.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (list[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
Returns:
A tuple (List[(int, int)], List[int], List[int]) with the following
three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
unmatched_detections = detection_indices
matches = []
for level in range(cascade_depth):
if len(unmatched_detections) == 0: # No detections left
break
track_indices_l = [
k for k in track_indices if tracks[k].time_since_update == 1 + level
]
if len(track_indices_l) == 0: # Nothing to match at this level
continue
matches_l, _, unmatched_detections = \
min_cost_matching(
distance_metric, max_distance, tracks, detections,
track_indices_l, unmatched_detections)
matches += matches_l
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, unmatched_detections
def gate_cost_matrix(kf,
cost_matrix,
tracks,
detections,
track_indices,
detection_indices,
gated_cost=INFTY_COST,
only_position=False):
"""
Invalidate infeasible entries in cost matrix based on the state
distributions obtained by Kalman filtering.
Args:
kf (object): The Kalman filter.
cost_matrix (ndarray): The NxM dimensional cost matrix, where N is the
number of track indices and M is the number of detection indices,
such that entry (i, j) is the association cost between
`tracks[track_indices[i]]` and `detections[detection_indices[j]]`.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (List[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
gated_cost (Optional[float]): Entries in the cost matrix corresponding
to infeasible associations are set this value. Defaults to a very
large value.
only_position (Optional[bool]): If True, only the x, y position of the
state distribution is considered during gating. Default False.
"""
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray(
[detections[i].to_xyah() for i in detection_indices])
for row, track_idx in enumerate(track_indices):
track = tracks[track_idx]
gating_distance = kf.gating_distance(track.mean, track.covariance,
measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py
"""
import lap
import scipy
import numpy as np
from scipy.spatial.distance import cdist
from ..motion import kalman_filter
__all__ = [
'merge_matches',
'linear_assignment',
'cython_bbox_ious',
'iou_distance',
'embedding_distance',
'fuse_motion',
]
def merge_matches(m1, m2, shape):
O, P, Q = shape
m1 = np.asarray(m1)
m2 = np.asarray(m2)
M1 = scipy.sparse.coo_matrix(
(np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
M2 = scipy.sparse.coo_matrix(
(np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
mask = M1 * M2
match = mask.nonzero()
match = list(zip(match[0], match[1]))
unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
return match, unmatched_O, unmatched_Q
def linear_assignment(cost_matrix, thresh):
if cost_matrix.size == 0:
return np.empty(
(0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(
range(cost_matrix.shape[1]))
matches, unmatched_a, unmatched_b = [], [], []
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
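# lapjv returns (cost, x, y): x[i] is the column assigned to row i (-1 if
# unassigned above cost_limit), y[j] is the row assigned to column j.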
for ix, mx in enumerate(x):
if mx >= 0:
matches.append([ix, mx])
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
matches = np.asarray(matches)
return matches, unmatched_a, unmatched_b
def cython_bbox_ious(atlbrs, btlbrs):
ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
if ious.size == 0:
return ious
try:
import cython_bbox
except Exception as e:
print('cython_bbox not found, please install cython_bbox. '
'For example: `pip install cython_bbox`.')
exit()
ious = cython_bbox.bbox_overlaps(
np.ascontiguousarray(
atlbrs, dtype=np.float64),
np.ascontiguousarray(
btlbrs, dtype=np.float64))
return ious
def iou_distance(atracks, btracks):
"""
Compute cost based on IoU between two list[STrack].
"""
if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
_ious = cython_bbox_ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def embedding_distance(tracks, detections, metric='euclidean'):
"""
Compute cost based on features between two list[STrack].
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray(
[track.curr_feat for track in detections], dtype=np.float64)
track_features = np.asarray(
[track.smooth_feat for track in tracks], dtype=np.float64)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features,
metric))  # Normalized features
return cost_matrix
def fuse_motion(kf,
cost_matrix,
tracks,
detections,
only_position=False,
lambda_=0.98):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray([det.to_xyah() for det in detections])
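# Gate infeasible pairs to inf via the chi-square threshold, then fuse the
# appearance cost with the Mahalanobis motion distance:
# cost = lambda_ * appearance + (1 - lambda_) * motion.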
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean,
track.covariance,
measurements,
only_position,
metric='maha')
cost_matrix[row, gating_distance > gating_threshold] = np.inf
cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_
) * gating_distance
return cost_matrix
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import kalman_filter
from .kalman_filter import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/kalman_filter.py
"""
import numpy as np
import scipy.linalg
__all__ = ['KalmanFilter']
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919
}
class KalmanFilter(object):
"""
A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space
x, y, a, h, vx, vy, va, vh
contains the bounding box center position (x, y), aspect ratio a, height h,
and their respective velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y, a, h) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self):
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
self._motion_mat = np.eye(2 * ndim, 2 * ndim)
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
self._update_mat = np.eye(ndim, 2 * ndim)
# Motion and observation uncertainty are chosen relative to the current
# state estimate. These weights control the amount of uncertainty in
# the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""
Create track from unassociated measurement.
Args:
measurement (ndarray): Bounding box coordinates (x, y, a, h) with
center position (x, y), aspect ratio a, and height h.
Returns:
The mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are
initialized to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[3], 1e-2,
2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3], 1e-5,
10 * self._std_weight_velocity * measurement[3]
]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""
Run Kalman filter prediction step.
Args:
mean (ndarray): The 8 dimensional mean vector of the object state
at the previous time step.
covariance (ndarray): The 8x8 dimensional covariance matrix of the
object state at the previous time step.
Returns:
The mean vector and covariance matrix of the predicted state.
Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3], self._std_weight_position *
mean[3], 1e-2, self._std_weight_position * mean[3]
]
std_vel = [
self._std_weight_velocity * mean[3], self._std_weight_velocity *
mean[3], 1e-5, self._std_weight_velocity * mean[3]
]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
#mean = np.dot(self._motion_mat, mean)
mean = np.dot(mean, self._motion_mat.T)
covariance = np.linalg.multi_dot(
(self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""
Project state distribution to measurement space.
        Args:
mean (ndarray): The state's mean vector (8 dimensional array).
covariance (ndarray): The state's covariance matrix (8x8 dimensional).
Returns:
The projected mean and covariance matrix of the given state estimate.
"""
std = [
self._std_weight_position * mean[3], self._std_weight_position *
mean[3], 1e-1, self._std_weight_position * mean[3]
]
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((self._update_mat, covariance,
self._update_mat.T))
return mean, covariance + innovation_cov
def multi_predict(self, mean, covariance):
"""
Run Kalman filter prediction step (Vectorized version).
Args:
mean (ndarray): The Nx8 dimensional mean matrix of the object states
at the previous time step.
        covariance (ndarray): The Nx8x8 dimensional covariance matrices of the
object states at the previous time step.
Returns:
        The mean matrix and covariance matrices of the predicted states.
Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[:, 3], self._std_weight_position *
mean[:, 3], 1e-2 * np.ones_like(mean[:, 3]),
self._std_weight_position * mean[:, 3]
]
std_vel = [
self._std_weight_velocity * mean[:, 3], self._std_weight_velocity *
mean[:, 3], 1e-5 * np.ones_like(mean[:, 3]),
self._std_weight_velocity * mean[:, 3]
]
sqr = np.square(np.r_[std_pos, std_vel]).T
motion_cov = []
for i in range(len(mean)):
motion_cov.append(np.diag(sqr[i]))
motion_cov = np.asarray(motion_cov)
mean = np.dot(mean, self._motion_mat.T)
left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
covariance = np.dot(left, self._motion_mat.T) + motion_cov
return mean, covariance
def update(self, mean, covariance, measurement):
"""
Run Kalman filter correction step.
Args:
mean (ndarray): The predicted state's mean vector (8 dimensional).
covariance (ndarray): The state's covariance matrix (8x8 dimensional).
measurement (ndarray): The 4 dimensional measurement vector
(x, y, a, h), where (x, y) is the center position, a the aspect
ratio, and h the height of the bounding box.
Returns:
The measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance)
chol_factor, lower = scipy.linalg.cho_factor(
projected_cov, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower),
np.dot(covariance, self._update_mat.T).T,
check_finite=False).T
innovation = measurement - projected_mean
new_mean = mean + np.dot(innovation, kalman_gain.T)
new_covariance = covariance - np.linalg.multi_dot(
(kalman_gain, projected_cov, kalman_gain.T))
return new_mean, new_covariance
def gating_distance(self,
mean,
covariance,
measurements,
only_position=False,
metric='maha'):
"""
Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Args:
mean (ndarray): Mean vector over the state distribution (8
dimensional).
covariance (ndarray): Covariance of the state distribution (8x8
dimensional).
measurements (ndarray): An Nx4 dimensional matrix of N measurements,
each in format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
only_position (Optional[bool]): If True, distance computation is
done with respect to the bounding box center position only.
metric (str): Metric type, 'gaussian' or 'maha'.
        Returns:
An array of length N, where the i-th element contains the squared
Mahalanobis distance between (mean, covariance) and `measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
d = measurements - mean
if metric == 'gaussian':
return np.sum(d * d, axis=1)
elif metric == 'maha':
cholesky_factor = np.linalg.cholesky(covariance)
z = scipy.linalg.solve_triangular(
cholesky_factor,
d.T,
lower=True,
check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
else:
raise ValueError('invalid distance metric')
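# Example usage (illustrative sketch): one initiate/predict/update cycle on a
# measurement in (x, y, a, h) format, then gate a candidate at 0.95 confidence.
#   >>> kf = KalmanFilter()
#   >>> mean, cov = kf.initiate(np.array([100., 50., 0.5, 80.]))
#   >>> mean, cov = kf.predict(mean, cov)  # constant-velocity step
#   >>> mean, cov = kf.update(mean, cov, np.array([102., 51., 0.5, 81.]))
#   >>> d = kf.gating_distance(mean, cov, np.array([[102., 51., 0.5, 81.]]))
#   >>> bool(d[0] < chi2inv95[4])  # inside the 4-DOF gate
#   True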
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import utils
from . import postprocess
from .utils import *
from .postprocess import *
# The following codes are strongly related to zone and camera parameters
from . import camera_utils
from . import zone
from .camera_utils import *
from .zone import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from .utils import get_dire, get_match, get_cid_tid, combin_feature, combin_cluster
from .utils import normalize, intracam_ignore, visual_rerank
__all__ = [
'st_filter',
'get_labels_with_camera',
]
CAM_DIST = [[0, 40, 55, 100, 120, 145], [40, 0, 15, 60, 80, 105],
[55, 15, 0, 40, 65, 90], [100, 60, 40, 0, 20, 45],
[120, 80, 65, 20, 0, 25], [145, 105, 90, 45, 25, 0]]
def st_filter(st_mask, cid_tids, cid_tid_dict):
count = len(cid_tids)
for i in range(count):
i_tracklet = cid_tid_dict[cid_tids[i]]
i_cid = i_tracklet['cam']
i_dire = get_dire(i_tracklet['zone_list'], i_cid)
i_iot = i_tracklet['io_time']
for j in range(count):
j_tracklet = cid_tid_dict[cid_tids[j]]
j_cid = j_tracklet['cam']
j_dire = get_dire(j_tracklet['zone_list'], j_cid)
j_iot = j_tracklet['io_time']
match_dire = True
cam_dist = CAM_DIST[i_cid - 41][j_cid - 41]
            # if time intervals overlap
if i_iot[0] - cam_dist < j_iot[0] and j_iot[0] < i_iot[
1] + cam_dist:
match_dire = False
if i_iot[0] - cam_dist < j_iot[1] and j_iot[1] < i_iot[
1] + cam_dist:
match_dire = False
            # do not match after the track leaves
if i_dire[1] in [1, 2]: # i out
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_dire[1] in [1, 2]:
if i_dire[0] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
match_dire = False
if i_cid in [41] and i_dire[1] in [4]:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_iot[1] > 199:
match_dire = False
if i_cid in [46] and i_dire[1] in [3]:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
            # match after the track enters
if i_dire[0] in [1, 2]:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
if i_dire[0] in [1, 2]:
if i_dire[1] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
match_dire = False
is_ignore = False
if ((i_dire[0] == i_dire[1] and i_dire[0] in [3, 4]) or
(j_dire[0] == j_dire[1] and j_dire[0] in [3, 4])):
is_ignore = True
if not is_ignore:
# direction conflict
if (i_dire[0] in [3] and j_dire[0] in [4]) or (
i_dire[1] in [3] and j_dire[1] in [4]):
match_dire = False
                # filter before going to the next scene
if i_dire[1] in [3] and i_cid < j_cid:
if i_iot[1] > j_iot[1] - cam_dist:
match_dire = False
if i_dire[1] in [4] and i_cid > j_cid:
if i_iot[1] > j_iot[1] - cam_dist:
match_dire = False
if i_dire[0] in [3] and i_cid < j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid > j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
## 3-30
## 4-1
if i_dire[0] in [3] and i_cid > j_cid:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
                # filter before going to the next scene
## 4-7
if i_dire[1] in [3] and i_cid > j_cid:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
else:
if i_iot[1] > 199:
if i_dire[0] in [3] and i_cid < j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid > j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
match_dire = False
if i_iot[0] < 1:
if i_dire[1] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
match_dire = False
if not match_dire:
st_mask[i, j] = 0.0
st_mask[j, i] = 0.0
return st_mask
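# Note (summary): st_filter zeroes st_mask entries for (i, j) pairs whose
# camera distance, in/out zones and entry/exit times make a cross-camera match
# infeasible in the AIC21 S06 scene (cameras 41-46, hence the cid - 41 index).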
def subcam_list(cid_tid_dict, cid_tids):
sub_3_4 = dict()
sub_4_3 = dict()
for cid_tid in cid_tids:
cid, tid = cid_tid
tracklet = cid_tid_dict[cid_tid]
zs, ze = get_dire(tracklet['zone_list'], cid)
if zs in [3] and cid not in [46]: # 4 to 3
if not cid + 1 in sub_4_3:
sub_4_3[cid + 1] = []
sub_4_3[cid + 1].append(cid_tid)
if ze in [4] and cid not in [41]: # 4 to 3
if not cid in sub_4_3:
sub_4_3[cid] = []
sub_4_3[cid].append(cid_tid)
if zs in [4] and cid not in [41]: # 3 to 4
if not cid - 1 in sub_3_4:
sub_3_4[cid - 1] = []
sub_3_4[cid - 1].append(cid_tid)
if ze in [3] and cid not in [46]: # 3 to 4
if not cid in sub_3_4:
sub_3_4[cid] = []
sub_3_4[cid].append(cid_tid)
sub_cid_tids = dict()
for i in sub_3_4:
sub_cid_tids[(i, i + 1)] = sub_3_4[i]
for i in sub_4_3:
sub_cid_tids[(i, i - 1)] = sub_4_3[i]
return sub_cid_tids
def subcam_list2(cid_tid_dict, cid_tids):
sub_dict = dict()
for cid_tid in cid_tids:
cid, tid = cid_tid
if cid not in [41]:
if not cid in sub_dict:
sub_dict[cid] = []
sub_dict[cid].append(cid_tid)
if cid not in [46]:
if not cid + 1 in sub_dict:
sub_dict[cid + 1] = []
sub_dict[cid + 1].append(cid_tid)
return sub_dict
def get_sim_matrix(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
    # Note: camera-related get_sim_matrix function,
    # which is different from the one in utils.py.
count = len(cid_tids)
q_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
g_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
q_arr = normalize(q_arr, axis=1)
g_arr = normalize(g_arr, axis=1)
st_mask = np.ones((count, count), dtype=np.float32)
st_mask = intracam_ignore(st_mask, cid_tids)
# different from utils.py
if use_st_filter:
st_mask = st_filter(st_mask, cid_tids, cid_tid_dict)
visual_sim_matrix = visual_rerank(
q_arr, g_arr, cid_tids, use_ff=use_ff, use_rerank=use_rerank)
visual_sim_matrix = visual_sim_matrix.astype('float32')
np.set_printoptions(precision=3)
sim_matrix = visual_sim_matrix * st_mask
np.fill_diagonal(sim_matrix, 0)
return sim_matrix
def get_labels_with_camera(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
# 1st cluster
sub_cid_tids = subcam_list(cid_tid_dict, cid_tids)
sub_labels = dict()
dis_thrs = [0.7, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5]
for i, sub_c_to_c in enumerate(sub_cid_tids):
sim_matrix = get_sim_matrix(
cid_tid_dict,
sub_cid_tids[sub_c_to_c],
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=1 - dis_thrs[i],
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
cluster_cid_tids = get_cid_tid(labels, sub_cid_tids[sub_c_to_c])
sub_labels[sub_c_to_c] = cluster_cid_tids
labels, sub_cluster = combin_cluster(sub_labels, cid_tids)
# 2nd cluster
cid_tid_dict_new = combin_feature(cid_tid_dict, sub_cluster)
sub_cid_tids = subcam_list2(cid_tid_dict_new, cid_tids)
sub_labels = dict()
for i, sub_c_to_c in enumerate(sub_cid_tids):
sim_matrix = get_sim_matrix(
cid_tid_dict_new,
sub_cid_tids[sub_c_to_c],
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=1 - 0.1,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
cluster_cid_tids = get_cid_tid(labels, sub_cid_tids[sub_c_to_c])
sub_labels[sub_c_to_c] = cluster_cid_tids
labels, sub_cluster = combin_cluster(sub_labels, cid_tids)
return labels
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import re
import cv2
from tqdm import tqdm
import pickle
import os
import os.path as osp
from os.path import join as opj
import numpy as np
import motmetrics as mm
from functools import reduce
from .utils import parse_pt_gt, parse_pt, compare_dataframes_mtmc
from .utils import get_labels, getData, gen_new_mot
from .camera_utils import get_labels_with_camera
from .zone import Zone
from ..utils import plot_tracking
__all__ = [
'trajectory_fusion',
'sub_cluster',
'gen_res',
'print_mtmct_result',
'get_mtmct_matching_results',
'save_mtmct_crops',
'save_mtmct_vis_results',
]
def trajectory_fusion(mot_feature, cid, cid_bias, use_zone=False, zone_path=''):
cur_bias = cid_bias[cid]
mot_list_break = {}
if use_zone:
zones = Zone(zone_path=zone_path)
zones.set_cam(cid)
mot_list = parse_pt(mot_feature, zones)
else:
mot_list = parse_pt(mot_feature)
if use_zone:
mot_list = zones.break_mot(mot_list, cid)
mot_list = zones.filter_mot(mot_list, cid) # filter by zone
mot_list = zones.filter_bbox(mot_list, cid) # filter bbox
mot_list_break = gen_new_mot(mot_list) # save break feature for gen result
tid_data = dict()
for tid in mot_list:
tracklet = mot_list[tid]
if len(tracklet) <= 1:
continue
frame_list = list(tracklet.keys())
frame_list.sort()
        # keep features only from boxes with area > 2000
zone_list = [tracklet[f]['zone'] for f in frame_list]
feature_list = [
tracklet[f]['feat'] for f in frame_list
if (tracklet[f]['bbox'][3] - tracklet[f]['bbox'][1]
) * (tracklet[f]['bbox'][2] - tracklet[f]['bbox'][0]) > 2000
]
if len(feature_list) < 2:
feature_list = [tracklet[f]['feat'] for f in frame_list]
io_time = [
cur_bias + frame_list[0] / 10., cur_bias + frame_list[-1] / 10.
]
all_feat = np.array([feat for feat in feature_list])
mean_feat = np.mean(all_feat, axis=0)
tid_data[tid] = {
'cam': cid,
'tid': tid,
'mean_feat': mean_feat,
'zone_list': zone_list,
'frame_list': frame_list,
'tracklet': tracklet,
'io_time': io_time
}
return tid_data, mot_list_break
def sub_cluster(cid_tid_dict,
scene_cluster,
use_ff=True,
use_rerank=True,
use_camera=False,
use_st_filter=False):
'''
cid_tid_dict: all camera_id and track_id
scene_cluster: like [41, 42, 43, 44, 45, 46] in AIC21 MTMCT S06 test videos
'''
    assert (len(scene_cluster) != 0), "Error: scene_cluster must not be empty"
cid_tids = sorted(
[key for key in cid_tid_dict.keys() if key[0] in scene_cluster])
if use_camera:
clu = get_labels_with_camera(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
else:
clu = get_labels(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
new_clu = list()
for c_list in clu:
if len(c_list) <= 1: continue
cam_list = [cid_tids[c][0] for c in c_list]
if len(cam_list) != len(set(cam_list)): continue
new_clu.append([cid_tids[c] for c in c_list])
all_clu = new_clu
cid_tid_label = dict()
for i, c_list in enumerate(all_clu):
for c in c_list:
cid_tid_label[c] = i + 1
return cid_tid_label
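# Example (illustrative sketch of the expected inputs): cid_tid_dict keys are
# (camera_id, track_id) tuples built from the trajectory_fusion outputs, e.g.
#   {(41, 1): {'cam': 41, 'tid': 1, 'mean_feat': ..., 'io_time': ...}, ...}
# and the returned cid_tid_label maps each matched (cid, tid) to a global id
# starting from 1.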
def gen_res(output_dir_filename,
scene_cluster,
map_tid,
mot_list_breaks,
use_roi=False,
roi_dir=''):
f_w = open(output_dir_filename, 'w')
for idx, mot_feature in enumerate(mot_list_breaks):
cid = scene_cluster[idx]
img_rects = parse_pt_gt(mot_feature)
if use_roi:
            assert (roi_dir != ''), "Error: roi_dir should not be empty!"
roi = cv2.imread(os.path.join(roi_dir, f'c{cid:03d}/roi.jpg'), 0)
height, width = roi.shape
for fid in img_rects:
tid_rects = img_rects[fid]
fid = int(fid) + 1
for tid_rect in tid_rects:
tid = tid_rect[0]
rect = tid_rect[1:]
cx = 0.5 * rect[0] + 0.5 * rect[2]
cy = 0.5 * rect[1] + 0.5 * rect[3]
w = rect[2] - rect[0]
w = min(w * 1.2, w + 40)
h = rect[3] - rect[1]
h = min(h * 1.2, h + 40)
rect[2] -= rect[0]
rect[3] -= rect[1]
rect[0] = max(0, rect[0])
rect[1] = max(0, rect[1])
x1, y1 = max(0, cx - 0.5 * w), max(0, cy - 0.5 * h)
if use_roi:
x2, y2 = min(width, cx + 0.5 * w), min(height, cy + 0.5 * h)
else:
x2, y2 = cx + 0.5 * w, cy + 0.5 * h
w, h = x2 - x1, y2 - y1
new_rect = list(map(int, [x1, y1, w, h]))
rect = list(map(int, rect))
if (cid, tid) in map_tid:
new_tid = map_tid[(cid, tid)]
f_w.write(
str(cid) + ' ' + str(new_tid) + ' ' + str(fid) + ' ' +
' '.join(map(str, new_rect)) + ' -1 -1'
'\n')
print('gen_res: write file in {}'.format(output_dir_filename))
f_w.close()
def print_mtmct_result(gt_file, pred_file):
names = [
'CameraId', 'Id', 'FrameId', 'X', 'Y', 'Width', 'Height', 'Xworld',
'Yworld'
]
gt = getData(gt_file, names=names)
pred = getData(pred_file, names=names)
summary = compare_dataframes_mtmc(gt, pred)
print('MTMCT summary: ', summary.columns.tolist())
formatters = {
'idf1': '{:2.2f}'.format,
'idp': '{:2.2f}'.format,
'idr': '{:2.2f}'.format,
'mota': '{:2.2f}'.format
}
summary = summary[['idf1', 'idp', 'idr', 'mota']]
summary.loc[:, 'idp'] *= 100
summary.loc[:, 'idr'] *= 100
summary.loc[:, 'idf1'] *= 100
summary.loc[:, 'mota'] *= 100
print(
mm.io.render_summary(
summary,
formatters=formatters,
namemap=mm.io.motchallenge_metric_names))
def get_mtmct_matching_results(pred_mtmct_file, secs_interval=0.5,
video_fps=20):
res = np.loadtxt(pred_mtmct_file) # 'cid, tid, fid, x1, y1, w, h, -1, -1'
carame_ids = list(map(int, np.unique(res[:, 0])))
num_track_ids = int(np.max(res[:, 1]))
num_frames = int(np.max(res[:, 2]))
res = res[:, :7]
# each line in res: 'cid, tid, fid, x1, y1, w, h'
carame_tids = []
carame_results = dict()
for c_id in carame_ids:
carame_results[c_id] = res[res[:, 0] == c_id]
tids = np.unique(carame_results[c_id][:, 1])
tids = list(map(int, tids))
carame_tids.append(tids)
# select common tids throughout each video
common_tids = reduce(np.intersect1d, carame_tids)
if len(common_tids) == 0:
print(
'No common tracked ids in these videos, please check your MOT result or select new videos.'
)
return None
# get mtmct matching results by cid_tid_fid_results[c_id][t_id][f_id]
cid_tid_fid_results = dict()
cid_tid_to_fids = dict()
interval = int(secs_interval * video_fps) # preferably less than 10
for c_id in carame_ids:
cid_tid_fid_results[c_id] = dict()
cid_tid_to_fids[c_id] = dict()
for t_id in common_tids:
tid_mask = carame_results[c_id][:, 1] == t_id
cid_tid_fid_results[c_id][t_id] = dict()
carame_trackid_results = carame_results[c_id][tid_mask]
fids = np.unique(carame_trackid_results[:, 2])
fids = fids[fids % interval == 0]
fids = list(map(int, fids))
cid_tid_to_fids[c_id][t_id] = fids
for f_id in fids:
st_frame = f_id
ed_frame = f_id + interval
st_mask = carame_trackid_results[:, 2] >= st_frame
ed_mask = carame_trackid_results[:, 2] < ed_frame
frame_mask = np.logical_and(st_mask, ed_mask)
cid_tid_fid_results[c_id][t_id][f_id] = carame_trackid_results[
frame_mask]
return carame_results, cid_tid_fid_results
def save_mtmct_crops(cid_tid_fid_res,
images_dir,
crops_dir,
width=300,
height=200):
carame_ids = cid_tid_fid_res.keys()
seqs_folder = os.listdir(images_dir)
seqs = []
for x in seqs_folder:
if os.path.isdir(os.path.join(images_dir, x)):
seqs.append(x)
assert len(seqs) == len(carame_ids)
seqs.sort()
if not os.path.exists(crops_dir):
os.makedirs(crops_dir)
common_tids = list(cid_tid_fid_res[list(carame_ids)[0]].keys())
    # get crops by name 'tid_cid_fid.jpg'
for t_id in common_tids:
for i, c_id in enumerate(carame_ids):
infer_dir = os.path.join(images_dir, seqs[i])
if os.path.exists(os.path.join(infer_dir, 'img1')):
infer_dir = os.path.join(infer_dir, 'img1')
all_images = os.listdir(infer_dir)
all_images.sort()
for f_id in cid_tid_fid_res[c_id][t_id].keys():
frame_idx = f_id - 1 if f_id > 0 else 0
im_path = os.path.join(infer_dir, all_images[frame_idx])
im = cv2.imread(im_path) # (H, W, 3)
track = cid_tid_fid_res[c_id][t_id][f_id][
0] # only select one track
cid, tid, fid, x1, y1, w, h = [int(v) for v in track]
clip = im[y1:(y1 + h), x1:(x1 + w)]
clip = cv2.resize(clip, (width, height))
cv2.imwrite(
os.path.join(crops_dir,
'tid{:06d}_cid{:06d}_fid{:06d}.jpg'.format(
tid, cid, fid)), clip)
            print("Finished cropping images of tracked id {} in camera {}".
                  format(t_id, c_id))
def save_mtmct_vis_results(carame_results,
images_dir,
save_dir,
save_videos=False):
# carame_results: 'cid, tid, fid, x1, y1, w, h'
carame_ids = carame_results.keys()
seqs_folder = os.listdir(images_dir)
seqs = []
for x in seqs_folder:
if os.path.isdir(os.path.join(images_dir, x)):
seqs.append(x)
assert len(seqs) == len(carame_ids)
seqs.sort()
if not os.path.exists(save_dir):
os.makedirs(save_dir)
for i, c_id in enumerate(carame_ids):
print("Start visualization for camera {} of sequence {}.".format(
c_id, seqs[i]))
cid_save_dir = os.path.join(save_dir, '{}'.format(seqs[i]))
if not os.path.exists(cid_save_dir):
os.makedirs(cid_save_dir)
infer_dir = os.path.join(images_dir, seqs[i])
if os.path.exists(os.path.join(infer_dir, 'img1')):
infer_dir = os.path.join(infer_dir, 'img1')
all_images = os.listdir(infer_dir)
all_images.sort()
for f_id, im_path in enumerate(all_images):
img = cv2.imread(os.path.join(infer_dir, im_path))
tracks = carame_results[c_id][carame_results[c_id][:, 2] == f_id]
if tracks.shape[0] > 0:
tracked_ids = tracks[:, 1]
xywhs = tracks[:, 3:]
online_im = plot_tracking(
img, xywhs, tracked_ids, scores=None, frame_id=f_id)
else:
online_im = img
print('Frame {} of seq {} has no tracking results'.format(
f_id, seqs[i]))
cv2.imwrite(
os.path.join(cid_save_dir, '{:05d}.jpg'.format(f_id)),
online_im)
if f_id % 40 == 0:
print('Processing frame {}'.format(f_id))
if save_videos:
output_video_path = os.path.join(cid_save_dir, '..',
'{}_mtmct_vis.mp4'.format(seqs[i]))
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
cid_save_dir, output_video_path)
os.system(cmd_str)
            print('Saved video of sequence {} in {}.'.format(
                seqs[i], output_video_path))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import os
import re
import cv2
import paddle
import numpy as np
from sklearn import preprocessing
from sklearn.cluster import AgglomerativeClustering
import gc
import motmetrics as mm
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
__all__ = [
'parse_pt', 'parse_bias', 'get_dire', 'parse_pt_gt',
'compare_dataframes_mtmc', 'get_sim_matrix', 'get_labels', 'getData',
'gen_new_mot'
]
def parse_pt(mot_feature, zones=None):
mot_list = dict()
for line in mot_feature:
fid = int(re.sub('[a-z,A-Z]', "", mot_feature[line]['frame']))
tid = mot_feature[line]['id']
bbox = list(map(lambda x: int(float(x)), mot_feature[line]['bbox']))
if tid not in mot_list:
mot_list[tid] = dict()
out_dict = mot_feature[line]
if zones is not None:
out_dict['zone'] = zones.get_zone(bbox)
else:
out_dict['zone'] = None
mot_list[tid][fid] = out_dict
return mot_list
def gen_new_mot(mot_list):
out_dict = dict()
for tracklet in mot_list:
tracklet = mot_list[tracklet]
for f in tracklet:
out_dict[tracklet[f]['imgname']] = tracklet[f]
return out_dict
def mergesetfeat1_notrk(P, neg_vector, in_feats, in_labels):
out_feats = []
for i in range(in_feats.shape[0]):
camera_id = in_labels[i, 1]
feat = in_feats[i] - neg_vector[camera_id]
feat = P[camera_id].dot(feat)
feat = feat / np.linalg.norm(feat, ord=2)
out_feats.append(feat)
out_feats = np.vstack(out_feats)
return out_feats
def compute_P2(prb_feats, gal_feats, gal_labels, la=3.0):
X = gal_feats
neg_vector = {}
u_labels = np.unique(gal_labels[:, 1])
P = {}
for label in u_labels:
curX = gal_feats[gal_labels[:, 1] == label, :]
neg_vector[label] = np.mean(curX, axis=0)
P[label] = np.linalg.inv(
curX.T.dot(curX) + curX.shape[0] * la * np.eye(X.shape[1]))
return P, neg_vector
def parse_bias(cameras_bias):
cid_bias = dict()
for cameras in cameras_bias.keys():
cameras_id = re.sub('[a-z,A-Z]', "", cameras)
cameras_id = int(cameras_id)
bias = cameras_bias[cameras]
cid_bias[cameras_id] = float(bias)
return cid_bias
def get_dire(zone_list, cid):
zs, ze = zone_list[0], zone_list[-1]
return (zs, ze)
def intracam_ignore(st_mask, cid_tids):
count = len(cid_tids)
for i in range(count):
for j in range(count):
if cid_tids[i][0] == cid_tids[j][0]:
st_mask[i, j] = 0.
return st_mask
def mergesetfeat(in_feats, in_labels, in_tracks):
trackset = list(set(list(in_tracks)))
out_feats = []
out_labels = []
for track in trackset:
feat = np.mean(in_feats[in_tracks == track], axis=0)
feat = feat / np.linalg.norm(feat, ord=2)
label = in_labels[in_tracks == track][0]
out_feats.append(feat)
out_labels.append(label)
out_feats = np.vstack(out_feats)
out_labels = np.vstack(out_labels)
return out_feats, out_labels
def mergesetfeat3(X, labels, gX, glabels, beta=0.08, knn=20, lr=0.5):
for i in range(0, X.shape[0]):
if i % 1000 == 0:
print('feat3:%d/%d' % (i, X.shape[0]))
knnX = gX[glabels[:, 1] != labels[i, 1], :]
sim = knnX.dot(X[i, :])
knnX = knnX[sim > 0, :]
sim = sim[sim > 0]
if len(sim) > 0:
idx = np.argsort(-sim)
if len(sim) > 2 * knn:
sim = sim[idx[:2 * knn]]
knnX = knnX[idx[:2 * knn], :]
else:
sim = sim[idx]
knnX = knnX[idx, :]
knn = min(knn, len(sim))
knn_pos_weight = np.exp((sim[:knn] - 1) / beta)
knn_neg_weight = np.ones(len(sim) - knn)
knn_pos_prob = knn_pos_weight / np.sum(knn_pos_weight)
knn_neg_prob = knn_neg_weight / np.sum(knn_neg_weight)
X[i, :] += lr * (knn_pos_prob.dot(knnX[:knn, :]) -
knn_neg_prob.dot(knnX[knn:, :]))
X[i, :] /= np.linalg.norm(X[i, :])
return X
def run_fic(prb_feats, gal_feats, prb_labels, gal_labels, la=3.0):
P, neg_vector = compute_P2(prb_feats, gal_feats, gal_labels, la)
prb_feats_new = mergesetfeat1_notrk(P, neg_vector, prb_feats, prb_labels)
gal_feats_new = mergesetfeat1_notrk(P, neg_vector, gal_feats, gal_labels)
return prb_feats_new, gal_feats_new
def run_fac(prb_feats,
gal_feats,
prb_labels,
gal_labels,
beta=0.08,
knn=20,
lr=0.5,
prb_epoch=2,
gal_epoch=3):
gal_feats_new = gal_feats.copy()
for i in range(prb_epoch):
gal_feats_new = mergesetfeat3(gal_feats_new, gal_labels, gal_feats,
gal_labels, beta, knn, lr)
prb_feats_new = prb_feats.copy()
for i in range(gal_epoch):
prb_feats_new = mergesetfeat3(prb_feats_new, prb_labels, gal_feats_new,
gal_labels, beta, knn, lr)
return prb_feats_new, gal_feats_new
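# Note (illustrative summary): run_fic whitens features per camera by
# subtracting the camera mean and applying the ridge-regularized projection
# P = (X^T X + n * la * I)^{-1} from compute_P2; run_fac then smooths each
# feature toward its top-knn positive neighbors (weighted by exp((sim-1)/beta))
# and away from the remaining negatives, for a few epochs.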
def euclidean_distance(qf, gf):
    # assumes L2-normalized rows; see the note below
    dist_mat = 2 - 2 * paddle.matmul(qf, gf.t())
    return dist_mat
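# Note: the 2 - 2 * <q, g> form equals the squared Euclidean distance only for
# L2-normalized rows, since ||q - g||^2 = ||q||^2 + ||g||^2 - 2 * <q, g>.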
def batch_paddle_topk(qf, gf, k1, N=6000):
m = qf.shape[0]
n = gf.shape[0]
    initial_rank = []
for j in range(n // N + 1):
temp_gf = gf[j * N:j * N + N]
temp_qd = []
for i in range(m // N + 1):
temp_qf = qf[i * N:i * N + N]
temp_d = euclidean_distance(temp_qf, temp_gf)
temp_qd.append(temp_d)
temp_qd = paddle.concat(temp_qd, axis=0)
        temp_qd = temp_qd / paddle.max(temp_qd, axis=0)  # per-column max
temp_qd = temp_qd.t()
initial_rank.append(
paddle.topk(
temp_qd, k=k1, axis=1, largest=False, sorted=True)[1])
del temp_qd
del temp_gf
del temp_qf
del temp_d
initial_rank = paddle.concat(initial_rank, axis=0).cpu().numpy()
return initial_rank
def batch_euclidean_distance(qf, gf, N=6000):
m = qf.shape[0]
n = gf.shape[0]
dist_mat = []
for j in range(n // N + 1):
temp_gf = gf[j * N:j * N + N]
temp_qd = []
for i in range(m // N + 1):
temp_qf = qf[i * N:i * N + N]
temp_d = euclidean_distance(temp_qf, temp_gf)
temp_qd.append(temp_d)
temp_qd = paddle.concat(temp_qd, axis=0)
        temp_qd = temp_qd / paddle.max(temp_qd, axis=0)  # per-column max
dist_mat.append(temp_qd.t()) # transpose
del temp_qd
del temp_gf
del temp_qf
del temp_d
dist_mat = paddle.concat(dist_mat, axis=0)
return dist_mat
def batch_v(feat, R, all_num):
V = np.zeros((all_num, all_num), dtype=np.float32)
m = feat.shape[0]
for i in tqdm(range(m)):
temp_gf = feat[i].unsqueeze(0)
temp_qd = euclidean_distance(temp_gf, feat)
temp_qd = temp_qd / (paddle.max(temp_qd))
temp_qd = temp_qd.squeeze()
temp_qd = temp_qd.numpy()[R[i].tolist()]
temp_qd = paddle.to_tensor(temp_qd)
weight = paddle.exp(-temp_qd)
weight = (weight / paddle.sum(weight)).numpy()
V[i, R[i]] = weight.astype(np.float32)
return V
def k_reciprocal_neigh(initial_rank, i, k1):
forward_k_neigh_index = initial_rank[i, :k1 + 1]
backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
fi = np.where(backward_k_neigh_index == i)[0]
return forward_k_neigh_index[fi]
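# Example (illustrative sketch): with ranking lists
#   >>> ranks = np.array([[0, 1, 2], [1, 0, 2], [2, 1, 0]])
#   >>> k_reciprocal_neigh(ranks, 0, 1)  # -> array([0, 1])
# samples 0 and 1 each appear in the other's top-(k1+1) list, so they are
# k-reciprocal neighbors.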
def ReRank2(probFea, galFea, k1=20, k2=6, lambda_value=0.3):
    # Note: the internal naming below (e.g. query_num) differs from the outer
    # scope; it follows the original re-ranking implementation.
query_num = probFea.shape[0]
all_num = query_num + galFea.shape[0]
feat = paddle.concat([probFea, galFea], axis=0)
initial_rank = batch_paddle_topk(feat, feat, k1 + 1, N=6000)
# del feat
del probFea
del galFea
gc.collect() # empty memory
R = []
for i in tqdm(range(all_num)):
# k-reciprocal neighbors
k_reciprocal_index = k_reciprocal_neigh(initial_rank, i, k1)
k_reciprocal_expansion_index = k_reciprocal_index
for j in range(len(k_reciprocal_index)):
candidate = k_reciprocal_index[j]
candidate_k_reciprocal_index = k_reciprocal_neigh(
initial_rank, candidate, int(np.around(k1 / 2)))
if len(
np.intersect1d(candidate_k_reciprocal_index,
k_reciprocal_index)) > 2. / 3 * len(
candidate_k_reciprocal_index):
k_reciprocal_expansion_index = np.append(
k_reciprocal_expansion_index, candidate_k_reciprocal_index)
k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
R.append(k_reciprocal_expansion_index)
gc.collect() # empty memory
V = batch_v(feat, R, all_num)
del R
gc.collect() # empty memory
initial_rank = initial_rank[:, :k2]
### Faster version
if k2 != 1:
V_qe = np.zeros_like(V, dtype=np.float16)
for i in range(all_num):
V_qe[i, :] = np.mean(V[initial_rank[i], :], axis=0)
V = V_qe
del V_qe
del initial_rank
gc.collect() # empty memory
invIndex = []
for i in range(all_num):
invIndex.append(np.where(V[:, i] != 0)[0])
jaccard_dist = np.zeros((query_num, all_num), dtype=np.float32)
for i in tqdm(range(query_num)):
temp_min = np.zeros(shape=[1, all_num], dtype=np.float32)
indNonZero = np.where(V[i, :] != 0)[0]
indImages = [invIndex[ind] for ind in indNonZero]
for j in range(len(indNonZero)):
temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(
V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
jaccard_dist[i] = 1 - temp_min / (2. - temp_min)
del V
gc.collect() # empty memory
original_dist = batch_euclidean_distance(feat, feat[:query_num, :]).numpy()
final_dist = jaccard_dist * (1 - lambda_value
) + original_dist * lambda_value
del original_dist
del jaccard_dist
final_dist = final_dist[:query_num, query_num:]
return final_dist
def visual_rerank(prb_feats,
gal_feats,
cid_tids,
use_ff=False,
use_rerank=False):
"""Rerank by visual cures."""
gal_labels = np.array([[0, item[0]] for item in cid_tids])
prb_labels = gal_labels.copy()
    if use_ff:
        print('currently using ff finetuned parameters...')
        # Step1-1: fic. finetuned parameters: [la]
        prb_feats, gal_feats = run_fic(prb_feats, gal_feats, prb_labels,
                                       gal_labels, 3.0)
        # Step1-2: fac. finetuned parameters: [beta,knn,lr,prb_epoch,gal_epoch]
        prb_feats, gal_feats = run_fac(prb_feats, gal_feats, prb_labels,
                                       gal_labels, 0.08, 20, 0.5, 1, 1)
    if use_rerank:
        paddle.enable_static()
        print('currently using rerank finetuned parameters...')
        # Step2: k-reciprocal. finetuned parameters: [k1,k2,lambda_value]
        sims = ReRank2(
            paddle.to_tensor(prb_feats),
            paddle.to_tensor(gal_feats), 20, 3, 0.3)
else:
# sims = ComputeEuclid(prb_feats, gal_feats, 1)
sims = 1.0 - np.dot(prb_feats, gal_feats.T)
    # NOTE: `sims` here is actually a distance; the smaller, the more similar.
return 1.0 - sims
# helper functions used by sub_cluster
def normalize(nparray, axis=0):
nparray = preprocessing.normalize(nparray, norm='l2', axis=axis)
return nparray
def get_match(cluster_labels):
cluster_dict = dict()
cluster = list()
    for i, l in enumerate(cluster_labels):
        if l in cluster_dict:
            cluster_dict[l].append(i)
        else:
            cluster_dict[l] = [i]
for idx in cluster_dict:
cluster.append(cluster_dict[idx])
return cluster
def get_cid_tid(cluster_labels, cid_tids):
cluster = list()
for labels in cluster_labels:
cid_tid_list = list()
for label in labels:
cid_tid_list.append(cid_tids[label])
cluster.append(cid_tid_list)
return cluster
def combin_feature(cid_tid_dict, sub_cluster):
for sub_ct in sub_cluster:
if len(sub_ct) < 2: continue
mean_feat = np.array([cid_tid_dict[i]['mean_feat'] for i in sub_ct])
for i in sub_ct:
cid_tid_dict[i]['mean_feat'] = mean_feat.mean(axis=0)
return cid_tid_dict
def combin_cluster(sub_labels, cid_tids):
cluster = list()
for sub_c_to_c in sub_labels:
if len(cluster) < 1:
cluster = sub_labels[sub_c_to_c]
continue
for c_ts in sub_labels[sub_c_to_c]:
is_add = False
for i_c, c_set in enumerate(cluster):
if len(set(c_ts) & set(c_set)) > 0:
new_list = list(set(c_ts) | set(c_set))
cluster[i_c] = new_list
is_add = True
break
if not is_add:
cluster.append(c_ts)
labels = list()
num_tr = 0
for c_ts in cluster:
label_list = list()
for c_t in c_ts:
label_list.append(cid_tids.index(c_t))
num_tr += 1
label_list.sort()
labels.append(label_list)
return labels, cluster
def parse_pt_gt(mot_feature):
img_rects = dict()
for line in mot_feature:
fid = int(re.sub('[a-z,A-Z]', "", mot_feature[line]['frame']))
tid = mot_feature[line]['id']
rect = list(map(lambda x: int(float(x)), mot_feature[line]['bbox']))
if fid not in img_rects:
img_rects[fid] = list()
rect.insert(0, tid)
img_rects[fid].append(rect)
return img_rects
# eval result
def compare_dataframes_mtmc(gts, ts):
"""Compute ID-based evaluation metrics for MTMCT
Return:
df (pandas.DataFrame): Results of the evaluations in a df with only the 'idf1', 'idp', and 'idr' columns.
"""
gtds = []
tsds = []
gtcams = gts['CameraId'].drop_duplicates().tolist()
tscams = ts['CameraId'].drop_duplicates().tolist()
maxFrameId = 0
for k in sorted(gtcams):
gtd = gts.query('CameraId == %d' % k)
gtd = gtd[['FrameId', 'Id', 'X', 'Y', 'Width', 'Height']]
# max FrameId in gtd only
mfid = gtd['FrameId'].max()
gtd['FrameId'] += maxFrameId
gtd = gtd.set_index(['FrameId', 'Id'])
gtds.append(gtd)
if k in tscams:
tsd = ts.query('CameraId == %d' % k)
tsd = tsd[['FrameId', 'Id', 'X', 'Y', 'Width', 'Height']]
# max FrameId among both gtd and tsd
mfid = max(mfid, tsd['FrameId'].max())
tsd['FrameId'] += maxFrameId
tsd = tsd.set_index(['FrameId', 'Id'])
tsds.append(tsd)
maxFrameId += mfid
# compute multi-camera tracking evaluation stats
multiCamAcc = mm.utils.compare_to_groundtruth(
pd.concat(gtds), pd.concat(tsds), 'iou')
metrics = list(mm.metrics.motchallenge_metrics)
metrics.extend(['num_frames', 'idfp', 'idfn', 'idtp'])
mh = mm.metrics.create()
summary = mh.compute(multiCamAcc, metrics=metrics, name='MultiCam')
return summary
def get_sim_matrix(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
    # Note: camera-independent get_sim_matrix function,
    # which is different from the one in camera_utils.py.
count = len(cid_tids)
q_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
g_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
q_arr = normalize(q_arr, axis=1)
g_arr = normalize(g_arr, axis=1)
st_mask = np.ones((count, count), dtype=np.float32)
st_mask = intracam_ignore(st_mask, cid_tids)
visual_sim_matrix = visual_rerank(
q_arr, g_arr, cid_tids, use_ff=use_ff, use_rerank=use_rerank)
visual_sim_matrix = visual_sim_matrix.astype('float32')
np.set_printoptions(precision=3)
sim_matrix = visual_sim_matrix * st_mask
np.fill_diagonal(sim_matrix, 0)
return sim_matrix
def get_labels(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
# 1st cluster
sub_cid_tids = list(cid_tid_dict.keys())
sub_labels = dict()
dis_thrs = [0.7, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5]
sim_matrix = get_sim_matrix(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.5,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
sub_cluster = get_cid_tid(labels, cid_tids)
# 2nd cluster
cid_tid_dict_new = combin_feature(cid_tid_dict, sub_cluster)
sub_labels = dict()
sim_matrix = get_sim_matrix(
cid_tid_dict_new,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.9,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
sub_cluster = get_cid_tid(labels, cid_tids)
return labels
def getData(fpath, names=None, sep=r'\s+|\t+|,'):
    """ Get the necessary track data from a file path.
Args:
fpath (str) : Original path of file reading from.
names (list[str]): List of column names for the data.
sep (str): Allowed separators regular expression string.
Return:
df (pandas.DataFrame): Data frame containing the data loaded from the
stream with optionally assigned column names. No index is set on the data.
"""
try:
df = pd.read_csv(
fpath,
sep=sep,
index_col=None,
skipinitialspace=True,
header=None,
names=names,
engine='python')
return df
except Exception as e:
raise ValueError("Could not read input from %s. Error: %s" %
(fpath, repr(e)))
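# Example usage (illustrative; 'pred.txt' is a hypothetical whitespace- or
# comma-separated MTMCT result file):
#   >>> df = getData('pred.txt', names=['CameraId', 'Id', 'FrameId', 'X', 'Y',
#   ...                                 'Width', 'Height', 'Xworld', 'Yworld'])
#   >>> df.columns.tolist()[:3]
#   ['CameraId', 'Id', 'FrameId']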
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import os
import cv2
import numpy as np
from sklearn.cluster import AgglomerativeClustering
BBOX_B = 10 / 15
class Zone(object):
def __init__(self, zone_path='datasets/zone'):
        # zone colors (see get_zone): 1: white, 2: red, 3: green, 4: blue
        # w/r mark non-high-speed zones; b/g mark high-speed zones
        assert zone_path != '', "Error: zone_path should not be empty!"
zones = {}
for img_name in os.listdir(zone_path):
camnum = int(img_name.split('.')[0][-3:])
zone_img = cv2.imread(os.path.join(zone_path, img_name))
zones[camnum] = zone_img
self.zones = zones
self.current_cam = 0
def set_cam(self, cam):
self.current_cam = cam
def get_zone(self, bbox):
cx = int((bbox[0] + bbox[2]) / 2)
cy = int((bbox[1] + bbox[3]) / 2)
pix = self.zones[self.current_cam][max(cy - 1, 0), max(cx - 1, 0), :]
zone_num = 0
if pix[0] > 50 and pix[1] > 50 and pix[2] > 50: # w
zone_num = 1
if pix[0] < 50 and pix[1] < 50 and pix[2] > 50: # r
zone_num = 2
if pix[0] < 50 and pix[1] > 50 and pix[2] < 50: # g
zone_num = 3
if pix[0] > 50 and pix[1] < 50 and pix[2] < 50: # b
zone_num = 4
return zone_num
def is_ignore(self, zone_list, frame_list, cid):
        # 0: not in any crossroad; 1: white; 2: red; 3: green; 4: blue
zs, ze = zone_list[0], zone_list[-1]
fs, fe = frame_list[0], frame_list[-1]
if zs == ze:
            # the track stays in a single zone the whole time
if ze in [1, 2]:
return 2
if zs != 0 and 0 in zone_list:
return 0
if fe - fs > 1500:
return 2
if fs < 2:
if cid in [45]:
if ze in [3, 4]:
return 1
else:
return 2
if fe > 1999:
if cid in [41]:
if ze not in [3]:
return 2
else:
return 0
if fs < 2 or fe > 1999:
if ze in [3, 4]:
return 0
if ze in [3, 4]:
return 1
return 2
else:
# if camera section change
if cid in [41, 42, 43, 44, 45, 46]:
                # comes from a road extension, excluding
if zs == 1 and ze == 2:
return 2
if zs == 2 and ze == 1:
return 2
if cid in [41]:
                # on camera 41, no vehicle goes into camera 42
if (zs in [1, 2]) and ze == 4:
return 2
if zs == 4 and (ze in [1, 2]):
return 2
if cid in [46]:
                # on camera 46, no vehicle goes into camera 45
if (zs in [1, 2]) and ze == 3:
return 2
if zs == 3 and (ze in [1, 2]):
return 2
return 0
def filter_mot(self, mot_list, cid):
new_mot_list = dict()
sub_mot_list = dict()
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
zone_list = []
for f in frame_list:
zone_list.append(tracklet_dict[f]['zone'])
            flag = self.is_ignore(zone_list, frame_list, cid)
            if flag == 0:
                new_mot_list[tracklet] = tracklet_dict
            elif flag == 1:
                sub_mot_list[tracklet] = tracklet_dict
return new_mot_list
def filter_bbox(self, mot_list, cid):
new_mot_list = dict()
yh = self.zones[cid].shape[0]
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
bbox_list = []
for f in frame_list:
bbox_list.append(tracklet_dict[f]['bbox'])
bbox_x = [b[0] for b in bbox_list]
bbox_y = [b[1] for b in bbox_list]
bbox_w = [b[2] - b[0] for b in bbox_list]
bbox_h = [b[3] - b[1] for b in bbox_list]
new_frame_list = list()
if 0 in bbox_x or 0 in bbox_y:
b0 = [
i for i, f in enumerate(frame_list)
if bbox_x[i] < 5 or bbox_y[i] + bbox_h[i] > yh - 5
]
if len(b0) == len(frame_list):
if cid in [41, 42, 44, 45, 46]:
continue
max_w = max(bbox_w)
max_h = max(bbox_h)
for i, f in enumerate(frame_list):
if bbox_w[i] > max_w * BBOX_B and bbox_h[
i] > max_h * BBOX_B:
new_frame_list.append(f)
else:
l_i, r_i = 0, len(frame_list) - 1
if len(b0) == 0:
continue
if b0[0] == 0:
for i in range(len(b0) - 1):
if b0[i] + 1 == b0[i + 1]:
l_i = b0[i + 1]
else:
break
if b0[-1] == len(frame_list) - 1:
for i in range(len(b0) - 1):
i = len(b0) - 1 - i
if b0[i] - 1 == b0[i - 1]:
r_i = b0[i - 1]
else:
break
max_lw, max_lh = bbox_w[l_i], bbox_h[l_i]
max_rw, max_rh = bbox_w[r_i], bbox_h[r_i]
for i, f in enumerate(frame_list):
if i < l_i:
if bbox_w[i] > max_lw * BBOX_B and bbox_h[
i] > max_lh * BBOX_B:
new_frame_list.append(f)
elif i > r_i:
if bbox_w[i] > max_rw * BBOX_B and bbox_h[
i] > max_rh * BBOX_B:
new_frame_list.append(f)
else:
new_frame_list.append(f)
new_tracklet_dict = dict()
for f in new_frame_list:
new_tracklet_dict[f] = tracklet_dict[f]
new_mot_list[tracklet] = new_tracklet_dict
else:
new_mot_list[tracklet] = tracklet_dict
return new_mot_list
def break_mot(self, mot_list, cid):
new_mot_list = dict()
new_num_tracklets = max(mot_list) + 1
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
zone_list = []
back_tracklet = False
new_zone_f = 0
pre_frame = frame_list[0]
time_break = False
for f in frame_list:
if f - pre_frame > 100:
if cid in [44, 45]:
time_break = True
break
if not cid in [41, 44, 45, 46]:
break
pre_frame = f
new_zone = tracklet_dict[f]['zone']
if len(zone_list) > 0 and zone_list[-1] == new_zone:
continue
if new_zone_f > 1:
if len(zone_list) > 1 and new_zone in zone_list:
back_tracklet = True
zone_list.append(new_zone)
new_zone_f = 0
else:
new_zone_f += 1
if back_tracklet:
new_tracklet_dict = dict()
pre_bbox = -1
pre_arrow = 0
have_break = False
for f in frame_list:
now_bbox = tracklet_dict[f]['bbox']
                    if isinstance(pre_bbox, int) and pre_bbox == -1:
                        pre_bbox = now_bbox
now_arrow = now_bbox[0] - pre_bbox[0]
if pre_arrow * now_arrow < 0 and len(
new_tracklet_dict) > 15 and not have_break:
new_mot_list[tracklet] = new_tracklet_dict
new_tracklet_dict = dict()
have_break = True
if have_break:
tracklet_dict[f]['id'] = new_num_tracklets
new_tracklet_dict[f] = tracklet_dict[f]
pre_bbox, pre_arrow = now_bbox, now_arrow
if have_break:
new_mot_list[new_num_tracklets] = new_tracklet_dict
new_num_tracklets += 1
else:
new_mot_list[tracklet] = new_tracklet_dict
elif time_break:
new_tracklet_dict = dict()
have_break = False
pre_frame = frame_list[0]
for f in frame_list:
if f - pre_frame > 100:
new_mot_list[tracklet] = new_tracklet_dict
new_tracklet_dict = dict()
have_break = True
new_tracklet_dict[f] = tracklet_dict[f]
pre_frame = f
if have_break:
new_mot_list[new_num_tracklets] = new_tracklet_dict
new_num_tracklets += 1
else:
new_mot_list[tracklet] = new_tracklet_dict
else:
new_mot_list[tracklet] = tracklet_dict
return new_mot_list
    def intra_matching(self, mot_list, sub_mot_list):
        new_mot_list, new_sub_mot_list = self.do_intra_matching2(mot_list,
                                                                 sub_mot_list)
        return new_mot_list
def do_intra_matching2(self, mot_list, sub_list):
new_zone_dict = dict()
def get_trac_info(tracklet1):
t1_f = list(tracklet1)
t1_f.sort()
t1_fs = t1_f[0]
t1_fe = t1_f[-1]
t1_zs = tracklet1[t1_fs]['zone']
t1_ze = tracklet1[t1_fe]['zone']
t1_boxs = tracklet1[t1_fs]['bbox']
t1_boxe = tracklet1[t1_fe]['bbox']
t1_boxs = [(t1_boxs[2] + t1_boxs[0]) / 2,
(t1_boxs[3] + t1_boxs[1]) / 2]
t1_boxe = [(t1_boxe[2] + t1_boxe[0]) / 2,
(t1_boxe[3] + t1_boxe[1]) / 2]
return t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe
for t1id in sub_list:
tracklet1 = sub_list[t1id]
if tracklet1 == -1:
continue
t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe = get_trac_info(
tracklet1)
sim_dict = dict()
for t2id in mot_list:
tracklet2 = mot_list[t2id]
t2_fs, t2_fe, t2_zs, t2_ze, t2_boxs, t2_boxe = get_trac_info(
tracklet2)
if t1_ze == t2_zs:
if abs(t2_fs - t1_fe) < 5 and abs(t2_boxe[0] - t1_boxs[
0]) < 50 and abs(t2_boxe[1] - t1_boxs[1]) < 50:
t1_feat = tracklet1[t1_fe]['feat']
t2_feat = tracklet2[t2_fs]['feat']
sim_dict[t2id] = np.matmul(t1_feat, t2_feat)
if t1_zs == t2_ze:
if abs(t2_fe - t1_fs) < 5 and abs(t2_boxs[0] - t1_boxe[
0]) < 50 and abs(t2_boxs[1] - t1_boxe[1]) < 50:
t1_feat = tracklet1[t1_fs]['feat']
t2_feat = tracklet2[t2_fe]['feat']
sim_dict[t2id] = np.matmul(t1_feat, t2_feat)
if len(sim_dict) > 0:
max_sim = 0
max_id = 0
for t2id in sim_dict:
if sim_dict[t2id] > max_sim:
                        max_sim = sim_dict[t2id]
max_id = t2id
if max_sim > 0.5:
t2 = mot_list[max_id]
for t1f in tracklet1:
if t1f not in t2:
tracklet1[t1f]['id'] = max_id
t2[t1f] = tracklet1[t1f]
mot_list[max_id] = t2
sub_list[t1id] = -1
return mot_list, sub_list
def do_intra_matching(self, sub_zone_dict, sub_zone):
new_zone_dict = dict()
id_list = list(sub_zone_dict)
id2index = dict()
for index, id in enumerate(id_list):
id2index[id] = index
def get_trac_info(tracklet1):
t1_f = list(tracklet1)
t1_f.sort()
t1_fs = t1_f[0]
t1_fe = t1_f[-1]
t1_zs = tracklet1[t1_fs]['zone']
t1_ze = tracklet1[t1_fe]['zone']
t1_boxs = tracklet1[t1_fs]['bbox']
t1_boxe = tracklet1[t1_fe]['bbox']
t1_boxs = [(t1_boxs[2] + t1_boxs[0]) / 2,
(t1_boxs[3] + t1_boxs[1]) / 2]
t1_boxe = [(t1_boxe[2] + t1_boxe[0]) / 2,
(t1_boxe[3] + t1_boxe[1]) / 2]
return t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe
sim_matrix = np.zeros([len(id_list), len(id_list)])
for t1id in sub_zone_dict:
tracklet1 = sub_zone_dict[t1id]
t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe = get_trac_info(
tracklet1)
t1_feat = tracklet1[t1_fe]['feat']
for t2id in sub_zone_dict:
if t1id == t2id:
continue
tracklet2 = sub_zone_dict[t2id]
t2_fs, t2_fe, t2_zs, t2_ze, t2_boxs, t2_boxe = get_trac_info(
tracklet2)
if t1_zs != t1_ze and t2_ze != t2_zs or t1_fe > t2_fs:
continue
if abs(t1_boxe[0] - t2_boxs[0]) > 50 or abs(t1_boxe[1] -
t2_boxs[1]) > 50:
continue
if t2_fs - t1_fe > 5:
continue
t2_feat = tracklet2[t2_fs]['feat']
sim_matrix[id2index[t1id], id2index[t2id]] = np.matmul(t1_feat,
t2_feat)
sim_matrix[id2index[t2id], id2index[t1id]] = np.matmul(t1_feat,
t2_feat)
sim_matrix = 1 - sim_matrix
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.7,
affinity='precomputed',
linkage='complete').fit_predict(sim_matrix)
new_zone_dict = dict()
label2id = dict()
for index, label in enumerate(cluster_labels):
tracklet = sub_zone_dict[id_list[index]]
if label not in label2id:
new_id = tracklet[list(tracklet)[0]]
new_tracklet = dict()
else:
new_id = label2id[label]
new_tracklet = new_zone_dict[label2id[label]]
for tf in tracklet:
tracklet[tf]['id'] = new_id
new_tracklet[tf] = tracklet[tf]
new_zone_dict[label] = new_tracklet
return new_zone_dict
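# Example usage (illustrative sketch; assumes zone mask images such as
# 'c041.png' exist under zone_path, which is not bundled with this file):
#   >>> zones = Zone(zone_path='datasets/zone')
#   >>> zones.set_cam(41)
#   >>> zones.get_zone([100, 100, 200, 200])  # zone id at the bbox center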
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import base_jde_tracker
from . import base_sde_tracker
from . import jde_tracker
from . import deepsort_tracker
from .base_jde_tracker import *
from .base_sde_tracker import *
from .jde_tracker import *
from .deepsort_tracker import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import numpy as np
from collections import defaultdict
from collections import deque, OrderedDict
from ..matching import jde_matching as matching
__all__ = [
'TrackState',
'BaseTrack',
'STrack',
'joint_stracks',
'sub_stracks',
'remove_duplicate_stracks',
]
class TrackState(object):
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack(object):
_count_dict = defaultdict(int) # support single class and multi classes
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
return self.frame_id
@staticmethod
def next_id(cls_id):
BaseTrack._count_dict[cls_id] += 1
return BaseTrack._count_dict[cls_id]
# @even: reset track id
@staticmethod
def init_count(num_classes):
"""
Initiate _count for all object classes
:param num_classes:
"""
for cls_id in range(num_classes):
BaseTrack._count_dict[cls_id] = 0
@staticmethod
def reset_track_count(cls_id):
BaseTrack._count_dict[cls_id] = 0
def activate(self, *args):
raise NotImplementedError
def predict(self):
raise NotImplementedError
def update(self, *args, **kwargs):
raise NotImplementedError
def mark_lost(self):
self.state = TrackState.Lost
def mark_removed(self):
self.state = TrackState.Removed
class STrack(BaseTrack):
def __init__(self,
tlwh,
score,
temp_feat,
num_classes,
cls_id,
buff_size=30):
# object class id
self.cls_id = cls_id
# wait activate
        self._tlwh = np.asarray(tlwh, dtype=np.float64)
self.kalman_filter = None
self.mean, self.covariance = None, None
self.is_activated = False
self.score = score
self.track_len = 0
self.smooth_feat = None
self.update_features(temp_feat)
self.features = deque([], maxlen=buff_size)
self.alpha = 0.9
def update_features(self, feat):
# L2 normalizing
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1.0 - self.alpha
) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state,
self.covariance)
@staticmethod
def multi_predict(tracks, kalman_filter):
if len(tracks) > 0:
multi_mean = np.asarray([track.mean.copy() for track in tracks])
multi_covariance = np.asarray(
[track.covariance for track in tracks])
for i, st in enumerate(tracks):
if st.state != TrackState.Tracked:
multi_mean[i][7] = 0
multi_mean, multi_covariance = kalman_filter.multi_predict(
multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
tracks[i].mean = mean
tracks[i].covariance = cov
def reset_track_id(self):
self.reset_track_count(self.cls_id)
def activate(self, kalman_filter, frame_id):
"""Start a new track"""
self.kalman_filter = kalman_filter
# update track id for the object class
self.track_id = self.next_id(self.cls_id)
self.mean, self.covariance = self.kalman_filter.initiate(
self.tlwh_to_xyah(self._tlwh))
self.track_len = 0
self.state = TrackState.Tracked # set flag 'tracked'
if frame_id == 1: # to record the first frame's detection result
self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))
self.update_features(new_track.curr_feat)
self.track_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id: # update track id for the object class
self.track_id = self.next_id(self.cls_id)
def update(self, new_track, frame_id, update_feature=True):
self.frame_id = frame_id
self.track_len += 1
new_tlwh = new_track.tlwh
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
self.state = TrackState.Tracked # set flag 'tracked'
self.is_activated = True # set flag 'activated'
self.score = new_track.score
if update_feature:
self.update_features(new_track.curr_feat)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
def to_xyah(self):
return self.tlwh_to_xyah(self.tlwh)
@staticmethod
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_({}-{})_({}-{})'.format(self.cls_id, self.track_id,
self.start_frame, self.end_frame)
def joint_stracks(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_stracks(tlista, tlistb):
stracks = {}
for t in tlista:
stracks[t.track_id] = t
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
def remove_duplicate_stracks(stracksa, stracksb):
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = stracksa[p].frame_id - stracksa[p].start_frame
timeq = stracksb[q].frame_id - stracksb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
return resa, resb
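# A minimal sketch of how the list helpers above compose (track ids are set
# by hand here; in practice they are assigned by STrack.activate):
#
#   t1 = STrack(np.array([0., 0., 10., 20.]), 0.9, np.ones(128, 'float32'), 1, 0)
#   t1.track_id = 1
#   t2 = STrack(np.array([5., 5., 10., 20.]), 0.8, np.ones(128, 'float32'), 1, 0)
#   t2.track_id = 2
#   pool = joint_stracks([t1], [t2])  # union by track_id -> [t1, t2]
#   rest = sub_stracks(pool, [t2])    # remove t2 by id   -> [t1]
#   keep_a, keep_b = remove_duplicate_stracks([t1], [t2])  # drop IoU > 0.85 pairs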
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/track.py
"""
import datetime
__all__ = ['TrackState', 'Track']
class TrackState(object):
"""
Enumeration type for the single target track state. Newly created tracks are
classified as `tentative` until enough evidence has been collected. Then,
the track state is changed to `confirmed`. Tracks that are no longer alive
are classified as `deleted` to mark them for removal from the set of active
tracks.
"""
Tentative = 1
Confirmed = 2
Deleted = 3
class Track(object):
"""
A single target track with state space `(x, y, a, h)` and associated
velocities, where `(x, y)` is the center of the bounding box, `a` is the
aspect ratio and `h` is the height.
Args:
mean (ndarray): Mean vector of the initial state distribution.
covariance (ndarray): Covariance matrix of the initial state distribution.
track_id (int): A unique track identifier.
n_init (int): Number of consecutive detections before the track is confirmed.
The track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
max_age (int): The maximum number of consecutive misses before the track
state is set to `Deleted`.
cls_id (int): The category id of the tracked box.
score (float): The confidence score of the tracked box.
feature (Optional[ndarray]): Feature vector of the detection this track
originates from. If not None, this feature is added to the `features` cache.
Attributes:
hits (int): Total number of measurement updates.
        age (int): Total number of frames since first occurrence.
time_since_update (int): Total number of frames since last measurement
update.
state (TrackState): The current track state.
features (List[ndarray]): A cache of features. On each measurement update,
the associated feature vector is added to this list.
"""
def __init__(self,
mean,
covariance,
track_id,
n_init,
max_age,
cls_id,
score,
feature=None):
self.mean = mean
self.covariance = covariance
self.track_id = track_id
self.hits = 1
self.age = 1
self.time_since_update = 0
self.cls_id = cls_id
self.score = score
self.start_time = datetime.datetime.now()
self.state = TrackState.Tentative
self.features = []
self.feat = feature
if feature is not None:
self.features.append(feature)
self._n_init = n_init
self._max_age = max_age
def to_tlwh(self):
"""Get position in format `(top left x, top left y, width, height)`."""
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def to_tlbr(self):
"""Get position in bounding box format `(min x, miny, max x, max y)`."""
ret = self.to_tlwh()
ret[2:] = ret[:2] + ret[2:]
return ret
def predict(self, kalman_filter):
"""
Propagate the state distribution to the current time step using a Kalman
filter prediction step.
"""
self.mean, self.covariance = kalman_filter.predict(self.mean,
self.covariance)
self.age += 1
self.time_since_update += 1
def update(self, kalman_filter, detection):
"""
Perform Kalman filter measurement update step and update the associated
detection feature cache.
"""
self.mean, self.covariance = kalman_filter.update(self.mean,
self.covariance,
detection.to_xyah())
self.features.append(detection.feature)
self.feat = detection.feature
self.cls_id = detection.cls_id
self.score = detection.score
self.hits += 1
self.time_since_update = 0
if self.state == TrackState.Tentative and self.hits >= self._n_init:
self.state = TrackState.Confirmed
def mark_missed(self):
"""Mark this track as missed (no association at the current time step).
"""
if self.state == TrackState.Tentative:
self.state = TrackState.Deleted
elif self.time_since_update > self._max_age:
self.state = TrackState.Deleted
def is_tentative(self):
"""Returns True if this track is tentative (unconfirmed)."""
return self.state == TrackState.Tentative
def is_confirmed(self):
"""Returns True if this track is confirmed."""
return self.state == TrackState.Confirmed
def is_deleted(self):
"""Returns True if this track is dead and should be deleted."""
return self.state == TrackState.Deleted
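# A minimal sketch of the track lifecycle (assumes a KalmanFilter instance
# `kf` from ..motion and a Detection `det` providing to_xyah() and .feature):
#
#   mean, cov = kf.initiate(det.to_xyah())
#   track = Track(mean, cov, track_id=1, n_init=3, max_age=70,
#                 cls_id=0, score=0.9, feature=det.feature)
#   track.predict(kf)      # age += 1, time_since_update += 1
#   track.update(kf, det)  # hits += 1; Confirmed once hits >= n_init
#   track.mark_missed()    # Tentative tracks are Deleted immediately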
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/tracker.py
"""
import numpy as np
from ..motion import KalmanFilter
from ..matching.deepsort_matching import NearestNeighborDistanceMetric
from ..matching.deepsort_matching import iou_cost, min_cost_matching, matching_cascade, gate_cost_matrix
from .base_sde_tracker import Track
from ..utils import Detection
__all__ = ['DeepSORTTracker']
class DeepSORTTracker(object):
"""
DeepSORT tracker
Args:
        input_size (list): input image size for the reid model, [w, h]
            format, [64, 192] as default.
min_box_area (int): min box area to filter out low quality boxes
        vertical_ratio (float): w/h, the vertical ratio of the bbox used
            to filter bad results, 1.6 as default for pedestrian tracking.
            If set <=0, no filtering is applied.
budget (int): If not None, fix samples per class to at most this number.
Removes the oldest samples when the budget is reached.
        max_age (int): maximum number of consecutive misses before a track is deleted
        n_init (int): Number of consecutive detections before the track is
            confirmed. The track state is set to `Deleted` if a miss occurs
            within the first `n_init` frames.
metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
matching_threshold (float): samples with larger distance are
considered an invalid match.
max_iou_distance (float): max iou distance threshold
        motion (str): motion model, use KalmanFilter as default
"""
def __init__(self,
input_size=[64, 192],
min_box_area=0,
vertical_ratio=-1,
budget=100,
max_age=70,
n_init=3,
metric_type='cosine',
matching_threshold=0.2,
max_iou_distance=0.9,
motion='KalmanFilter'):
self.input_size = input_size
self.min_box_area = min_box_area
self.vertical_ratio = vertical_ratio
self.max_age = max_age
self.n_init = n_init
self.metric = NearestNeighborDistanceMetric(metric_type,
matching_threshold, budget)
self.max_iou_distance = max_iou_distance
if motion == 'KalmanFilter':
self.motion = KalmanFilter()
self.tracks = []
self._next_id = 1
def predict(self):
"""
Propagate track state distributions one time step forward.
This function should be called once every time step, before `update`.
"""
for track in self.tracks:
track.predict(self.motion)
def update(self, pred_dets, pred_embs):
"""
Perform measurement update and track management.
Args:
            pred_dets (np.array): Detection results of the image, the shape
                is [N, 6], means 'x0, y0, w, h, score, cls_id' (tlwh box).
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128], usually pred_embs.shape[1] is a multiple of 128.
"""
pred_tlwhs = pred_dets[:, :4]
pred_scores = pred_dets[:, 4:5]
pred_cls_ids = pred_dets[:, 5:]
detections = [
Detection(tlwh, score, feat, cls_id)
for tlwh, score, feat, cls_id in zip(pred_tlwhs, pred_scores,
pred_embs, pred_cls_ids)
]
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = \
self._match(detections)
# Update track set.
for track_idx, detection_idx in matches:
self.tracks[track_idx].update(self.motion,
detections[detection_idx])
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
for detection_idx in unmatched_detections:
self._initiate_track(detections[detection_idx])
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
continue
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
self.metric.partial_fit(
np.asarray(features), np.asarray(targets), active_targets)
output_stracks = self.tracks
return output_stracks
def _match(self, detections):
def gated_metric(tracks, dets, track_indices, detection_indices):
features = np.array([dets[i].feature for i in detection_indices])
targets = np.array([tracks[i].track_id for i in track_indices])
cost_matrix = self.metric.distance(features, targets)
cost_matrix = gate_cost_matrix(self.motion, cost_matrix, tracks,
dets, track_indices,
detection_indices)
return cost_matrix
# Split track set into confirmed and unconfirmed tracks.
confirmed_tracks = [
i for i, t in enumerate(self.tracks) if t.is_confirmed()
]
unconfirmed_tracks = [
i for i, t in enumerate(self.tracks) if not t.is_confirmed()
]
# Associate confirmed tracks using appearance features.
matches_a, unmatched_tracks_a, unmatched_detections = \
matching_cascade(
gated_metric, self.metric.matching_threshold, self.max_age,
self.tracks, detections, confirmed_tracks)
# Associate remaining tracks together with unconfirmed tracks using IOU.
iou_track_candidates = unconfirmed_tracks + [
k for k in unmatched_tracks_a
if self.tracks[k].time_since_update == 1
]
unmatched_tracks_a = [
k for k in unmatched_tracks_a
if self.tracks[k].time_since_update != 1
]
matches_b, unmatched_tracks_b, unmatched_detections = \
min_cost_matching(
iou_cost, self.max_iou_distance, self.tracks,
detections, iou_track_candidates, unmatched_detections)
matches = matches_a + matches_b
unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
return matches, unmatched_tracks, unmatched_detections
def _initiate_track(self, detection):
mean, covariance = self.motion.initiate(detection.to_xyah())
self.tracks.append(
Track(mean, covariance, self._next_id, self.n_init, self.max_age,
detection.cls_id, detection.score, detection.feature))
self._next_id += 1
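# A minimal per-frame driver sketch with dummy inputs (pred_dets columns are
# 'x0, y0, w, h, score, cls_id', matching the update() docstring above):
#
#   tracker = DeepSORTTracker()
#   pred_dets = np.array([[10., 20., 40., 60., 0.9, 0.]], dtype=np.float32)
#   pred_embs = np.random.rand(1, 128).astype(np.float32)
#   tracker.predict()  # propagate Kalman states first
#   tracks = tracker.update(pred_dets, pred_embs)
#   confirmed = [t for t in tracks if t.is_confirmed()]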
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import numpy as np
from collections import defaultdict
from ..matching import jde_matching as matching
from ..motion import KalmanFilter
from .base_jde_tracker import TrackState, STrack
from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks
__all__ = ['JDETracker']
class JDETracker(object):
    """
JDE tracker, support single class and multi classes
Args:
num_classes (int): the number of classes
det_thresh (float): threshold of detection score
track_buffer (int): buffer for tracker
min_box_area (int): min box area to filter out low quality boxes
        vertical_ratio (float): w/h, the vertical ratio of the bbox used
            to filter bad results. If set <=0, no filtering is applied;
            usually set to 1.6 for pedestrian tracking.
tracked_thresh (float): linear assignment threshold of tracked
stracks and detections
r_tracked_thresh (float): linear assignment threshold of
tracked stracks and unmatched detections
unconfirmed_thresh (float): linear assignment threshold of
unconfirmed stracks and unmatched detections
motion (str): motion model, KalmanFilter as default
conf_thres (float): confidence threshold for tracking
metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
"""
def __init__(self,
num_classes=1,
det_thresh=0.3,
track_buffer=30,
min_box_area=200,
vertical_ratio=1.6,
tracked_thresh=0.7,
r_tracked_thresh=0.5,
unconfirmed_thresh=0.7,
motion='KalmanFilter',
conf_thres=0,
metric_type='euclidean'):
self.num_classes = num_classes
self.det_thresh = det_thresh
self.track_buffer = track_buffer
self.min_box_area = min_box_area
self.vertical_ratio = vertical_ratio
self.tracked_thresh = tracked_thresh
self.r_tracked_thresh = r_tracked_thresh
self.unconfirmed_thresh = unconfirmed_thresh
if motion == 'KalmanFilter':
self.motion = KalmanFilter()
self.conf_thres = conf_thres
self.metric_type = metric_type
self.frame_id = 0
self.tracked_tracks_dict = defaultdict(list) # dict(list[STrack])
self.lost_tracks_dict = defaultdict(list) # dict(list[STrack])
self.removed_tracks_dict = defaultdict(list) # dict(list[STrack])
self.max_time_lost = 0
# max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)
def update(self, pred_dets, pred_embs):
"""
        Processes the image frame and finds bounding boxes (detections).
        Associates the detections with corresponding tracklets and also
        handles lost, removed, refound and active tracklets.
Args:
pred_dets (np.array): Detection results of the image, the shape is
[N, 6], means 'x0, y0, x1, y1, score, cls_id'.
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128] or [N, 512].
        Returns:
            output_tracks_dict (dict(list)): Each list contains the online
                tracklets of one class for the received image.
"""
self.frame_id += 1
if self.frame_id == 1:
STrack.init_count(self.num_classes)
activated_tracks_dict = defaultdict(list)
refined_tracks_dict = defaultdict(list)
lost_tracks_dict = defaultdict(list)
removed_tracks_dict = defaultdict(list)
output_tracks_dict = defaultdict(list)
pred_dets_dict = defaultdict(list)
pred_embs_dict = defaultdict(list)
# unify single and multi classes detection and embedding results
for cls_id in range(self.num_classes):
cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1)
pred_dets_dict[cls_id] = pred_dets[cls_idx]
pred_embs_dict[cls_id] = pred_embs[cls_idx]
for cls_id in range(self.num_classes):
""" Step 1: Get detections by class"""
pred_dets_cls = pred_dets_dict[cls_id]
pred_embs_cls = pred_embs_dict[cls_id]
remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1)
if remain_inds.sum() > 0:
pred_dets_cls = pred_dets_cls[remain_inds]
pred_embs_cls = pred_embs_cls[remain_inds]
detections = [
STrack(
STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f,
self.num_classes, cls_id, 30)
for (tlbrs, f) in zip(pred_dets_cls, pred_embs_cls)
]
else:
detections = []
''' Add newly detected tracklets to tracked_stracks'''
unconfirmed_dict = defaultdict(list)
tracked_tracks_dict = defaultdict(list)
for track in self.tracked_tracks_dict[cls_id]:
if not track.is_activated:
# previous tracks which are not active in the current frame are added in unconfirmed list
unconfirmed_dict[cls_id].append(track)
else:
# Active tracks are added to the local list 'tracked_stracks'
tracked_tracks_dict[cls_id].append(track)
""" Step 2: First association, with embedding"""
# building tracking pool for the current frame
track_pool_dict = defaultdict(list)
track_pool_dict[cls_id] = joint_stracks(
tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id])
# Predict the current location with KalmanFilter
STrack.multi_predict(track_pool_dict[cls_id], self.motion)
dists = matching.embedding_distance(
track_pool_dict[cls_id], detections, metric=self.metric_type)
dists = matching.fuse_motion(self.motion, dists,
track_pool_dict[cls_id], detections)
matches, u_track, u_detection = matching.linear_assignment(
dists, thresh=self.tracked_thresh)
for i_tracked, idet in matches:
# i_tracked is the id of the track and idet is the detection
track = track_pool_dict[cls_id][i_tracked]
det = detections[idet]
if track.state == TrackState.Tracked:
# If the track is active, add the detection to the track
track.update(detections[idet], self.frame_id)
activated_tracks_dict[cls_id].append(track)
else:
# We have obtained a detection from a track which is not active,
# hence put the track in refind_stracks list
track.re_activate(det, self.frame_id, new_id=False)
refined_tracks_dict[cls_id].append(track)
# None of the steps below happen if there are no undetected tracks.
""" Step 3: Second association, with IOU"""
detections = [detections[i] for i in u_detection]
r_tracked_stracks = []
for i in u_track:
if track_pool_dict[cls_id][i].state == TrackState.Tracked:
r_tracked_stracks.append(track_pool_dict[cls_id][i])
dists = matching.iou_distance(r_tracked_stracks, detections)
matches, u_track, u_detection = matching.linear_assignment(
dists, thresh=self.r_tracked_thresh)
for i_tracked, idet in matches:
track = r_tracked_stracks[i_tracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracks_dict[cls_id].append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refined_tracks_dict[cls_id].append(track)
for it in u_track:
track = r_tracked_stracks[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracks_dict[cls_id].append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = matching.iou_distance(unconfirmed_dict[cls_id], detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(
dists, thresh=self.unconfirmed_thresh)
for i_tracked, idet in matches:
unconfirmed_dict[cls_id][i_tracked].update(detections[idet],
self.frame_id)
activated_tracks_dict[cls_id].append(unconfirmed_dict[cls_id][
i_tracked])
for it in u_unconfirmed:
track = unconfirmed_dict[cls_id][it]
track.mark_removed()
removed_tracks_dict[cls_id].append(track)
""" Step 4: Init new stracks"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.motion, self.frame_id)
activated_tracks_dict[cls_id].append(track)
""" Step 5: Update state"""
for track in self.lost_tracks_dict[cls_id]:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracks_dict[cls_id].append(track)
self.tracked_tracks_dict[cls_id] = [
t for t in self.tracked_tracks_dict[cls_id]
if t.state == TrackState.Tracked
]
self.tracked_tracks_dict[cls_id] = joint_stracks(
self.tracked_tracks_dict[cls_id], activated_tracks_dict[cls_id])
self.tracked_tracks_dict[cls_id] = joint_stracks(
self.tracked_tracks_dict[cls_id], refined_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id] = sub_stracks(
self.lost_tracks_dict[cls_id], self.tracked_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id] = sub_stracks(
self.lost_tracks_dict[cls_id], self.removed_tracks_dict[cls_id])
self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id])
self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[
cls_id] = remove_duplicate_stracks(
self.tracked_tracks_dict[cls_id],
self.lost_tracks_dict[cls_id])
            # collect the activated tracked stracks as output
output_tracks_dict[cls_id] = [
track for track in self.tracked_tracks_dict[cls_id]
if track.is_activated
]
return output_tracks_dict
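# A minimal single-class driver sketch with dummy detections (pred_dets
# columns are 'x0, y0, x1, y1, score, cls_id', matching update() above):
#
#   tracker = JDETracker(num_classes=1)
#   tracker.max_time_lost = 30  # normally int(frame_rate / 30. * track_buffer)
#   pred_dets = np.array([[10., 20., 50., 80., 0.9, 0.]], dtype=np.float32)
#   pred_embs = np.random.rand(1, 128).astype(np.float32)
#   online = tracker.update(pred_dets, pred_embs)  # dict: cls_id -> [STrack]
#   for t in online[0]:
#       print(t.track_id, t.tlwh, t.score)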
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import time
import numpy as np
import collections
__all__ = [
'MOTTimer', 'Detection', 'write_mot_results', 'load_det_results',
'preprocess_reid', 'get_crops', 'clip_box', 'scale_coords', 'flow_statistic',
'plot_tracking'
]
class MOTTimer(object):
"""
    This class is used to compute and print the current FPS during evaluation.
"""
def __init__(self, window_size=20):
self.start_time = 0.
self.diff = 0.
self.duration = 0.
self.deque = collections.deque(maxlen=window_size)
def tic(self):
        # use time.time instead of time.clock because time.clock
        # does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.deque.append(self.diff)
if average:
self.duration = np.mean(self.deque)
else:
self.duration = np.sum(self.deque)
return self.duration
def clear(self):
self.start_time = 0.
self.diff = 0.
self.duration = 0.
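# Typical usage (a minimal sketch): wrap the per-frame work between tic()
# and toc(), then derive the smoothed FPS from the averaged duration.
#
#   timer = MOTTimer()
#   timer.tic()
#   # ... run tracking for one frame ...
#   timer.toc()
#   fps = 1. / max(timer.duration, 1e-6)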
class Detection(object):
"""
This class represents a bounding box detection in a single image.
Args:
        tlwh (ndarray): Bounding box in format `(top left x, top left y,
            width, height)`.
        score (float): Bounding box confidence score.
        feature (ndarray): A feature vector that describes the object
            contained in this image.
        cls_id (int): Bounding box category id.
"""
def __init__(self, tlwh, score, feature, cls_id):
self.tlwh = np.asarray(tlwh, dtype=np.float32)
self.score = float(score)
self.feature = np.asarray(feature, dtype=np.float32)
self.cls_id = int(cls_id)
def to_tlbr(self):
"""
Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
def to_xyah(self):
"""
Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
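# Worked example of the box conversions (illustrative values):
#
#   det = Detection([10., 20., 40., 60.], 0.9, np.ones(128), 0)
#   det.to_tlbr()  # -> [10., 20., 50., 80.]   (x1 + w, y1 + h)
#   det.to_xyah()  # -> [30., 50., 0.667, 60.] (center x/y, w/h ratio, h)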
def write_mot_results(filename, results, data_type='mot', num_classes=1):
# support single and multi classes
if data_type in ['mot', 'mcmot']:
save_format = '{frame},{id},{x1},{y1},{w},{h},{score},{cls_id},-1,-1\n'
elif data_type == 'kitti':
save_format = '{frame} {id} car 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
else:
raise ValueError(data_type)
f = open(filename, 'w')
for cls_id in range(num_classes):
for frame_id, tlwhs, tscores, track_ids in results[cls_id]:
if data_type == 'kitti':
frame_id -= 1
for tlwh, score, track_id in zip(tlwhs, tscores, track_ids):
if track_id < 0: continue
if data_type == 'mot':
cls_id = -1
x1, y1, w, h = tlwh
x2, y2 = x1 + w, y1 + h
line = save_format.format(
frame=frame_id,
id=track_id,
x1=x1,
y1=y1,
x2=x2,
y2=y2,
w=w,
h=h,
score=score,
cls_id=cls_id)
f.write(line)
    f.close()
    print('MOT results saved in {}'.format(filename))
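# For data_type='mot' each saved line looks like (illustrative values):
#   1,3,100.0,200.0,40.0,60.0,0.95,-1,-1,-1
# i.e. frame, track id, x1, y1, w, h, score, cls_id (-1 in the single-class
# 'mot' format), followed by two -1 placeholders.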
def load_det_results(det_file, num_frames):
    assert os.path.exists(det_file) and os.path.isfile(det_file), \
        '{} does not exist or is not a file.'.format(det_file)
labels = np.loadtxt(det_file, dtype='float32', delimiter=',')
assert labels.shape[1] == 7, \
"Each line of {} should have 7 items: '[frame_id],[x0],[y0],[w],[h],[score],[class_id]'.".format(det_file)
results_list = []
for frame_i in range(num_frames):
results = {'bbox': [], 'score': [], 'cls_id': []}
        labels_with_frame = labels[labels[:, 0] == frame_i + 1]
        # each line of labels_with_frame:
        # [frame_id],[x0],[y0],[w],[h],[score],[class_id]
        for l in labels_with_frame:
results['bbox'].append(l[1:5])
results['score'].append(l[5:6])
results['cls_id'].append(l[6:7])
results_list.append(results)
return results_list
def scale_coords(coords, input_shape, im_shape, scale_factor):
# Note: ratio has only one value, scale_factor[0] == scale_factor[1]
#
    # This function is only used for JDE YOLOv3 or other detectors with
    # LetterBoxResize and JDEBBoxPostProcess, whose output coords have not
    # been scaled back to the original image.
ratio = scale_factor[0]
pad_w = (input_shape[1] - int(im_shape[1])) / 2
pad_h = (input_shape[0] - int(im_shape[0])) / 2
coords[:, 0::2] -= pad_w
coords[:, 1::2] -= pad_h
coords[:, 0:4] /= ratio
coords[:, :4] = np.clip(coords[:, :4], a_min=0, a_max=coords[:, :4].max())
return coords.round()
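# Worked example (synthetic numbers): a 1280x640 (w x h) frame letterboxed
# into a 1088x608 input with ratio 0.85 gives im_shape (544, 1088) and
# vertical padding (608 - 544) / 2 = 32, so a detected box
# [0., 32., 85., 117.] maps back to [0., 0., 100., 100.]:
#
#   coords = np.array([[0., 32., 85., 117.]])
#   scale_coords(coords, (608, 1088), (544, 1088), (0.85, 0.85))
#   # -> [[0., 0., 100., 100.]]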
def clip_box(xyxy, ori_image_shape):
H, W = ori_image_shape
xyxy[:, 0::2] = np.clip(xyxy[:, 0::2], a_min=0, a_max=W)
xyxy[:, 1::2] = np.clip(xyxy[:, 1::2], a_min=0, a_max=H)
w = xyxy[:, 2:3] - xyxy[:, 0:1]
h = xyxy[:, 3:4] - xyxy[:, 1:2]
mask = np.logical_and(h > 0, w > 0)
keep_idx = np.nonzero(mask)
return xyxy[keep_idx[0]], keep_idx
def get_crops(xyxy, ori_img, w, h):
crops = []
xyxy = xyxy.astype(np.int64)
ori_img = ori_img.numpy()
ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2) # [h,w,3]->[w,h,3]
for i, bbox in enumerate(xyxy):
crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :]
crops.append(crop)
crops = preprocess_reid(crops, w, h)
return crops
def preprocess_reid(imgs,
w=64,
h=192,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
im_batch = []
for img in imgs:
img = cv2.resize(img, (w, h))
img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255
img_mean = np.array(mean).reshape((3, 1, 1))
img_std = np.array(std).reshape((3, 1, 1))
img -= img_mean
img /= img_std
img = np.expand_dims(img, axis=0)
im_batch.append(img)
im_batch = np.concatenate(im_batch, 0)
return im_batch
def flow_statistic(result,
secs_interval,
do_entrance_counting,
video_fps,
entrance,
id_set,
interval_id_set,
in_id_list,
out_id_list,
prev_center,
records,
data_type='mot',
num_classes=1):
    # Count in and out numbers:
    # Use the horizontal center line as the entrance just for simplification.
    # If a person was above the horizontal center line at the previous frame
    # and is below the line at the current frame, the in-count is increased
    # by one. If a person was below the horizontal center line at the previous
    # frame and is above the line at the current frame, the out-count is
    # increased by one.
    # TODO: if the entrance is not the horizontal center line,
    # the counting method should be optimized.
if do_entrance_counting:
entrance_y = entrance[1] # xmin, ymin, xmax, ymax
frame_id, tlwhs, tscores, track_ids = result
for tlwh, score, track_id in zip(tlwhs, tscores, track_ids):
if track_id < 0: continue
if data_type == 'kitti':
frame_id -= 1
x1, y1, w, h = tlwh
center_x = x1 + w / 2.
center_y = y1 + h / 2.
if track_id in prev_center:
if prev_center[track_id][1] <= entrance_y and \
center_y > entrance_y:
in_id_list.append(track_id)
if prev_center[track_id][1] >= entrance_y and \
center_y < entrance_y:
out_id_list.append(track_id)
prev_center[track_id][0] = center_x
prev_center[track_id][1] = center_y
else:
prev_center[track_id] = [center_x, center_y]
    # Count the total number and the number within a manually-set interval
frame_id, tlwhs, tscores, track_ids = result
for tlwh, score, track_id in zip(tlwhs, tscores, track_ids):
if track_id < 0: continue
id_set.add(track_id)
interval_id_set.add(track_id)
# Reset counting at the interval beginning
if frame_id % video_fps == 0 and frame_id / video_fps % secs_interval == 0:
curr_interval_count = len(interval_id_set)
interval_id_set.clear()
info = "Frame id: {}, Total count: {}".format(frame_id, len(id_set))
if do_entrance_counting:
info += ", In count: {}, Out count: {}".format(
len(in_id_list), len(out_id_list))
if frame_id % video_fps == 0 and frame_id / video_fps % secs_interval == 0:
info += ", Count during {} secs: {}".format(secs_interval,
curr_interval_count)
print(info)
info += "\n"
records.append(info)
return {
"id_set": id_set,
"interval_id_set": interval_id_set,
"in_id_list": in_id_list,
"out_id_list": out_id_list,
"prev_center": prev_center,
"records": records
}
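# A minimal sketch of driving flow_statistic frame by frame; the state
# containers are created once and threaded through every call (height,
# width, fps and frame_results are placeholders here):
#
#   id_set, interval_id_set = set(), set()
#   in_list, out_list, prev_center, records = [], [], dict(), []
#   entrance = [0, height / 2., width, height / 2.]
#   for frame_id, (tlwhs, scores, ids) in enumerate(frame_results):
#       stat = flow_statistic((frame_id + 1, tlwhs, scores, ids), 10, True,
#                             fps, entrance, id_set, interval_id_set,
#                             in_list, out_list, prev_center, records)
#       records = stat['records']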
def get_color(idx):
idx = idx * 3
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color
def plot_tracking(image,
tlwhs,
obj_ids,
scores=None,
frame_id=0,
fps=0.,
ids2names=[],
do_entrance_counting=False,
entrance=None):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 2
line_thickness = max(1, int(image.shape[1] / 500.))
if fps > 0:
_line = 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs))
else:
_line = 'frame: %d num: %d' % (frame_id, len(tlwhs))
cv2.putText(
im,
_line,
(0, int(15 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
obj_id = int(obj_ids[i])
id_text = '{}'.format(int(obj_id))
if ids2names != []:
        assert len(
            ids2names) == 1, "plot_tracking only supports a single class."
id_text = '{}_'.format(ids2names[0]) + id_text
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(
im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
cv2.putText(
im,
id_text, (intbox[0], intbox[1] - 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=text_thickness)
if scores is not None:
text = '{:.2f}'.format(float(scores[i]))
cv2.putText(
im,
text, (intbox[0], intbox[1] + 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255),
thickness=text_thickness)
if do_entrance_counting:
entrance_line = tuple(map(int, entrance))
cv2.rectangle(
im,
entrance_line[0:2],
entrance_line[2:4],
color=(0, 255, 255),
thickness=line_thickness)
return im
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import yaml
import cv2
import numpy as np
from collections import defaultdict
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from utils import argsparser, Timer, get_current_memory_mb
from det_infer import Detector, get_test_images, print_arguments, PredictConfig
from benchmark_utils import PaddleInferBenchmark
from visualize import plot_tracking_dict
from mot.tracker import JDETracker
from mot.utils import MOTTimer, write_mot_results, flow_statistic
# Supported MOT models
MOT_SUPPORT_MODELS = {
'JDE',
'FairMOT',
}
class JDE_Detector(Detector):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
        batch_size (int): batch size of model inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1088,
trt_opt_shape=608,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
super(JDE_Detector, self).__init__(
pred_config=pred_config,
model_dir=model_dir,
device=device,
run_mode=run_mode,
batch_size=batch_size,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
assert batch_size == 1, "The JDE Detector only supports batch size=1 now"
assert pred_config.tracker, "Tracking model should have tracker"
self.num_classes = len(pred_config.labels)
tp = pred_config.tracker
min_box_area = tp['min_box_area'] if 'min_box_area' in tp else 200
vertical_ratio = tp['vertical_ratio'] if 'vertical_ratio' in tp else 1.6
conf_thres = tp['conf_thres'] if 'conf_thres' in tp else 0.
tracked_thresh = tp['tracked_thresh'] if 'tracked_thresh' in tp else 0.7
metric_type = tp['metric_type'] if 'metric_type' in tp else 'euclidean'
self.tracker = JDETracker(
num_classes=self.num_classes,
min_box_area=min_box_area,
vertical_ratio=vertical_ratio,
conf_thres=conf_thres,
tracked_thresh=tracked_thresh,
metric_type=metric_type)
def postprocess(self, pred_dets, pred_embs, threshold):
online_targets_dict = self.tracker.update(pred_dets, pred_embs)
online_tlwhs = defaultdict(list)
online_scores = defaultdict(list)
online_ids = defaultdict(list)
for cls_id in range(self.num_classes):
online_targets = online_targets_dict[cls_id]
for t in online_targets:
tlwh = t.tlwh
tid = t.track_id
tscore = t.score
if tscore < threshold: continue
if tlwh[2] * tlwh[3] <= self.tracker.min_box_area: continue
if self.tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
3] > self.tracker.vertical_ratio:
continue
online_tlwhs[cls_id].append(tlwh)
online_ids[cls_id].append(tid)
online_scores[cls_id].append(tscore)
return online_tlwhs, online_scores, online_ids
def predict(self, image_list, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image_list (list): list of image
            threshold (float): score threshold of the predicted boxes
Returns:
online_tlwhs, online_scores, online_ids (dict[np.array])
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_list)
self.det_times.preprocess_time_s.end()
pred_dets, pred_embs = None, None
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
pred_dets = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
pred_dets = boxes_tensor.copy_to_cpu()
embs_tensor = self.predictor.get_output_handle(output_names[1])
pred_embs = embs_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start()
online_tlwhs, online_scores, online_ids = self.postprocess(
pred_dets, pred_embs, threshold)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1
return online_tlwhs, online_scores, online_ids
def predict_image(detector, image_list):
results = []
num_classes = detector.num_classes
data_type = 'mcmot' if num_classes > 1 else 'mot'
ids2names = detector.pred_config.labels
image_list.sort()
for frame_id, img_file in enumerate(image_list):
frame = cv2.imread(img_file)
if FLAGS.run_benchmark:
detector.predict([frame], FLAGS.threshold, warmup=10, repeats=10)
cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm
detector.gpu_mem += gm
detector.gpu_util += gu
print('Test iter {}, file name:{}'.format(frame_id, img_file))
else:
online_tlwhs, online_scores, online_ids = detector.predict(
[frame], FLAGS.threshold)
online_im = plot_tracking_dict(frame, num_classes, online_tlwhs,
online_ids, online_scores, frame_id,
ids2names)
if FLAGS.save_images:
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
img_name = os.path.split(img_file)[-1]
out_path = os.path.join(FLAGS.output_dir, img_name)
cv2.imwrite(out_path, online_im)
print("save result to: " + out_path)
def predict_video(detector, camera_id):
video_name = 'mot_output.mp4'
if camera_id != -1:
capture = cv2.VideoCapture(camera_id)
else:
capture = cv2.VideoCapture(FLAGS.video_file)
video_name = os.path.split(FLAGS.video_file)[-1]
    # Get video info: resolution, fps, frame count
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("fps: %d, frame_count: %d" % (fps, frame_count))
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name)
if not FLAGS.save_images:
video_format = 'mp4v'
fourcc = cv2.VideoWriter_fourcc(*video_format)
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
frame_id = 0
timer = MOTTimer()
results = defaultdict(list) # support single class and multi classes
num_classes = detector.num_classes
data_type = 'mcmot' if num_classes > 1 else 'mot'
ids2names = detector.pred_config.labels
center_traj = None
entrance = None
records = None
if FLAGS.draw_center_traj:
center_traj = [{} for i in range(num_classes)]
if num_classes == 1:
id_set = set()
interval_id_set = set()
in_id_list = list()
out_id_list = list()
prev_center = dict()
records = list()
entrance = [0, height / 2., width, height / 2.]
video_fps = fps
    while True:
ret, frame = capture.read()
if not ret:
break
timer.tic()
online_tlwhs, online_scores, online_ids = detector.predict(
[frame], FLAGS.threshold)
timer.toc()
for cls_id in range(num_classes):
results[cls_id].append((frame_id + 1, online_tlwhs[cls_id],
online_scores[cls_id], online_ids[cls_id]))
fps = 1. / timer.duration
# NOTE: just implement flow statistic for one class
if num_classes == 1:
result = (frame_id + 1, online_tlwhs[0], online_scores[0],
online_ids[0])
statistic = flow_statistic(
result, FLAGS.secs_interval, FLAGS.do_entrance_counting,
video_fps, entrance, id_set, interval_id_set, in_id_list,
out_id_list, prev_center, records, data_type, num_classes)
id_set = statistic['id_set']
interval_id_set = statistic['interval_id_set']
in_id_list = statistic['in_id_list']
out_id_list = statistic['out_id_list']
prev_center = statistic['prev_center']
records = statistic['records']
elif num_classes > 1 and FLAGS.do_entrance_counting:
raise NotImplementedError(
'Multi-class flow counting is not implemented now!')
im = plot_tracking_dict(
frame,
num_classes,
online_tlwhs,
online_ids,
online_scores,
frame_id=frame_id,
fps=fps,
ids2names=ids2names,
do_entrance_counting=FLAGS.do_entrance_counting,
entrance=entrance,
records=records,
center_traj=center_traj)
if FLAGS.save_images:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
if not os.path.exists(save_dir):
os.makedirs(save_dir)
cv2.imwrite(
os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im)
else:
writer.write(im)
frame_id += 1
print('detect frame: %d, fps: %f' % (frame_id, fps))
if camera_id != -1:
cv2.imshow('Tracking Detection', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if FLAGS.save_mot_txts:
result_filename = os.path.join(FLAGS.output_dir,
video_name.split('.')[-2] + '.txt')
write_mot_results(result_filename, results, data_type, num_classes)
if num_classes == 1:
result_filename = os.path.join(
FLAGS.output_dir,
video_name.split('.')[-2] + '_flow_statistic.txt')
f = open(result_filename, 'w')
for line in records:
f.write(line)
        print('Flow statistic saved in {}'.format(result_filename))
f.close()
if FLAGS.save_images:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(save_dir,
out_path)
os.system(cmd_str)
print('Save video in {}.'.format(out_path))
else:
writer.release()
def main():
pred_config = PredictConfig(FLAGS.model_dir)
detector = JDE_Detector(
pred_config,
FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
predict_video(detector, FLAGS.camera_id)
else:
# predict from image
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
predict_image(detector, img_list)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
else:
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
model_dir = FLAGS.model_dir
mode = FLAGS.run_mode
model_info = {
'model_name': model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
data_info = {
'batch_size': 1,
'shape': "dynamic_shape",
'data_num': perf_info['img_num']
}
det_log = PaddleInferBenchmark(detector.config, model_info,
data_info, perf_info, mems)
det_log('MOT')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import yaml
import cv2
import re
import numpy as np
from collections import defaultdict
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from picodet_postprocess import PicoDetPostProcess
from utils import argsparser, Timer, get_current_memory_mb, _is_valid_video, video2frames
from det_infer import Detector, DetectorPicoDet, get_test_images, print_arguments, PredictConfig
from det_infer import load_predictor
from benchmark_utils import PaddleInferBenchmark
from visualize import plot_tracking
from mot.tracker import DeepSORTTracker
from mot.utils import MOTTimer, write_mot_results, flow_statistic, scale_coords, clip_box, preprocess_reid
from mot.mtmct.utils import parse_bias
from mot.mtmct.postprocess import trajectory_fusion, sub_cluster, gen_res, print_mtmct_result
from mot.mtmct.postprocess import get_mtmct_matching_results, save_mtmct_crops, save_mtmct_vis_results
# Supported MOT models
MOT_SUPPORT_MODELS = {'DeepSORT'}
def bench_log(detector, img_list, model_info, batch_size=1, name=None):
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
data_info = {
'batch_size': batch_size,
'shape': "dynamic_shape",
'data_num': perf_info['img_num']
}
log = PaddleInferBenchmark(detector.config, model_info, data_info,
perf_info, mems)
log(name)
class SDE_Detector(Detector):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1088,
trt_opt_shape=608,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
super(SDE_Detector, self).__init__(
pred_config=pred_config,
model_dir=model_dir,
device=device,
run_mode=run_mode,
batch_size=batch_size,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
        assert batch_size == 1, "The SDE Detector only supports batch size=1 now"
self.pred_config = pred_config
    def postprocess(self, boxes, inputs, ori_image_shape, threshold, scaled):
over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0]
if len(over_thres_idx) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
return pred_dets, pred_xyxys
else:
boxes = boxes[over_thres_idx]
        if not scaled:
            # scaled means whether the coords after detector outputs
            # have been scaled back to the original image, set True
            # in general detector, set False in JDE YOLOv3.
            input_shape = inputs['image'].shape[2:]
            im_shape = inputs['im_shape'][0]
            scale_factor = inputs['scale_factor'][0]
            pred_bboxes = scale_coords(boxes[:, 2:], input_shape, im_shape,
                                       scale_factor)
else:
pred_bboxes = boxes[:, 2:]
pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
if len(keep_idx[0]) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
return pred_dets, pred_xyxys
pred_scores = boxes[:, 1:2][keep_idx[0]]
pred_cls_ids = boxes[:, 0:1][keep_idx[0]]
pred_tlwhs = np.concatenate(
(pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
axis=1)
pred_dets = np.concatenate(
(pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
return pred_dets, pred_xyxys
def predict(self, image_path, ori_image_shape, scaled, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image_path (list[str]): path of images, only support one image path
(batch_size=1) in tracking model
            ori_image_shape (list[int]): original image shape
            threshold (float): score threshold of the predicted boxes
scaled (bool): whether the coords after detector outputs are scaled,
default False in jde yolov3, set True in general detector.
Returns:
pred_dets (np.ndarray, [N, 6])
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_path)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start()
if len(boxes) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
else:
            pred_dets, pred_xyxys = self.postprocess(
                boxes, inputs, ori_image_shape, threshold, scaled)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1
return pred_dets, pred_xyxys
class SDE_DetectorPicoDet(DetectorPicoDet):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1088,
trt_opt_shape=608,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
super(SDE_DetectorPicoDet, self).__init__(
pred_config=pred_config,
model_dir=model_dir,
device=device,
run_mode=run_mode,
batch_size=batch_size,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
        assert batch_size == 1, "The SDE_DetectorPicoDet only supports batch size=1 now"
self.pred_config = pred_config
def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor,
threshold):
over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0]
if len(over_thres_idx) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
return pred_dets, pred_xyxys
else:
boxes = boxes[over_thres_idx]
pred_bboxes = boxes[:, 2:]
            # clip_box here expects the original image shape; recover it
            # from the resized shape and the scale factor
            ori_image_shape = np.round(im_shape[0] / scale_factor[0]).astype(int)
            pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
if len(keep_idx[0]) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
return pred_dets, pred_xyxys
pred_scores = boxes[:, 1:2][keep_idx[0]]
pred_cls_ids = boxes[:, 0:1][keep_idx[0]]
pred_tlwhs = np.concatenate(
(pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
axis=1)
pred_dets = np.concatenate(
(pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
return pred_dets, pred_xyxys
def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image (np.ndarray): image numpy data
            threshold (float): score threshold of the predicted boxes
scaled (bool): whether the coords after detector outputs are scaled,
default False in jde yolov3, set True in general detector.
Returns:
pred_dets (np.ndarray, [N, 6])
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
np_score_list, np_boxes_list = [], []
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
np_score_list.clear()
np_boxes_list.clear()
output_names = self.predictor.get_output_names()
num_outs = int(len(output_names) / 2)
for out_idx in range(num_outs):
np_score_list.append(
self.predictor.get_output_handle(output_names[out_idx])
.copy_to_cpu())
np_boxes_list.append(
self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu())
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.img_num += 1
self.det_times.postprocess_time_s.start()
self.postprocess = PicoDetPostProcess(
inputs['image'].shape[2:],
inputs['im_shape'],
inputs['scale_factor'],
strides=self.pred_config.fpn_stride,
nms_threshold=self.pred_config.nms['nms_threshold'])
boxes, boxes_num = self.postprocess(np_score_list, np_boxes_list)
if len(boxes) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
else:
input_shape = inputs['image'].shape[2:]
im_shape = inputs['im_shape']
scale_factor = inputs['scale_factor']
pred_dets, pred_xyxys = self.postprocess_bboxes(
boxes, input_shape, im_shape, scale_factor, threshold)
return pred_dets, pred_xyxys
class SDE_ReID(object):
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=50,
trt_min_shape=1,
trt_max_shape=1088,
trt_opt_shape=608,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
self.pred_config = pred_config
self.predictor, self.config = load_predictor(
model_dir,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
self.batch_size = batch_size
assert pred_config.tracker, "Tracking model should have tracker"
pt = pred_config.tracker
max_age = pt['max_age'] if 'max_age' in pt else 30
max_iou_distance = pt[
'max_iou_distance'] if 'max_iou_distance' in pt else 0.7
self.tracker = DeepSORTTracker(
max_age=max_age, max_iou_distance=max_iou_distance)
def get_crops(self, xyxy, ori_img):
w, h = self.tracker.input_size
self.det_times.preprocess_time_s.start()
crops = []
xyxy = xyxy.astype(np.int64)
ori_img = ori_img.transpose(1, 0, 2) # [h,w,3]->[w,h,3]
for i, bbox in enumerate(xyxy):
crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :]
crops.append(crop)
crops = preprocess_reid(crops, w, h)
self.det_times.preprocess_time_s.end()
return crops
def preprocess(self, crops):
        # to keep inference fast, only use the top-k crops
crops = crops[:self.batch_size]
inputs = {}
inputs['crops'] = np.array(crops).astype('float32')
return inputs
def postprocess(self, pred_dets, pred_embs):
tracker = self.tracker
tracker.predict()
online_targets = tracker.update(pred_dets, pred_embs)
online_tlwhs, online_scores, online_ids = [], [], []
for t in online_targets:
if not t.is_confirmed() or t.time_since_update > 1:
continue
tlwh = t.to_tlwh()
tscore = t.score
tid = t.track_id
if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
3] > tracker.vertical_ratio:
continue
online_tlwhs.append(tlwh)
online_scores.append(tscore)
online_ids.append(tid)
tracking_outs = {
'online_tlwhs': online_tlwhs,
'online_scores': online_scores,
'online_ids': online_ids,
}
return tracking_outs
def postprocess_mtmct(self, pred_dets, pred_embs, frame_id, seq_name):
tracker = self.tracker
tracker.predict()
online_targets = tracker.update(pred_dets, pred_embs)
online_tlwhs, online_scores, online_ids = [], [], []
online_tlbrs, online_feats = [], []
for t in online_targets:
if not t.is_confirmed() or t.time_since_update > 1:
continue
tlwh = t.to_tlwh()
tscore = t.score
tid = t.track_id
if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
3] > tracker.vertical_ratio:
continue
online_tlwhs.append(tlwh)
online_scores.append(tscore)
online_ids.append(tid)
online_tlbrs.append(t.to_tlbr())
online_feats.append(t.feat)
tracking_outs = {
'online_tlwhs': online_tlwhs,
'online_scores': online_scores,
'online_ids': online_ids,
'feat_data': {},
}
for _tlbr, _id, _feat in zip(online_tlbrs, online_ids, online_feats):
feat_data = {}
feat_data['bbox'] = _tlbr
feat_data['frame'] = f"{frame_id:06d}"
feat_data['id'] = _id
_imgname = f'{seq_name}_{_id}_{frame_id}.jpg'
feat_data['imgname'] = _imgname
feat_data['feat'] = _feat
tracking_outs['feat_data'].update({_imgname: feat_data})
return tracking_outs
def predict(self,
crops,
pred_dets,
warmup=0,
repeats=1,
MTMCT=False,
frame_id=0,
seq_name=''):
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(crops)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
feature_tensor = self.predictor.get_output_handle(output_names[0])
pred_embs = feature_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
feature_tensor = self.predictor.get_output_handle(output_names[0])
pred_embs = feature_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start()
        if not MTMCT:
tracking_outs = self.postprocess(pred_dets, pred_embs)
else:
tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs,
frame_id, seq_name)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1
return tracking_outs
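# A minimal end-to-end sketch (the frame and model handles are assumptions;
# this mirrors how predict_image/predict_video below wire things together):
#   pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
#                                            FLAGS.threshold)
#   crops = reid_model.get_crops(pred_xyxys, frame)
#   tracking_outs = reid_model.predict(crops, pred_dets)
#   tlwhs, ids = tracking_outs['online_tlwhs'], tracking_outs['online_ids']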
def predict_image(detector, reid_model, image_list):
image_list.sort()
for i, img_file in enumerate(image_list):
frame = cv2.imread(img_file)
if FLAGS.run_benchmark:
pred_dets, pred_xyxys = detector.predict(
[frame], FLAGS.scaled, FLAGS.threshold, warmup=10, repeats=10)
cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm
detector.gpu_mem += gm
detector.gpu_util += gu
print('Test iter {}, file name:{}'.format(i, img_file))
else:
pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
FLAGS.threshold)
if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
print('Frame {} has no object, try to modify score threshold.'.
format(i))
online_im = frame
else:
# reid process
crops = reid_model.get_crops(pred_xyxys, frame)
if FLAGS.run_benchmark:
tracking_outs = reid_model.predict(
crops, pred_dets, warmup=10, repeats=10)
else:
tracking_outs = reid_model.predict(crops, pred_dets)
online_tlwhs = tracking_outs['online_tlwhs']
online_scores = tracking_outs['online_scores']
online_ids = tracking_outs['online_ids']
online_im = plot_tracking(
frame, online_tlwhs, online_ids, online_scores, frame_id=i)
if FLAGS.save_images:
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
img_name = os.path.split(img_file)[-1]
out_path = os.path.join(FLAGS.output_dir, img_name)
cv2.imwrite(out_path, online_im)
print("save result to: " + out_path)
def predict_video(detector, reid_model, camera_id):
if camera_id != -1:
capture = cv2.VideoCapture(camera_id)
video_name = 'mot_output.mp4'
else:
capture = cv2.VideoCapture(FLAGS.video_file)
video_name = os.path.split(FLAGS.video_file)[-1]
# Get Video info : resolution, fps, frame count
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("fps: %d, frame_count: %d" % (fps, frame_count))
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name)
if not FLAGS.save_images:
video_format = 'mp4v'
fourcc = cv2.VideoWriter_fourcc(*video_format)
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
frame_id = 0
timer = MOTTimer()
results = defaultdict(list)
id_set = set()
interval_id_set = set()
in_id_list = list()
out_id_list = list()
prev_center = dict()
records = list()
entrance = [0, height / 2., width, height / 2.]
video_fps = fps
while (1):
ret, frame = capture.read()
if not ret:
break
timer.tic()
pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
FLAGS.threshold)
if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
print('Frame {} has no object, try to modify score threshold.'.
format(frame_id))
timer.toc()
im = frame
else:
# reid process
crops = reid_model.get_crops(pred_xyxys, frame)
tracking_outs = reid_model.predict(crops, pred_dets)
online_tlwhs = tracking_outs['online_tlwhs']
online_scores = tracking_outs['online_scores']
online_ids = tracking_outs['online_ids']
results[0].append(
(frame_id + 1, online_tlwhs, online_scores, online_ids))
            # NOTE: flow statistic is only implemented for one class
result = (frame_id + 1, online_tlwhs, online_scores, online_ids)
statistic = flow_statistic(
result, FLAGS.secs_interval, FLAGS.do_entrance_counting,
video_fps, entrance, id_set, interval_id_set, in_id_list,
out_id_list, prev_center, records)
id_set = statistic['id_set']
interval_id_set = statistic['interval_id_set']
in_id_list = statistic['in_id_list']
out_id_list = statistic['out_id_list']
prev_center = statistic['prev_center']
records = statistic['records']
timer.toc()
fps = 1. / timer.duration
im = plot_tracking(
frame,
online_tlwhs,
online_ids,
online_scores,
frame_id=frame_id,
fps=fps,
do_entrance_counting=FLAGS.do_entrance_counting,
entrance=entrance)
if FLAGS.save_images:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
if not os.path.exists(save_dir):
os.makedirs(save_dir)
cv2.imwrite(
os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im)
else:
writer.write(im)
frame_id += 1
print('detect frame:%d, fps: %f' % (frame_id, fps))
if camera_id != -1:
cv2.imshow('Tracking Detection', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if FLAGS.save_mot_txts:
result_filename = os.path.join(FLAGS.output_dir,
video_name.split('.')[-2] + '.txt')
write_mot_results(result_filename, results)
result_filename = os.path.join(
FLAGS.output_dir, video_name.split('.')[-2] + '_flow_statistic.txt')
f = open(result_filename, 'w')
for line in records:
f.write(line)
print('Flow statistic save in {}'.format(result_filename))
f.close()
if FLAGS.save_images:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(save_dir,
out_path)
os.system(cmd_str)
print('Save video in {}.'.format(out_path))
else:
writer.release()
def predict_mtmct_seq(detector, reid_model, seq_name, output_dir):
fpath = os.path.join(FLAGS.mtmct_dir, seq_name)
if os.path.exists(os.path.join(fpath, 'img1')):
fpath = os.path.join(fpath, 'img1')
assert os.path.isdir(fpath), '{} should be a directory'.format(fpath)
image_list = os.listdir(fpath)
image_list.sort()
assert len(image_list) > 0, '{} has no images.'.format(fpath)
results = defaultdict(list)
mot_features_dict = {} # cid_tid_fid feats
    print('Found {} frames in total in seq {}.'.format(len(image_list), seq_name))
for frame_id, img_file in enumerate(image_list):
if frame_id % 40 == 0:
print('Processing frame {} of seq {}.'.format(frame_id, seq_name))
frame = cv2.imread(os.path.join(fpath, img_file))
        frame_path = os.path.join(fpath, img_file)
        pred_dets, pred_xyxys = detector.predict([frame_path], FLAGS.scaled,
                                                 FLAGS.threshold)
if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
print('Frame {} has no object, try to modify score threshold.'.
format(frame_id))
online_im = frame
else:
# reid process
crops = reid_model.get_crops(pred_xyxys, frame)
tracking_outs = reid_model.predict(
crops,
pred_dets,
MTMCT=True,
frame_id=frame_id,
seq_name=seq_name)
feat_data_dict = tracking_outs['feat_data']
mot_features_dict = dict(mot_features_dict, **feat_data_dict)
online_tlwhs = tracking_outs['online_tlwhs']
online_scores = tracking_outs['online_scores']
online_ids = tracking_outs['online_ids']
online_im = plot_tracking(frame, online_tlwhs, online_ids,
online_scores, frame_id)
results[0].append(
(frame_id + 1, online_tlwhs, online_scores, online_ids))
if FLAGS.save_images:
save_dir = os.path.join(output_dir, seq_name)
if not os.path.exists(save_dir): os.makedirs(save_dir)
img_name = os.path.split(img_file)[-1]
out_path = os.path.join(save_dir, img_name)
cv2.imwrite(out_path, online_im)
if FLAGS.save_mot_txts:
result_filename = os.path.join(output_dir, seq_name + '.txt')
write_mot_results(result_filename, results)
return mot_features_dict
def predict_mtmct(detector, reid_model, mtmct_dir, mtmct_cfg):
MTMCT = mtmct_cfg['MTMCT']
    assert MTMCT, 'predict_mtmct should be used for MTMCT.'
cameras_bias = mtmct_cfg['cameras_bias']
cid_bias = parse_bias(cameras_bias)
scene_cluster = list(cid_bias.keys())
    # 1. zone related parameters
use_zone = mtmct_cfg['use_zone']
zone_path = mtmct_cfg['zone_path']
    # 2. trick parameters, can be used for other MTMCT datasets
use_ff = mtmct_cfg['use_ff']
use_rerank = mtmct_cfg['use_rerank']
    # 3. camera related parameters
use_camera = mtmct_cfg['use_camera']
use_st_filter = mtmct_cfg['use_st_filter']
    # 4. roi related parameters
use_roi = mtmct_cfg['use_roi']
roi_dir = mtmct_cfg['roi_dir']
mot_list_breaks = []
cid_tid_dict = dict()
output_dir = FLAGS.output_dir
if not os.path.exists(output_dir): os.makedirs(output_dir)
seqs = os.listdir(mtmct_dir)
seqs.sort()
for seq in seqs:
fpath = os.path.join(mtmct_dir, seq)
if os.path.isfile(fpath) and _is_valid_video(fpath):
ext = seq.split('.')[-1]
seq = seq.split('.')[-2]
print('ffmpeg processing of video {}'.format(fpath))
frames_path = video2frames(video_path=fpath, outpath=mtmct_dir, frame_rate=25)
fpath = os.path.join(mtmct_dir, seq)
        if not os.path.isdir(fpath):
            print('{} is not an image folder.'.format(fpath))
continue
mot_features_dict = predict_mtmct_seq(detector, reid_model,
seq, output_dir)
        cid = int(re.sub('[a-zA-Z]', '', seq))
tid_data, mot_list_break = trajectory_fusion(
mot_features_dict,
cid,
cid_bias,
use_zone=use_zone,
zone_path=zone_path)
mot_list_breaks.append(mot_list_break)
# single seq process
for line in tid_data:
tracklet = tid_data[line]
tid = tracklet['tid']
if (cid, tid) not in cid_tid_dict:
cid_tid_dict[(cid, tid)] = tracklet
map_tid = sub_cluster(
cid_tid_dict,
scene_cluster,
use_ff=use_ff,
use_rerank=use_rerank,
use_camera=use_camera,
use_st_filter=use_st_filter)
pred_mtmct_file = os.path.join(output_dir, 'mtmct_result.txt')
if use_camera:
gen_res(pred_mtmct_file, scene_cluster, map_tid, mot_list_breaks)
else:
gen_res(
pred_mtmct_file,
scene_cluster,
map_tid,
mot_list_breaks,
use_roi=use_roi,
roi_dir=roi_dir)
if FLAGS.save_images:
        camera_results, cid_tid_fid_res = get_mtmct_matching_results(
            pred_mtmct_file)
crops_dir = os.path.join(output_dir, 'mtmct_crops')
save_mtmct_crops(
cid_tid_fid_res, images_dir=mtmct_dir, crops_dir=crops_dir)
save_dir = os.path.join(output_dir, 'mtmct_vis')
save_mtmct_vis_results(
            camera_results,
images_dir=mtmct_dir,
save_dir=save_dir,
save_videos=FLAGS.save_images)
    # evaluation metrics
data_root_gt = os.path.join(mtmct_dir, '..', 'gt', 'gt.txt')
if os.path.exists(data_root_gt):
print_mtmct_result(data_root_gt, pred_mtmct_file)
def main():
pred_config = PredictConfig(FLAGS.model_dir)
detector_func = 'SDE_Detector'
if pred_config.arch == 'PicoDet':
detector_func = 'SDE_DetectorPicoDet'
detector = eval(detector_func)(pred_config,
FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn)
pred_config = PredictConfig(FLAGS.reid_model_dir)
reid_model = SDE_ReID(
pred_config,
FLAGS.reid_model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.reid_batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
predict_video(detector, reid_model, FLAGS.camera_id)
elif FLAGS.mtmct_dir is not None:
mtmct_cfg_file = FLAGS.mtmct_cfg
with open(mtmct_cfg_file) as f:
mtmct_cfg = yaml.safe_load(f)
predict_mtmct(detector, reid_model, FLAGS.mtmct_dir, mtmct_cfg)
else:
# predict from image
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
predict_image(detector, reid_model, img_list)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
reid_model.det_times.info(average=True)
else:
mode = FLAGS.run_mode
det_model_dir = FLAGS.model_dir
det_model_info = {
'model_name': det_model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
bench_log(detector, img_list, det_model_info, name='Det')
reid_model_dir = FLAGS.reid_model_dir
reid_model_info = {
'model_name': reid_model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
bench_log(reid_model, img_list, reid_model_info, name='ReID')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
main()
# config for MTMCT
MTMCT: True
cameras_bias:
c041: 0
c042: 0
# 1. zone related parameters
use_zone: True
zone_path: dataset/mot/aic21mtmct_vehicle/S06/zone
# 2. trick parameters, can be used for other MTMCT datasets
use_ff: True
use_rerank: True
# 3. camera related parameters
use_camera: True
use_st_filter: False
# 4. roi related parameters
use_roi: True
roi_dir: dataset/mot/aic21mtmct_vehicle/S06
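# Usage sketch (the script name below is an assumption; all flags exist in
# the deployment argument parser):
#   python mot_sde_infer.py --model_dir=<det_model> \
#       --reid_model_dir=<reid_model> \
#       --mtmct_dir=dataset/mot/aic21mtmct_vehicle/S06 \
#       --mtmct_cfg=mtmct_cfg.yml --device=GPU --scaled=True --save_images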
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from scipy.special import softmax
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
        top_k: keep top_k results. If top_k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
picked: a list of indexes of the kept boxes
"""
scores = box_scores[:, -1]
boxes = box_scores[:, :-1]
picked = []
indexes = np.argsort(scores)
indexes = indexes[-candidate_size:]
while len(indexes) > 0:
current = indexes[-1]
picked.append(current)
if 0 < top_k == len(picked) or len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[:-1]
rest_boxes = boxes[indexes, :]
iou = iou_of(
rest_boxes,
np.expand_dims(
current_box, axis=0), )
indexes = indexes[iou <= iou_threshold]
return box_scores[picked, :]
def iou_of(boxes0, boxes1, eps=1e-5):
"""Return intersection-over-union (Jaccard index) of boxes.
Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
overlap_area = area_of(overlap_left_top, overlap_right_bottom)
area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
return overlap_area / (area0 + area1 - overlap_area + eps)
def area_of(left_top, right_bottom):
"""Compute the areas of rectangles given two corners.
Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.
Returns:
area (N): return the area.
"""
hw = np.clip(right_bottom - left_top, 0.0, None)
return hw[..., 0] * hw[..., 1]
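# A toy self-test sketch for hard_nms (the demo function and its values are
# illustrative, not part of the original module): two heavily overlapping
# boxes and one separate box; with iou_threshold=0.5 only the higher-scoring
# overlapping box survives.
def _hard_nms_demo():
    box_scores = np.array(
        [
            [10., 10., 50., 50., 0.9],  # kept: highest score
            [12., 12., 52., 52., 0.8],  # suppressed: IoU ~0.82 with box 0
            [100., 100., 140., 140., 0.7],  # kept: no overlap
        ],
        dtype=np.float32)
    kept = hard_nms(box_scores, iou_threshold=0.5)
    print(kept[:, -1])  # -> [0.9 0.7]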
class PicoDetPostProcess(object):
"""
Args:
input_shape (int): network input image size
ori_shape (int): ori image shape of before padding
scale_factor (float): scale factor of ori image
        strides (list): downsample strides of the multi-level feature maps
"""
def __init__(self,
input_shape,
ori_shape,
scale_factor,
strides=[8, 16, 32, 64],
score_threshold=0.4,
nms_threshold=0.5,
nms_top_k=1000,
keep_top_k=100):
self.ori_shape = ori_shape
self.input_shape = input_shape
self.scale_factor = scale_factor
self.strides = strides
self.score_threshold = score_threshold
self.nms_threshold = nms_threshold
self.nms_top_k = nms_top_k
self.keep_top_k = keep_top_k
def warp_boxes(self, boxes, ori_shape):
"""Apply transform to boxes
"""
width, height = ori_shape[1], ori_shape[0]
n = len(boxes)
if n:
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
# xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate(
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip boxes
xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
return xy.astype(np.float32)
else:
return boxes
def __call__(self, scores, raw_boxes):
batch_size = raw_boxes[0].shape[0]
reg_max = int(raw_boxes[0].shape[-1] / 4 - 1)
out_boxes_num = []
out_boxes_list = []
for batch_id in range(batch_size):
# generate centers
decode_boxes = []
select_scores = []
for stride, box_distribute, score in zip(self.strides, raw_boxes,
scores):
box_distribute = box_distribute[batch_id]
score = score[batch_id]
# centers
fm_h = self.input_shape[0] / stride
fm_w = self.input_shape[1] / stride
h_range = np.arange(fm_h)
w_range = np.arange(fm_w)
ww, hh = np.meshgrid(w_range, h_range)
ct_row = (hh.flatten() + 0.5) * stride
ct_col = (ww.flatten() + 0.5) * stride
center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
# box distribution to distance
reg_range = np.arange(reg_max + 1)
box_distance = box_distribute.reshape((-1, reg_max + 1))
box_distance = softmax(box_distance, axis=1)
box_distance = box_distance * np.expand_dims(reg_range, axis=0)
box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
box_distance = box_distance * stride
# top K candidate
topk_idx = np.argsort(score.max(axis=1))[::-1]
topk_idx = topk_idx[:self.nms_top_k]
center = center[topk_idx]
score = score[topk_idx]
box_distance = box_distance[topk_idx]
# decode box
decode_box = center + [-1, -1, 1, 1] * box_distance
select_scores.append(score)
decode_boxes.append(decode_box)
# nms
bboxes = np.concatenate(decode_boxes, axis=0)
confidences = np.concatenate(select_scores, axis=0)
picked_box_probs = []
picked_labels = []
for class_index in range(0, confidences.shape[1]):
probs = confidences[:, class_index]
mask = probs > self.score_threshold
probs = probs[mask]
if probs.shape[0] == 0:
continue
subset_boxes = bboxes[mask, :]
box_probs = np.concatenate(
[subset_boxes, probs.reshape(-1, 1)], axis=1)
box_probs = hard_nms(
box_probs,
iou_threshold=self.nms_threshold,
top_k=self.keep_top_k, )
picked_box_probs.append(box_probs)
picked_labels.extend([class_index] * box_probs.shape[0])
if len(picked_box_probs) == 0:
                out_boxes_list.append(np.empty((0, 6)))
out_boxes_num.append(0)
else:
picked_box_probs = np.concatenate(picked_box_probs)
# resize output boxes
picked_box_probs[:, :4] = self.warp_boxes(
picked_box_probs[:, :4], self.ori_shape[batch_id])
im_scale = np.concatenate([
self.scale_factor[batch_id][::-1],
self.scale_factor[batch_id][::-1]
])
picked_box_probs[:, :4] /= im_scale
# clas score box
out_boxes_list.append(
np.concatenate(
[
np.expand_dims(
np.array(picked_labels),
axis=-1), np.expand_dims(
picked_box_probs[:, 4], axis=-1),
picked_box_probs[:, :4]
],
axis=1))
out_boxes_num.append(len(picked_labels))
out_boxes_list = np.concatenate(out_boxes_list, axis=0)
out_boxes_num = np.asarray(out_boxes_num).astype(np.int32)
return out_boxes_list, out_boxes_num
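# Usage sketch (shapes are assumptions read off the code above): for a
# 320x320 input with strides [8, 16, 32, 64] and reg_max = 7, raw_boxes
# holds one (batch, H*W, 4*(reg_max+1)) array per stride and scores holds
# one (batch, H*W, num_classes) array per stride:
#   post = PicoDetPostProcess((320, 320), np.array([[320., 320.]]),
#                             np.array([[1., 1.]]))
#   boxes, boxes_num = post(np_score_list, np_boxes_list)
# Each row of boxes is [class_id, score, x1, y1, x2, y2].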
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
def decode_image(im_file, im_info):
"""read rgb image
Args:
im_file (str|np.ndarray): input can be image path or np.ndarray
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
if isinstance(im_file, str):
with open(im_file, 'rb') as f:
im_read = f.read()
data = np.frombuffer(im_read, dtype='uint8')
im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
else:
im = im_file
im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return im, im_info
class Resize(object):
"""resize image by target_size and max_size
Args:
target_size (int): the target size of image
keep_ratio (bool): whether keep_ratio or not, default true
interp (int): method of resize
"""
def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
self.keep_ratio = keep_ratio
self.interp = interp
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
im_channel = im.shape[2]
im_scale_y, im_scale_x = self.generate_scale(im)
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
im_info['scale_factor'] = np.array(
[im_scale_y, im_scale_x]).astype('float32')
return im, im_info
def generate_scale(self, im):
"""
Args:
im (np.ndarray): image (np.ndarray)
Returns:
im_scale_x: the resize ratio of X
im_scale_y: the resize ratio of Y
"""
origin_shape = im.shape[:2]
im_c = im.shape[2]
if self.keep_ratio:
im_size_min = np.min(origin_shape)
im_size_max = np.max(origin_shape)
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = float(target_size_min) / float(im_size_min)
if np.round(im_scale * im_size_max) > target_size_max:
im_scale = float(target_size_max) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = self.target_size
im_scale_y = resize_h / float(origin_shape[0])
im_scale_x = resize_w / float(origin_shape[1])
return im_scale_y, im_scale_x
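# Worked resize example (numbers are illustrative): with keep_ratio=True,
# target_size [576, 320] and a 720x1280 frame, im_scale = 320/720 ≈ 0.444;
# since round(0.444 * 1280) = 569 <= 576, both scale factors stay at ~0.444.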
class NormalizeImage(object):
"""normalize image
Args:
mean (list): im - mean
std (list): im / std
        is_scale (bool): whether to scale the image by 1/255 first
"""
def __init__(self, mean, std, is_scale=True):
self.mean = mean
self.std = std
self.is_scale = is_scale
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.astype(np.float32, copy=False)
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
return im, im_info
class Permute(object):
"""permute image
Args:
to_bgr (bool): whether convert RGB to BGR
channel_first (bool): whether convert HWC to CHW
"""
    def __init__(self):
super(Permute, self).__init__()
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.transpose((2, 0, 1)).copy()
return im, im_info
class PadStride(object):
""" padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
Args:
stride (bool): model with FPN need image shape % stride == 0
"""
def __init__(self, stride=0):
self.coarsest_stride = stride
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
coarsest_stride = self.coarsest_stride
if coarsest_stride <= 0:
return im, im_info
im_c, im_h, im_w = im.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
return padding_im, im_info
class LetterBoxResize(object):
def __init__(self, target_size):
"""
        Resize the image to the target size with unchanged aspect ratio,
        padding the borders to form a letterbox.
Args:
target_size (int|list): image target size.
"""
super(LetterBoxResize, self).__init__()
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
# letterbox: resize a rectangular image to a padded rectangular
shape = img.shape[:2] # [height, width]
ratio_h = float(height) / shape[0]
ratio_w = float(width) / shape[1]
ratio = min(ratio_h, ratio_w)
new_shape = (round(shape[1] * ratio),
round(shape[0] * ratio)) # [width, height]
padw = (width - new_shape[0]) / 2
padh = (height - new_shape[1]) / 2
top, bottom = round(padh - 0.1), round(padh + 0.1)
left, right = round(padw - 0.1), round(padw + 0.1)
img = cv2.resize(
img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=color) # padded rectangular
return img, ratio, padw, padh
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
height, width = self.target_size
h, w = im.shape[:2]
im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
new_shape = [round(h * ratio), round(w * ratio)]
im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
return im, im_info
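# Worked letterbox example (numbers are illustrative): fitting a 720x1280
# frame into height=608, width=1088 gives ratio = min(608/720, 1088/1280)
# ≈ 0.8444, a 608x1081 resized frame, and left/right padding of 3 and 4
# pixels to reach the full 1088-pixel width.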
def preprocess(im, preprocess_ops):
# process image by preprocess_ops
im_info = {
'scale_factor': np.array(
[1., 1.], dtype=np.float32),
'im_shape': None,
}
im, im_info = decode_image(im, im_info)
for operator in preprocess_ops:
im, im_info = operator(im, im_info)
return im, im_info
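# A minimal pipeline sketch (the operator choice and the ImageNet mean/std
# values are illustrative assumptions; the real chain is built from the
# exported model's infer_cfg.yml):
def _preprocess_demo(img_path):
    ops = [
        Resize(target_size=[320, 576], keep_ratio=False),
        NormalizeImage(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        Permute(),
    ]
    im, im_info = preprocess(img_path, ops)  # im is CHW float32
    return im, im_info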
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import os
import sys
import ast
import argparse
def argsparser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."),
required=True)
parser.add_argument(
"--image_file", type=str, default=None, help="Path of image file.")
parser.add_argument(
"--image_dir",
type=str,
default=None,
help="Dir of image file, `image_file` has a higher priority.")
parser.add_argument(
"--batch_size", type=int, default=1, help="batch_size for inference.")
parser.add_argument(
"--video_file",
type=str,
default=None,
help="Path of video file, `video_file` or `camera_id` has a highest priority."
)
parser.add_argument(
"--camera_id",
type=int,
default=-1,
help="device id of camera to predict.")
parser.add_argument(
"--threshold", type=float, default=0.5, help="Threshold of score.")
parser.add_argument(
"--output_dir",
type=str,
default="output",
help="Directory of output visualization files.")
parser.add_argument(
"--run_mode",
type=str,
default='fluid',
help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
)
parser.add_argument(
"--run_benchmark",
type=ast.literal_eval,
default=False,
help="Whether to predict a image_file repeatedly for benchmark")
parser.add_argument(
"--enable_mkldnn",
type=ast.literal_eval,
default=False,
help="Whether use mkldnn with CPU.")
parser.add_argument(
"--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
parser.add_argument(
"--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
parser.add_argument(
"--trt_max_shape",
type=int,
default=1280,
help="max_shape for TensorRT.")
parser.add_argument(
"--trt_opt_shape",
type=int,
default=640,
help="opt_shape for TensorRT.")
parser.add_argument(
"--trt_calib_mode",
type=bool,
default=False,
help="If the model is produced by TRT offline quantitative "
"calibration, trt_calib_mode need to set True.")
parser.add_argument(
'--save_images',
action='store_true',
help='Save visualization image results.')
parser.add_argument(
'--save_mot_txts',
action='store_true',
help='Save tracking results (txt).')
parser.add_argument(
'--scaled',
type=bool,
default=False,
help="Whether coords after detector outputs are scaled, False in JDE YOLOv3 "
"True in general detector.")
parser.add_argument(
"--reid_model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."))
parser.add_argument(
"--reid_batch_size",
type=int,
default=50,
help="max batch_size for reid model inference.")
parser.add_argument(
"--do_entrance_counting",
action='store_true',
help="Whether counting the numbers of identifiers entering "
"or getting out from the entrance. Note that only support one-class"
"counting, multi-class counting is coming soon.")
parser.add_argument(
"--secs_interval",
type=int,
default=2,
help="The seconds interval to count after tracking")
parser.add_argument(
"--draw_center_traj",
action='store_true',
help="Whether drawing the trajectory of center")
parser.add_argument(
"--mtmct_dir",
type=str,
default=None,
help="The MTMCT scene video folder.")
parser.add_argument(
"--mtmct_cfg", type=str, default=None, help="The MTMCT config.")
return parser
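# A typical single-video invocation sketch (script name and model paths are
# assumptions; all flags are defined above):
#   python mot_sde_infer.py --model_dir=./picodet_det_model \
#       --reid_model_dir=./pplcnet_reid_model --video_file=test.mp4 \
#       --device=GPU --scaled=True --save_mot_txts --do_entrance_counting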
class Times(object):
def __init__(self):
self.time = 0.
# start time
self.st = 0.
# end time
self.et = 0.
def start(self):
self.st = time.time()
def end(self, repeats=1, accumulative=True):
self.et = time.time()
if accumulative:
self.time += (self.et - self.st) / repeats
else:
self.time = (self.et - self.st) / repeats
def reset(self):
self.time = 0.
self.st = 0.
self.et = 0.
def value(self):
return round(self.time, 4)
class Timer(Times):
def __init__(self):
super(Timer, self).__init__()
self.preprocess_time_s = Times()
self.inference_time_s = Times()
self.postprocess_time_s = Times()
self.img_num = 0
def info(self, average=False):
total_time = self.preprocess_time_s.value(
) + self.inference_time_s.value() + self.postprocess_time_s.value()
total_time = round(total_time, 4)
print("------------------ Inference Time Info ----------------------")
print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
self.img_num))
preprocess_time = round(
self.preprocess_time_s.value() / max(1, self.img_num),
4) if average else self.preprocess_time_s.value()
postprocess_time = round(
self.postprocess_time_s.value() / max(1, self.img_num),
4) if average else self.postprocess_time_s.value()
inference_time = round(self.inference_time_s.value() /
max(1, self.img_num),
4) if average else self.inference_time_s.value()
average_latency = total_time / max(1, self.img_num)
qps = 0
if total_time > 0:
qps = 1 / average_latency
print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
average_latency * 1000, qps))
print(
"preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}".
format(preprocess_time * 1000, inference_time * 1000,
postprocess_time * 1000))
def report(self, average=False):
dic = {}
dic['preprocess_time_s'] = round(
self.preprocess_time_s.value() / max(1, self.img_num),
4) if average else self.preprocess_time_s.value()
dic['postprocess_time_s'] = round(
self.postprocess_time_s.value() / max(1, self.img_num),
4) if average else self.postprocess_time_s.value()
dic['inference_time_s'] = round(
self.inference_time_s.value() / max(1, self.img_num),
4) if average else self.inference_time_s.value()
dic['img_num'] = self.img_num
total_time = self.preprocess_time_s.value(
) + self.inference_time_s.value() + self.postprocess_time_s.value()
dic['total_time_s'] = round(total_time, 4)
return dic
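# A minimal usage sketch of the Timer helper above (the demo function is
# hypothetical, not part of the original module):
def _timer_demo():
    timer = Timer()
    timer.preprocess_time_s.start()
    time.sleep(0.01)  # stand-in for real preprocessing work
    timer.preprocess_time_s.end()
    timer.img_num += 1
    timer.info(average=True)  # prints per-image latency and QPS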
def get_current_memory_mb():
"""
    It is used to obtain the CPU and GPU memory usage while the program runs.
    Note that this function itself is time-consuming.
"""
import pynvml
import psutil
import GPUtil
gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0))
pid = os.getpid()
p = psutil.Process(pid)
info = p.memory_full_info()
cpu_mem = info.uss / 1024. / 1024.
gpu_mem = 0
gpu_percent = 0
gpus = GPUtil.getGPUs()
if gpu_id is not None and len(gpus) > 0:
gpu_percent = gpus[gpu_id].load
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpu_mem = meminfo.used / 1024. / 1024.
return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
def video2frames(video_path, outpath, frame_rate=25, **kargs):
def _dict2str(kargs):
cmd_str = ''
for k, v in kargs.items():
cmd_str += (' ' + str(k) + ' ' + str(v))
return cmd_str
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = os.path.basename(video_path).split('.')[0]
out_full_path = os.path.join(outpath, vid_name)
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = os.path.join(out_full_path, '%05d.jpg')
    cmd = ffmpeg + [
        ' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
    ]
cmd = ''.join(cmd) + _dict2str(kargs)
    if os.system(cmd) != 0:
        raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
sys.stdout.flush()
return out_full_path
def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', '.flv')):
return f.lower().endswith(extensions)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
from collections import deque
def visualize_box_mask(im, results, labels, threshold=0.5):
"""
Args:
im (str/np.ndarray): path of image/np.ndarray read by cv2
        results (dict): include 'boxes': np.ndarray: shape: [N, 6], N: number of boxes,
            matrix element: [class, score, x_min, y_min, x_max, y_max]
labels (list): labels:['class1', ..., 'classn']
threshold (float): Threshold of score.
Returns:
im (PIL.Image.Image): visualized image
"""
if isinstance(im, str):
im = Image.open(im).convert('RGB')
else:
im = Image.fromarray(im)
if 'boxes' in results and len(results['boxes']) > 0:
im = draw_box(im, results['boxes'], labels, threshold=threshold)
return im
def get_color_map_list(num_classes):
"""
Args:
num_classes (int): number of class
Returns:
color_map (list): RGB color list
"""
color_map = num_classes * [0, 0, 0]
for i in range(0, num_classes):
j = 0
lab = i
while lab:
color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
j += 1
lab >>= 3
color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
return color_map
def draw_box(im, np_boxes, labels, threshold=0.5):
"""
Args:
im (PIL.Image.Image): PIL image
        np_boxes (np.ndarray): shape: [N, 6], N: number of boxes,
            matrix element: [class, score, x_min, y_min, x_max, y_max]
labels (list): labels:['class1', ..., 'classn']
threshold (float): threshold of box
Returns:
im (PIL.Image.Image): visualized image
"""
draw_thickness = min(im.size) // 320
draw = ImageDraw.Draw(im)
clsid2color = {}
color_list = get_color_map_list(len(labels))
expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
np_boxes = np_boxes[expect_boxes, :]
for dt in np_boxes:
clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
if clsid not in clsid2color:
clsid2color[clsid] = color_list[clsid]
color = tuple(clsid2color[clsid])
if len(bbox) == 4:
xmin, ymin, xmax, ymax = bbox
print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
'right_bottom:[{:.2f},{:.2f}]'.format(
int(clsid), score, xmin, ymin, xmax, ymax))
# draw bbox
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=draw_thickness,
fill=color)
elif len(bbox) == 8:
x1, y1, x2, y2, x3, y3, x4, y4 = bbox
draw.line(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
width=2,
fill=color)
xmin = min(x1, x2, x3, x4)
ymin = min(y1, y2, y3, y4)
# draw label
text = "{} {:.4f}".format(labels[clsid], score)
tw, th = draw.textsize(text)
draw.rectangle(
[(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
return im
def get_color(idx):
idx = idx * 3
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color
def plot_tracking(image,
tlwhs,
obj_ids,
scores=None,
frame_id=0,
fps=0.,
ids2names=[],
do_entrance_counting=False,
entrance=None):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 2
line_thickness = max(1, int(image.shape[1] / 500.))
cv2.putText(
im,
'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
(0, int(15 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
obj_id = int(obj_ids[i])
id_text = '{}'.format(int(obj_id))
if ids2names != []:
            assert len(
                ids2names) == 1, "plot_tracking only supports a single class."
id_text = '{}_'.format(ids2names[0]) + id_text
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(
im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
cv2.putText(
im,
id_text, (intbox[0], intbox[1] - 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=text_thickness)
if scores is not None:
text = '{:.2f}'.format(float(scores[i]))
cv2.putText(
im,
text, (intbox[0], intbox[1] + 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255),
thickness=text_thickness)
if do_entrance_counting:
entrance_line = tuple(map(int, entrance))
cv2.rectangle(
im,
entrance_line[0:2],
entrance_line[2:4],
color=(0, 255, 255),
thickness=line_thickness)
return im
def plot_tracking_dict(image,
num_classes,
tlwhs_dict,
obj_ids_dict,
scores_dict,
frame_id=0,
fps=0.,
ids2names=[],
do_entrance_counting=False,
entrance=None,
records=None,
center_traj=None):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 2
line_thickness = max(1, int(image.shape[1] / 500.))
if num_classes == 1:
start = records[-1].find('Total')
end = records[-1].find('In')
cv2.putText(
im,
records[-1][start:end], (0, int(40 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
if num_classes == 1 and do_entrance_counting:
entrance_line = tuple(map(int, entrance))
cv2.rectangle(
im,
entrance_line[0:2],
entrance_line[2:4],
color=(0, 255, 255),
thickness=line_thickness)
# find start location for entrance counting data
start = records[-1].find('In')
cv2.putText(
im,
records[-1][start:-1], (0, int(60 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
for cls_id in range(num_classes):
tlwhs = tlwhs_dict[cls_id]
obj_ids = obj_ids_dict[cls_id]
scores = scores_dict[cls_id]
cv2.putText(
im,
'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
(0, int(15 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
record_id = set()
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
center = tuple(map(int, (x1 + w / 2., y1 + h / 2.)))
obj_id = int(obj_ids[i])
if center_traj is not None:
record_id.add(obj_id)
if obj_id not in center_traj[cls_id]:
center_traj[cls_id][obj_id] = deque(maxlen=30)
center_traj[cls_id][obj_id].append(center)
id_text = '{}'.format(int(obj_id))
if ids2names != []:
id_text = '{}_{}'.format(ids2names[cls_id], id_text)
else:
id_text = 'class{}_{}'.format(cls_id, id_text)
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(
im,
intbox[0:2],
intbox[2:4],
color=color,
thickness=line_thickness)
cv2.putText(
im,
id_text, (intbox[0], intbox[1] - 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=text_thickness)
if scores is not None:
text = '{:.2f}'.format(float(scores[i]))
cv2.putText(
im,
text, (intbox[0], intbox[1] + 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255),
thickness=text_thickness)
if center_traj is not None:
for traj in center_traj:
for i in traj.keys():
if i not in record_id:
continue
for point in traj[i]:
cv2.circle(im, point, 3, (0, 0, 255), -1)
return im