Unverified Commit 76d3274a Authored by: W wangguanzhong Committed by: GitHub

add pptracking (#4661)

Parent 48761c4a
# Real-Time Tracking System PP-Tracking
PP-Tracking is the industry's first open-source real-time tracking system built on the PaddlePaddle deep learning framework. Targeting the pain points of real-world applications, PP-Tracking provides built-in capabilities and industrial applications including pedestrian and vehicle tracking, multi-target multi-camera tracking, multi-class tracking, small-object tracking, and flow counting, together with a visual development interface. The models integrate lightweight multi-object tracking, object detection, and ReID algorithms to further improve PP-Tracking's server-side deployment performance. Both Python and C++ deployment are supported, covering Linux and NVIDIA Jetson platforms.
<div width="1000" align="center">
<img src="../../docs/images/pptracking.png"/>
</div>
<div width="1000" align="center">
<img src="../../docs/images/pptracking-demo.gif"/>
<br>
Video sources: VisDrone2021 and BDD100K open-source datasets</div>
### 1. Quick Start
PP-Tracking provides a concise visual interface that delivers the various tracking functions without any development work. See the [PP-Tracking GUI user guide]() to get started quickly.
### 2. Algorithms
PP-Tracking integrates lightweight multi-object tracking, object detection, and ReID algorithms to improve the real-time performance of the tracking system. The multi-object tracking algorithm is optimized from FairMOT into a lightweight server-side model, and scenario-specific pretrained models are provided for different applications.
For model training and evaluation, see the [multi-object tracking quick start](../../configs/mot/README_cn.md#快速开始).
The pretrained and exported models PP-Tracking provides for each scenario are listed below:
| Scenario | Dataset | Accuracy (MOTA) | Inference speed on NX (FPS) | Config | Weights | Inference model |
| :---------:|:--------------- | :-------: | :------: | :------: |:---: | :---: |
| Pedestrian tracking | MOT17 | 65.3 | 23.9 | [config](../../configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.tar) |
| Small-object pedestrian tracking | VisDrone-pedestrian | 40.5 | 8.35 | [config](../../configs/mot/pedestrian/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone_pedestrian.tar) |
| Vehicle tracking | BDD100k-vehicle | 32.6 | 24.3 | [config](../../configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100kmot_vehicle.tar) |
| Small-object vehicle tracking | VisDrone-vehicle | 39.8 | 22.8 | [config](../../configs/mot/vehicle/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone_vehicle.tar) |
| Multi-class tracking | BDD100k | - | 12.5 | [config]() | [download]() | [download](https://bj.bcebos.com/v1/paddledet/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_bdd100k_mcmot.tar) |
| Small-object multi-class tracking | VisDrone | 20.4 | 6.74 | [config](../../configs/mot/mcfairmot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.yml) | [download](https://paddledet.bj.bcebos.com/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.pdparams) | [download](https://bj.bcebos.com/v1/paddledet/models/mot/mcfairmot_hrnetv2_w18_dlafpn_30e_1088x608_visdrone.tar) |
**Notes:**
1. Inference speed is measured with TensorRT FP16; the test environment is CUDA 10.2, JetPack 4.5.1, TensorRT 7.1.
2. For more tracking models, see the [multi-object tracking model zoo](../../configs/mot/README_cn.md#模型库).
The detection model uses the lightweight flagship model PP-PicoDet; see the [PP-PicoDet documentation](../../configs/picodet) for details.
The ReID model uses the ultra-lightweight backbone PP-LCNet; see the [PP-LCNet introduction](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.3/docs/zh_CN/models/PP-LCNet.md) for details.
### 3. Python Inference Deployment
For deploying PP-Tracking with Python inference, see the [PP-Tracking Python deployment guide](python/README.md).
### 4. C++ Inference Deployment
For deploying PP-Tracking with C++ inference, see the [PP-Tracking C++ deployment guide](cpp/README.md).
cmake_minimum_required(VERSION 3.0)
project(PaddleObjectDetector CXX C)
option(WITH_MKL "Compile demo with MKL/OpenBlas support, default use MKL." ON)
option(WITH_GPU "Compile demo with GPU/CPU, default use GPU." ON)
option(WITH_TENSORRT "Compile demo with TensorRT." OFF)
SET(PADDLE_DIR "" CACHE PATH "Location of libraries")
SET(PADDLE_LIB_NAME "" CACHE STRING "libpaddle_inference")
SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
SET(CUDA_LIB "" CACHE PATH "Location of libraries")
SET(CUDNN_LIB "" CACHE PATH "Location of libraries")
SET(TENSORRT_INC_DIR "" CACHE PATH "Compile demo with TensorRT")
SET(TENSORRT_LIB_DIR "" CACHE PATH "Compile demo with TensorRT")
include(cmake/yaml-cpp.cmake)
include_directories("${CMAKE_SOURCE_DIR}/")
include_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/src/ext-yaml-cpp/include")
link_directories("${CMAKE_CURRENT_BINARY_DIR}/ext/yaml-cpp/lib")
set(SRCS src/main.cc src/preprocess_op.cc src/pipeline.cc src/jde_predictor.cc src/sde_predictor.cc src/tracker.cc src/trajectory.cc src/lapjv.cpp src/postprocess.cc)
macro(safe_set_static_flag)
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif(${flag_var} MATCHES "/MD")
endforeach(flag_var)
endmacro()
if (WITH_MKL)
ADD_DEFINITIONS(-DUSE_MKL)
endif()
if (NOT DEFINED PADDLE_DIR OR ${PADDLE_DIR} STREQUAL "")
message(FATAL_ERROR "please set PADDLE_DIR with -DPADDLE_DIR=/path/paddle_influence_dir")
endif()
message("PADDLE_DIR IS:" ${PADDLE_DIR})
if (NOT DEFINED OPENCV_DIR OR ${OPENCV_DIR} STREQUAL "")
message(FATAL_ERROR "please set OPENCV_DIR with -DOPENCV_DIR=/path/opencv")
endif()
include_directories("${CMAKE_SOURCE_DIR}/")
include_directories("${PADDLE_DIR}/")
include_directories("${PADDLE_DIR}/third_party/install/protobuf/include")
include_directories("${PADDLE_DIR}/third_party/install/glog/include")
include_directories("${PADDLE_DIR}/third_party/install/gflags/include")
include_directories("${PADDLE_DIR}/third_party/install/xxhash/include")
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/include")
include_directories("${PADDLE_DIR}/third_party/install/snappy/include")
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/include")
include_directories("${PADDLE_DIR}/third_party/install/snappystream/include")
endif()
include_directories("${PADDLE_DIR}/third_party/boost")
include_directories("${PADDLE_DIR}/third_party/eigen3")
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
link_directories("${PADDLE_DIR}/third_party/install/snappy/lib")
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
link_directories("${PADDLE_DIR}/third_party/install/snappystream/lib")
endif()
link_directories("${PADDLE_DIR}/third_party/install/protobuf/lib")
link_directories("${PADDLE_DIR}/third_party/install/glog/lib")
link_directories("${PADDLE_DIR}/third_party/install/gflags/lib")
link_directories("${PADDLE_DIR}/third_party/install/xxhash/lib")
link_directories("${PADDLE_DIR}/paddle/lib/")
link_directories("${CMAKE_CURRENT_BINARY_DIR}")
if (WIN32)
include_directories("${PADDLE_DIR}/paddle/fluid/inference")
include_directories("${PADDLE_DIR}/paddle/include")
link_directories("${PADDLE_DIR}/paddle/fluid/inference")
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/build/ NO_DEFAULT_PATH)
else ()
find_package(OpenCV REQUIRED PATHS ${OPENCV_DIR}/share/OpenCV NO_DEFAULT_PATH)
include_directories("${PADDLE_DIR}/paddle/include")
link_directories("${PADDLE_DIR}/paddle/lib")
endif ()
include_directories(${OpenCV_INCLUDE_DIRS})
if (WIN32)
add_definitions("/DGOOGLE_GLOG_DLL_DECL=")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O2 -fopenmp -std=c++11")
set(CMAKE_STATIC_LIBRARY_PREFIX "")
endif()
# TODO let users define cuda lib path
if (WITH_GPU)
if (NOT DEFINED CUDA_LIB OR ${CUDA_LIB} STREQUAL "")
message(FATAL_ERROR "please set CUDA_LIB with -DCUDA_LIB=/path/cuda-8.0/lib64")
endif()
if (NOT WIN32)
if (NOT DEFINED CUDNN_LIB)
message(FATAL_ERROR "please set CUDNN_LIB with -DCUDNN_LIB=/path/cudnn_v7.4/cuda/lib64")
endif()
endif(NOT WIN32)
endif()
if (NOT WIN32)
if (WITH_TENSORRT AND WITH_GPU)
include_directories("${TENSORRT_INC_DIR}/")
link_directories("${TENSORRT_LIB_DIR}/")
endif()
endif(NOT WIN32)
if (NOT WIN32)
set(NGRAPH_PATH "${PADDLE_DIR}/third_party/install/ngraph")
if(EXISTS ${NGRAPH_PATH})
include(GNUInstallDirs)
include_directories("${NGRAPH_PATH}/include")
link_directories("${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}")
set(NGRAPH_LIB ${NGRAPH_PATH}/${CMAKE_INSTALL_LIBDIR}/libngraph${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
endif()
if(WITH_MKL)
include_directories("${PADDLE_DIR}/third_party/install/mklml/include")
if (WIN32)
set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.lib
${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.lib)
else ()
set(MATH_LIB ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX}
${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5${CMAKE_SHARED_LIBRARY_SUFFIX})
execute_process(COMMAND cp -r ${PADDLE_DIR}/third_party/install/mklml/lib/libmklml_intel${CMAKE_SHARED_LIBRARY_SUFFIX} /usr/lib)
endif ()
set(MKLDNN_PATH "${PADDLE_DIR}/third_party/install/mkldnn")
if(EXISTS ${MKLDNN_PATH})
include_directories("${MKLDNN_PATH}/include")
if (WIN32)
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/mkldnn.lib)
else ()
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
endif ()
endif()
else()
set(MATH_LIB ${PADDLE_DIR}/third_party/install/openblas/lib/libopenblas${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
if (WIN32)
if(EXISTS "${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX}")
set(DEPS
${PADDLE_DIR}/paddle/fluid/inference/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS
${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
endif()
if (WIN32)
set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_STATIC_LIBRARY_SUFFIX})
else()
set(DEPS ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
message("PADDLE_LIB_NAME:" ${PADDLE_LIB_NAME})
message("DEPS:" $DEPS)
if (NOT WIN32)
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags protobuf z xxhash yaml-cpp
)
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
set(DEPS ${DEPS} snappystream)
endif()
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
set(DEPS ${DEPS} snappy)
endif()
else()
set(DEPS ${DEPS}
${MATH_LIB} ${MKLDNN_LIB}
glog gflags_static libprotobuf xxhash libyaml-cppmt)
set(DEPS ${DEPS} libcmt shlwapi)
if (EXISTS "${PADDLE_DIR}/third_party/install/snappy/lib")
set(DEPS ${DEPS} snappy)
endif()
if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib")
set(DEPS ${DEPS} snappystream)
endif()
endif(NOT WIN32)
if(WITH_GPU)
if(NOT WIN32)
if (WITH_TENSORRT)
set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
else()
set(DEPS ${DEPS} ${CUDA_LIB}/cudart${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDA_LIB}/cublas${CMAKE_STATIC_LIBRARY_SUFFIX} )
set(DEPS ${DEPS} ${CUDNN_LIB}/cudnn${CMAKE_STATIC_LIBRARY_SUFFIX})
endif()
endif()
if (NOT WIN32)
set(EXTERNAL_LIB "-ldl -lrt -lgomp -lz -lm -lpthread")
set(DEPS ${DEPS} ${EXTERNAL_LIB})
endif()
set(DEPS ${DEPS} ${OpenCV_LIBS})
add_executable(main ${SRCS})
ADD_DEPENDENCIES(main ext-yaml-cpp)
message("DEPS:" $DEPS)
target_link_libraries(main ${DEPS})
if (WIN32 AND WITH_MKL)
add_custom_command(TARGET main POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./mklml.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./libiomp5md.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./mkldnn.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/mklml.dll ./release/mklml.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mklml/lib/libiomp5md.dll ./release/libiomp5md.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/third_party/install/mkldnn/lib/mkldnn.dll ./release/mkldnn.dll
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll
)
endif()
if (WIN32)
add_custom_command(TARGET main POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${PADDLE_DIR}/paddle/lib/${PADDLE_LIB_NAME}.dll ./release/${PADDLE_LIB_NAME}.dll
)
endif()
# C++ Inference Deployment
In PaddlePaddle, the inference engine and the training engine use different underlying optimizations. The inference engine uses AnalysisPredictor, which is optimized specifically for inference: it applies multiple graph-level optimizations to the model and removes unnecessary memory copies. For users with high performance requirements when deploying trained models, we provide inference scripts that are independent of PaddleDetection and easy to integrate. The current C++ deployment supports single-camera tracking based on FairMOT, including pedestrian flow statistics and entrance/exit counting; a configuration sketch follows the list below.
It consists of three main steps:
- Prepare the environment
- Export the inference model
- Run C++ inference
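Before walking through these steps, here is a minimal sketch of the AnalysisPredictor setup underlying this deployment. It condenses the calls used later in `src/jde_predictor.cc`; the helper name `BuildPredictor` and the paths are illustrative only:
```cpp
// Minimal sketch of the AnalysisPredictor setup (condensed from
// src/jde_predictor.cc); BuildPredictor and the paths are illustrative.
#include <memory>
#include <string>
#include "paddle_inference_api.h"

std::shared_ptr<paddle_infer::Predictor> BuildPredictor(
    const std::string& model_dir) {
  paddle_infer::Config config;
  config.SetModel(model_dir + "/model.pdmodel",
                  model_dir + "/model.pdiparams");
  config.EnableUseGpu(200, /*gpu_id=*/0);  // use config.DisableGpu() for CPU
  config.SwitchIrOptim(true);   // graph-level optimizations
  config.EnableMemoryOptim();   // reduce unnecessary memory copies
  return paddle_infer::CreatePredictor(config);
}
```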
## 1. Prepare the Environment
Requirements:
- GCC 8.2
- CUDA 10.1/10.2/11.1; cuDNN 7.6/8.1
- CMake 3.0+
- TensorRT 6/7
NVIDIA Jetson users should follow the [Jetson build guide](../../cpp/Jetson_build.md#jetson环境搭建) to complete the JetPack installation.
### 1. Download the code
```
git clone https://github.com/PaddlePaddle/PaddleDetection.git
# The C++ deployment code is independent of the rest of the repository
cd deploy/pptracking/cpp
```
### 2. Download or build the PaddlePaddle C++ inference library
Download the inference library that matches your environment from the [C++ inference library download list](https://paddleinference.paddlepaddle.org.cn/user_guides/download_lib.html).
After downloading and extracting, the `./paddle_inference` directory contains:
```
paddle_inference
├── paddle # core Paddle libraries and headers
|
├── third_party # third-party dependencies and headers
|
└── version.txt # version and build information
```
**Note:** If your environment differs from the officially provided ones (e.g., different CUDA, cuDNN, or TensorRT versions), if you need to modify the PaddlePaddle source code, or if you need a customized build, you can compile the inference library from source following this [guide](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html).
### 3. Build
The `cmake` build command lives in `scripts/build.sh`. Adjust its main parameters to your environment; they are described below:
```
# Whether to use GPU (i.e., whether to use CUDA)
WITH_GPU=ON
# Whether to use MKL or OpenBLAS; must be set to OFF on TX2
WITH_MKL=OFF
# Whether to integrate TensorRT (only effective when WITH_GPU=ON)
WITH_TENSORRT=ON
# TensorRT include path
TENSORRT_INC_DIR=/path/to/TensorRT/include
# TensorRT lib path
TENSORRT_LIB_DIR=/path/to/TensorRT/lib
# Paddle inference library path
PADDLE_DIR=/path/to/paddle_inference/
# Paddle inference library name
PADDLE_LIB_NAME=libpaddle_inference
# CUDA lib path
CUDA_LIB=/path/to/cuda/lib
# cuDNN lib path
CUDNN_LIB=/path/to/cudnn/lib
# OpenCV path
OPENCV_DIR=/path/to/opencv
```
After setting the main parameters, run the ```build.sh``` script:
```
sh ./scripts/build.sh
```
**Notes:**
1. On the `TX2` platform, `CUDA` and `CUDNN` must be installed via `JetPack`.
2. Prebuilt OpenCV downloads are provided for Linux and TX2; on other platforms, install [OpenCV](https://opencv.org/) yourself.
## 2. Export the Inference Model
Export the trained weights into the model format required by the inference library using ```tools/export_model.py``` in PaddleDetection:
```
python tools/export_model.py -c configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams
```
The inference model is exported to ```output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320``` by default and contains ```infer_cfg.yml```, ```model.pdiparams```, ```model.pdiparams.info```, and ```model.pdmodel```.
Exported models can also be downloaded directly from the [inference model list]().
## 3. C++ Inference
After completing the steps above, run inference with ```build/main```; the parameters are listed below:
| Parameter | Description |
| ---- | ---- |
| --track_model_dir | Path to the exported tracking inference model |
| --video_file | Path to the video file to run on |
| --device | Device to run on: one of `CPU/GPU/XPU`, default `CPU` |
| --gpu_id | GPU device id used for inference (default 0) |
| --run_mode | Mode when using GPU, one of (fluid/trt_fp32/trt_fp16/trt_int8); default fluid |
| --output_dir | Directory for output images, default output |
| --use_mkldnn | Whether to enable MKL-DNN acceleration for CPU inference |
| --cpu_threads | Number of CPU threads, default 1 |
| --do_entrance_counting | Whether to count entrance/exit flow, default false |
| --save_result | Whether to save tracking results |
Example 1:
```shell
# Run on CPU with video `test.mp4`; the model and the test video have been moved into the `build` directory
./main --track_model_dir=./fairmot_hrnetv2_w18_dlafpn_30e_576x320 --video_file=test.mp4
# The visualized prediction is saved by default to output/test.mp4 in the current directory
```
Example 2:
```shell
# Run on GPU with video `test.mp4`, enabling entrance/exit counting and saving tracking results; the model and the test video have been moved into the `build` directory
./main -video_file=test.mp4 -track_model_dir=./fairmot_dla34_30e_1088x608/ --device=gpu --do_entrance_counting=True --save_result=True
# The visualized prediction is saved by default to `output/test.mp4` in the current directory
# Tracking results are saved to `output/mot_output.txt`
# Counting results are saved to `output/flow_statistic.txt`
```
find_package(Git REQUIRED)
include(ExternalProject)
message("${CMAKE_BUILD_TYPE}")
ExternalProject_Add(
ext-yaml-cpp
URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
URL_MD5 9542d6de397d1fbd649ed468cb5850e6
CMAKE_ARGS
-DYAML_CPP_BUILD_TESTS=OFF
-DYAML_CPP_BUILD_TOOLS=OFF
-DYAML_CPP_INSTALL=OFF
-DYAML_CPP_BUILD_CONTRIB=OFF
-DMSVC_SHARED_RT=OFF
-DBUILD_SHARED_LIBS=OFF
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
-DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
# Disable install step
INSTALL_COMMAND ""
LOG_DOWNLOAD ON
LOG_BUILD 1
)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "yaml-cpp/yaml.h"
#ifdef _WIN32
#define OS_PATH_SEP "\\"
#else
#define OS_PATH_SEP "/"
#endif
namespace PaddleDetection {
// Inference model configuration parser
class ConfigPaser {
public:
ConfigPaser() {}
~ConfigPaser() {}
bool load_config(const std::string& model_dir,
const std::string& cfg = "infer_cfg.yml") {
// Load as a YAML::Node
YAML::Node config;
config = YAML::LoadFile(model_dir + OS_PATH_SEP + cfg);
// Get runtime mode : fluid, trt_fp16, trt_fp32
if (config["mode"].IsDefined()) {
mode_ = config["mode"].as<std::string>();
} else {
std::cerr << "Please set mode, "
<< "support value : fluid/trt_fp16/trt_fp32." << std::endl;
return false;
}
// Get model arch: FairMot or YOLO/Picodet/LCNet for DeepSort
if (config["arch"].IsDefined()) {
arch_ = config["arch"].as<std::string>();
} else {
std::cerr << "Please set model arch,"
<< "support value : FairMot, YOLO, PicoDet, LCNet etc"
<< std::endl;
return false;
}
// Get min_subgraph_size for tensorrt
if (config["min_subgraph_size"].IsDefined()) {
min_subgraph_size_ = config["min_subgraph_size"].as<int>();
} else {
std::cerr << "Please set min_subgraph_size." << std::endl;
return false;
}
// Get draw_threshold for visualization
if (config["draw_threshold"].IsDefined()) {
draw_threshold_ = config["draw_threshold"].as<float>();
} else {
std::cerr << "Please set draw_threshold." << std::endl;
return false;
}
// Get Preprocess for preprocessing
if (config["Preprocess"].IsDefined()) {
preprocess_info_ = config["Preprocess"];
} else {
std::cerr << "Please set Preprocess." << std::endl;
return false;
}
// Get label_list for visualization
if (config["label_list"].IsDefined()) {
label_list_ = config["label_list"].as<std::vector<std::string>>();
} else {
std::cerr << "Please set label_list." << std::endl;
return false;
}
// Get use_dynamic_shape for TensorRT
if (config["use_dynamic_shape"].IsDefined()) {
use_dynamic_shape_ = config["use_dynamic_shape"].as<bool>();
} else {
std::cerr << "Please set use_dynamic_shape." << std::endl;
return false;
}
// Get conf_thresh for tracker
if (config["tracker"].IsDefined()) {
if (config["tracker"]["conf_thres"].IsDefined()) {
conf_thresh_ = config["tracker"]["conf_thres"].as<float>();
} else {
std::cerr << "Please set conf_thres in tracker." << std::endl;
return false;
}
}
// Get NMS for postprocess
if (config["NMS"].IsDefined()) {
nms_info_ = config["NMS"];
}
// Get fpn_stride in PicoDet
if (config["fpn_stride"].IsDefined()) {
fpn_stride_.clear();
for (auto item : config["fpn_stride"]) {
fpn_stride_.emplace_back(item.as<int>());
}
}
return true;
}
std::string mode_;
float draw_threshold_;
std::string arch_;
int min_subgraph_size_;
YAML::Node preprocess_info_;
YAML::Node nms_info_;
std::vector<std::string> label_list_;
std::vector<int> fpn_stride_;
bool use_dynamic_shape_;
float conf_thresh_;
};
} // namespace PaddleDetection
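As a quick orientation, here is a hypothetical usage sketch for `ConfigPaser` (not part of the shipped sources; the model directory is a placeholder):
```cpp
// Hypothetical ConfigPaser usage; the model directory is a placeholder.
#include <iostream>
#include "include/config_parser.h"

int main() {
  PaddleDetection::ConfigPaser config;
  // Parses <model_dir>/infer_cfg.yml produced by tools/export_model.py
  if (!config.load_config("./fairmot_hrnetv2_w18_dlafpn_30e_576x320")) {
    return -1;
  }
  std::cout << "arch: " << config.arch_ << ", mode: " << config.mode_
            << ", conf_thres: " << config.conf_thresh_ << std::endl;
  return 0;
}
```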
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/preprocess_op.h"
#include "include/utils.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
class JDEPredictor {
public:
explicit JDEPredictor(const std::string& device = "CPU",
const std::string& model_dir = "",
const double threshold = -1.,
const std::string& run_mode = "fluid",
const int gpu_id = 0,
const bool use_mkldnn = false,
const int cpu_threads = 1,
bool trt_calib_mode = false,
const int min_box_area = 200) {
this->device_ = device;
this->gpu_id_ = gpu_id;
this->use_mkldnn_ = use_mkldnn;
this->cpu_math_library_num_threads_ = cpu_threads;
this->trt_calib_mode_ = trt_calib_mode;
this->min_box_area_ = min_box_area;
config_.load_config(model_dir);
this->min_subgraph_size_ = config_.min_subgraph_size_;
preprocessor_.Init(config_.preprocess_info_);
LoadModel(model_dir, run_mode);
this->conf_thresh_ = config_.conf_thresh_;
}
// Load Paddle inference model
void LoadModel(const std::string& model_dir,
const std::string& run_mode = "fluid");
// Run predictor
void Predict(const std::vector<cv::Mat> imgs,
const double threshold = 0.5,
MOTResult* result = nullptr,
std::vector<double>* times = nullptr);
private:
std::string device_ = "CPU";
float threshold = 0.5;
int gpu_id_ = 0;
bool use_mkldnn_ = false;
int cpu_math_library_num_threads_ = 1;
int min_subgraph_size_ = 3;
bool trt_calib_mode_ = false;
// Preprocess image and copy data to input buffer
void Preprocess(const cv::Mat& image_mat);
// Postprocess result
void Postprocess(const cv::Mat dets, const cv::Mat emb, MOTResult* result);
std::shared_ptr<Predictor> predictor_;
Preprocessor preprocessor_;
ImageBlob inputs_;
std::vector<float> bbox_data_;
std::vector<float> emb_data_;
double threshold_;
ConfigPaser config_;
float min_box_area_;
float conf_thresh_;
};
} // namespace PaddleDetection
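A hypothetical calling sketch for `JDEPredictor` (the model path and input frame are placeholders; `times` accumulates preprocess, inference, and postprocess latencies in milliseconds, matching the bookkeeping in `src/jde_predictor.cc`):
```cpp
// Hypothetical JDEPredictor usage; model path and frame are placeholders.
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/jde_predictor.h"

void TrackOneFrame(const cv::Mat& frame) {
  PaddleDetection::JDEPredictor predictor(
      "GPU", "./fairmot_hrnetv2_w18_dlafpn_30e_576x320", 0.5, "trt_fp16");
  PaddleDetection::MOTResult result;
  std::vector<double> times(3, 0.0);  // preprocess / inference / postprocess
  predictor.Predict({frame}, /*threshold=*/0.5, &result, &times);
  for (const auto& track : result) {
    // track.ids, track.score, track.rects.{left, top, right, bottom}
  }
}
```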
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/gatagat/lap/blob/master/lap/lapjv.h
// The copyright of gatagat/lap is as follows:
// MIT License
#ifndef DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_
#define DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_
#define LARGE 1000000
#if !defined TRUE
#define TRUE 1
#endif
#if !defined FALSE
#define FALSE 0
#endif
#define NEW(x, t, n) \
if ((x = reinterpret_cast<t *>(malloc(sizeof(t) * (n)))) == 0) { \
return -1; \
}
#define FREE(x) \
if (x != 0) { \
free(x); \
x = 0; \
}
#define SWAP_INDICES(a, b) \
{ \
int_t _temp_index = a; \
a = b; \
b = _temp_index; \
}
#include <opencv2/opencv.hpp>
namespace PaddleDetection {
typedef signed int int_t;
typedef unsigned int uint_t;
typedef double cost_t;
typedef char boolean;
typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t;
int lapjv_internal(const cv::Mat &cost,
const bool extend_cost,
const float cost_limit,
int *x,
int *y);
} // namespace PaddleDetection
#endif // DEPLOY_PPTRACKING_CPP_INCLUDE_LAPJV_H_
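A small hypothetical example of `lapjv_internal` on a 2x2 cost matrix; `x[i]` receives the column assigned to row `i` and `y[j]` the row assigned to column `j`:
```cpp
// Hypothetical lapjv_internal call on a tiny dense cost matrix.
#include <opencv2/opencv.hpp>
#include "include/lapjv.h"

void TinyAssignment() {
  float data[4] = {1.0f, 2.0f,
                   3.0f, 0.5f};
  cv::Mat cost(2, 2, CV_32F, data);
  int x[2], y[2];
  PaddleDetection::lapjv_internal(cost, /*extend_cost=*/false,
                                  /*cost_limit=*/LARGE, x, y);
  // Expected: row 0 -> col 0, row 1 -> col 1 (total cost 1.5)
}
```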
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_
#define DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_
#include <glog/logging.h>
#include <math.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#elif LINUX
#include <stdarg.h>
#include <sys/stat.h>
#endif
#include "include/jde_predictor.h"
#include "include/sde_predictor.h"
namespace PaddleDetection {
class Pipeline {
public:
explicit Pipeline(const std::string& device,
const double threshold,
const std::string& output_dir,
const std::string& run_mode = "fluid",
const int gpu_id = 0,
const bool use_mkldnn = false,
const int cpu_threads = 1,
const bool trt_calib_mode = false,
const bool do_entrance_counting = false,
const bool save_result = false,
const std::string& scene = "pedestrian",
const bool tiny_obj = false,
const bool is_mtmct = false,
const int secs_interval = 10,
const std::string track_model_dir = "",
const std::string det_model_dir = "",
const std::string reid_model_dir = "") {
std::vector<std::string> input;
this->input_ = input;
this->device_ = device;
this->threshold_ = threshold;
this->output_dir_ = output_dir;
this->run_mode_ = run_mode;
this->gpu_id_ = gpu_id;
this->use_mkldnn_ = use_mkldnn;
this->cpu_threads_ = cpu_threads;
this->trt_calib_mode_ = trt_calib_mode;
this->do_entrance_counting_ = do_entrance_counting;
this->secs_interval_ = secs_interval;
this->save_result_ = save_result;
SelectModel(scene,
tiny_obj,
is_mtmct,
track_model_dir,
det_model_dir,
reid_model_dir);
InitPredictor();
}
// Set input, it must execute before Run()
void SetInput(const std::string& input_video);
void ClearInput();
// Run pipeline in video
void Run();
void PredictMOT(const std::string& video_path);
void PredictMTMCT(const std::vector<std::string> video_inputs);
// Run pipeline in stream
void RunMOTStream(const cv::Mat img,
const int frame_id,
const int video_fps,
const Rect entrance,
cv::Mat out_img,
std::vector<std::string>* records,
std::set<int>* count_set,
std::set<int>* interval_count_set,
std::vector<int>* in_count_list,
std::vector<int>* out_count_list,
std::map<int, std::vector<float>>* prev_center,
std::vector<std::string>* flow_records);
void RunMTMCTStream(const std::vector<cv::Mat> imgs,
std::vector<std::string>* records);
void PrintBenchmarkLog(const std::vector<double> det_time, const int img_num);
private:
// Select model according to scenes, it must execute before Run()
void SelectModel(const std::string& scene = "pedestrian",
const bool tiny_obj = false,
const bool is_mtmct = false,
const std::string track_model_dir = "",
const std::string det_model_dir = "",
const std::string reid_model_dir = "");
void InitPredictor();
std::shared_ptr<PaddleDetection::JDEPredictor> jde_sct_;
std::shared_ptr<PaddleDetection::SDEPredictor> sde_sct_;
std::vector<std::string> input_;
std::vector<cv::Mat> stream_;
std::string device_;
double threshold_;
std::string output_dir_;
std::string track_model_dir_;
std::string det_model_dir_;
std::string reid_model_dir_;
std::string run_mode_ = "fluid";
int gpu_id_ = 0;
bool use_mkldnn_ = false;
int cpu_threads_ = 1;
bool trt_calib_mode_ = false;
bool do_entrance_counting_ = false;
bool save_result_ = false;
int secs_interval_ = 10;
};
} // namespace PaddleDetection
#endif // DEPLOY_PPTRACKING_CPP_INCLUDE_PIPELINE_H_
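A hypothetical end-to-end sketch using `Pipeline`; the constructor arguments follow the declaration above, and the video path and model directory are placeholders:
```cpp
// Hypothetical Pipeline usage; video path and model directory are placeholders.
#include "include/pipeline.h"

int main() {
  PaddleDetection::Pipeline pipeline(
      "GPU",         // device
      0.5,           // threshold
      "output",      // output_dir
      "fluid",       // run_mode
      0,             // gpu_id
      false,         // use_mkldnn
      1,             // cpu_threads
      false,         // trt_calib_mode
      true,          // do_entrance_counting
      true,          // save_result
      "pedestrian",  // scene
      false,         // tiny_obj
      false,         // is_mtmct
      10,            // secs_interval
      "./fairmot_hrnetv2_w18_dlafpn_30e_576x320");  // track_model_dir
  pipeline.SetInput("test.mp4");  // must be set before Run()
  pipeline.Run();
  return 0;
}
```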
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <ctime>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/utils.h"
namespace PaddleDetection {
// Generate visualization color
cv::Scalar GetColor(int idx);
// Visualize Tracking Results
cv::Mat VisualizeTrackResult(const cv::Mat& img,
const MOTResult& results,
const float fps,
const int frame_id);
// Pedestrian/Vehicle Counting
void FlowStatistic(const MOTResult& results,
const int frame_id,
const int secs_interval,
const bool do_entrance_counting,
const int video_fps,
const Rect entrance,
std::set<int>* id_set,
std::set<int>* interval_id_set,
std::vector<int>* in_id_list,
std::vector<int>* out_id_list,
std::map<int, std::vector<float>>* prev_center,
std::vector<std::string>* records);
// Save Tracking Results
void SaveMOTResult(const MOTResult& results,
const int frame_id,
std::vector<std::string>* records);
} // namespace PaddleDetection
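A hypothetical per-frame sketch combining these helpers; `include/postprocess.h` is an assumed header name (the build lists `src/postprocess.cc`), and the entrance line coordinates are placeholders. Counting state must persist across frames:
```cpp
// Hypothetical per-frame use of the counting and visualization helpers;
// "include/postprocess.h" is an assumed header name for these declarations.
#include <map>
#include <set>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/postprocess.h"

void HandleFrame(const cv::Mat& frame,
                 const PaddleDetection::MOTResult& results,
                 int frame_id, int video_fps) {
  // Counting state persists across frames (members in real code).
  static std::set<int> id_set, interval_id_set;
  static std::vector<int> in_id_list, out_id_list;
  static std::map<int, std::vector<float>> prev_center;
  static std::vector<std::string> records;
  PaddleDetection::Rect entrance = {0.f, 400.f, 1920.f, 400.f};  // placeholder
  PaddleDetection::FlowStatistic(results, frame_id, /*secs_interval=*/10,
                                 /*do_entrance_counting=*/true, video_fps,
                                 entrance, &id_set, &interval_id_set,
                                 &in_id_list, &out_id_list, &prev_center,
                                 &records);
  cv::Mat vis = PaddleDetection::VisualizeTrackResult(
      frame, results, static_cast<float>(video_fps), frame_id);
  (void)vis;  // write to a video file in real code
}
```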
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/jde_predictor.h"
#include "include/preprocess_op.h"
#include "include/sde_predictor.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
class Predictor {
public:
explicit Predictor(const std::string& device = "CPU",
const std::string& track_model_dir = "",
const std::string& det_model_dir = "",
const std::string& reid_model_dir = "",
const double threshold = -1.,
const std::string& run_mode = "fluid",
const int gpu_id = 0,
const bool use_mkldnn = false,
const int cpu_threads = 1,
bool trt_calib_mode = false,
const int min_box_area = 200) {
if (track_model_dir.empty() && det_model_dir.empty()) {
throw "Predictor must receive track_model or det_model!";
}
if (!track_model_dir.empty() && !det_model_dir.empty()) {
throw "Predictor only receive one of track_model or det_model!";
}
if (!track_model_dir.empty()) {
jde_sct_ =
std::make_shared<PaddleDetection::JDEPredictor>(device,
track_model_dir,
threshold,
run_mode,
gpu_id,
use_mkldnn,
cpu_threads,
trt_calib_mode,
min_box_area);
use_jde_ = true;
}
if (!det_model_dir.empty()) {
sde_sct_ = std::make_shared<PaddleDetection::SDEPredictor>(device,
det_model_dir,
reid_model_dir,
threshold,
run_mode,
gpu_id,
use_mkldnn,
cpu_threads,
trt_calib_mode,
min_box_area);
use_jde_ = false;
}
}
// Run predictor
void Predict(const std::vector<cv::Mat> imgs,
const double threshold = 0.5,
MOTResult* result = nullptr,
std::vector<double>* times = nullptr);
private:
std::shared_ptr<PaddleDetection::JDEPredictor> jde_sct_;
std::shared_ptr<PaddleDetection::SDEPredictor> sde_sct_;
bool use_jde_ = true;
};
} // namespace PaddleDetection
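The wrapper chooses JDE mode (a single FairMOT model) or SDE mode (separate detector and ReID models) based on which directories are non-empty. A hypothetical construction sketch, with `include/predictor.h` as an assumed header name and placeholder model directories:
```cpp
// Hypothetical Predictor construction; "include/predictor.h" is an assumed
// header name and the model directories are placeholders.
#include "include/predictor.h"

void BuildPredictors() {
  // JDE mode: one FairMOT model does detection and embedding jointly.
  PaddleDetection::Predictor jde("GPU", "./fairmot_model", "", "");
  // SDE mode: a detector (e.g. PP-PicoDet) plus a ReID model (e.g. PP-LCNet).
  PaddleDetection::Predictor sde("GPU", "", "./picodet_model", "./pplcnet_reid");
}
```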
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
#include <yaml-cpp/yaml.h>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
namespace PaddleDetection {
// Object for storing all preprocessed data
class ImageBlob {
public:
// image width and height
std::vector<float> im_shape_;
// Buffer for image data after preprocessing
std::vector<float> im_data_;
// in net data shape(after pad)
std::vector<float> in_net_shape_;
// Evaluation image width and height
// std::vector<float> eval_im_size_f_;
// Scale factor for image size to origin image size
std::vector<float> scale_factor_;
};
// Abstraction of preprocessing operation class
class PreprocessOp {
public:
virtual void Init(const YAML::Node& item) = 0;
virtual void Run(cv::Mat* im, ImageBlob* data) = 0;
};
class InitInfo : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {}
virtual void Run(cv::Mat* im, ImageBlob* data);
};
class NormalizeImage : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
mean_ = item["mean"].as<std::vector<float>>();
scale_ = item["std"].as<std::vector<float>>();
is_scale_ = item["is_scale"].as<bool>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
// CHW or HWC
std::vector<float> mean_;
std::vector<float> scale_;
bool is_scale_;
};
class Permute : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {}
virtual void Run(cv::Mat* im, ImageBlob* data);
};
class Resize : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
interp_ = item["interp"].as<int>();
keep_ratio_ = item["keep_ratio"].as<bool>();
target_size_ = item["target_size"].as<std::vector<int>>();
}
// Compute best resize scale for x-dimension, y-dimension
std::pair<float, float> GenerateScale(const cv::Mat& im);
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
int interp_;
bool keep_ratio_;
std::vector<int> target_size_;
std::vector<int> in_net_shape_;
};
class LetterBoxResize : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
target_size_ = item["target_size"].as<std::vector<int>>();
}
float GenerateScale(const cv::Mat& im);
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
std::vector<int> target_size_;
std::vector<int> in_net_shape_;
};
// Models with FPN need input shape % stride == 0
class PadStride : public PreprocessOp {
public:
virtual void Init(const YAML::Node& item) {
stride_ = item["stride"].as<int>();
}
virtual void Run(cv::Mat* im, ImageBlob* data);
private:
int stride_;
};
class Preprocessor {
public:
void Init(const YAML::Node& config_node) {
// initialize image info at first
ops_["InitInfo"] = std::make_shared<InitInfo>();
for (const auto& item : config_node) {
auto op_name = item["type"].as<std::string>();
ops_[op_name] = CreateOp(op_name);
ops_[op_name]->Init(item);
}
}
std::shared_ptr<PreprocessOp> CreateOp(const std::string& name) {
if (name == "Resize") {
return std::make_shared<Resize>();
} else if (name == "LetterBoxResize") {
return std::make_shared<LetterBoxResize>();
} else if (name == "Permute") {
return std::make_shared<Permute>();
} else if (name == "NormalizeImage") {
return std::make_shared<NormalizeImage>();
} else if (name == "PadStride") {
// use PadStride instead of PadBatch
return std::make_shared<PadStride>();
}
std::cerr << "can not find function of OP: " << name
<< " and return: nullptr" << std::endl;
return nullptr;
}
void Run(cv::Mat* im, ImageBlob* data);
public:
static const std::vector<std::string> RUN_ORDER;
private:
std::unordered_map<std::string, std::shared_ptr<PreprocessOp>> ops_;
};
} // namespace PaddleDetection
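A hypothetical sketch of driving `Preprocessor` directly from a YAML snippet shaped like the `Preprocess` section of an exported `infer_cfg.yml`:
```cpp
// Hypothetical direct use of Preprocessor; the YAML mirrors the shape of the
// Preprocess section in an exported infer_cfg.yml.
#include <opencv2/opencv.hpp>
#include <yaml-cpp/yaml.h>
#include "include/preprocess_op.h"

void PreprocessOneImage(cv::Mat* im) {
  YAML::Node ops = YAML::Load(
      "[{type: LetterBoxResize, target_size: [320, 576]},"
      " {type: NormalizeImage, mean: [0, 0, 0], std: [1, 1, 1], is_scale: true},"
      " {type: Permute}]");
  PaddleDetection::Preprocessor preprocessor;
  preprocessor.Init(ops);
  PaddleDetection::ImageBlob blob;
  preprocessor.Run(im, &blob);  // blob.im_data_ holds the CHW float input
}
```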
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <ctime>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "paddle_inference_api.h" // NOLINT
#include "include/config_parser.h"
#include "include/preprocess_op.h"
#include "include/utils.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
class SDEPredictor {
public:
explicit SDEPredictor(const std::string& device,
const std::string& det_model_dir = "",
const std::string& reid_model_dir = "",
const double threshold = -1.,
const std::string& run_mode = "fluid",
const int gpu_id = 0,
const bool use_mkldnn = false,
const int cpu_threads = 1,
bool trt_calib_mode = false,
const int min_box_area = 200) {
this->device_ = device;
this->gpu_id_ = gpu_id;
this->use_mkldnn_ = use_mkldnn;
this->cpu_math_library_num_threads_ = cpu_threads;
this->trt_calib_mode_ = trt_calib_mode;
this->min_box_area_ = min_box_area;
det_config_.load_config(det_model_dir);
this->min_subgraph_size_ = det_config_.min_subgraph_size_;
det_preprocessor_.Init(det_config_.preprocess_info_);
reid_config_.load_config(reid_model_dir);
reid_preprocessor_.Init(reid_config_.preprocess_info_);
LoadModel(det_model_dir, reid_model_dir, run_mode);
this->conf_thresh_ = det_config_.conf_thresh_;
}
// Load Paddle inference model
void LoadModel(const std::string& det_model_dir,
const std::string& reid_model_dir,
const std::string& run_mode = "fluid");
// Run predictor
void Predict(const std::vector<cv::Mat> imgs,
const double threshold = 0.5,
MOTResult* result = nullptr,
std::vector<double>* times = nullptr);
private:
std::string device_ = "CPU";
float threshold = 0.5;
int gpu_id_ = 0;
bool use_mkldnn_ = false;
int cpu_math_library_num_threads_ = 1;
int min_subgraph_size_ = 3;
bool trt_calib_mode_ = false;
// Preprocess image and copy data to input buffer
void Preprocess(const cv::Mat& image_mat);
// Postprocess result
void Postprocess(const cv::Mat dets, const cv::Mat emb, MOTResult* result);
std::shared_ptr<Predictor> det_predictor_;
std::shared_ptr<Predictor> reid_predictor_;
Preprocessor det_preprocessor_;
Preprocessor reid_preprocessor_;
ImageBlob inputs_;
std::vector<float> bbox_data_;
std::vector<float> emb_data_;
double threshold_;
ConfigPaser det_config_;
ConfigPaser reid_config_;
float min_box_area_ = 200;
float conf_thresh_;
};
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.h
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#pragma once
#include <map>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "include/trajectory.h"
namespace PaddleDetection {
typedef std::map<int, int> Match;
typedef std::map<int, int>::iterator MatchIterator;
struct Track {
int id;
float score;
cv::Vec4f ltrb;
};
class JDETracker {
public:
static JDETracker *instance(void);
virtual bool update(const cv::Mat &dets,
const cv::Mat &emb,
std::vector<Track> *tracks);
private:
JDETracker(void);
virtual ~JDETracker(void) {}
cv::Mat motion_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b);
void linear_assignment(const cv::Mat &cost,
float cost_limit,
Match *matches,
std::vector<int> *mismatch_row,
std::vector<int> *mismatch_col);
void remove_duplicate_trajectory(TrajectoryPool *a,
TrajectoryPool *b,
float iou_thresh = 0.15f);
private:
static JDETracker *me;
int timestamp;
TrajectoryPool tracked_trajectories;
TrajectoryPool lost_trajectories;
TrajectoryPool removed_trajectories;
int max_lost_time;
float lambda;
float det_thresh;
};
} // namespace PaddleDetection
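A hypothetical sketch of feeding the singleton tracker, as `JDEPredictor::Postprocess` does; `dets` is an Nx6 float matrix whose first four columns are the box and whose fifth is the score, and `emb` holds one embedding row per detection:
```cpp
// Hypothetical direct use of the singleton JDETracker; in the real pipeline
// dets and emb come from the FairMOT outputs (see src/jde_predictor.cc).
#include <vector>
#include <opencv2/opencv.hpp>
#include "include/tracker.h"

void UpdateTracker(const cv::Mat& dets, const cv::Mat& emb) {
  std::vector<PaddleDetection::Track> tracks;
  PaddleDetection::JDETracker::instance()->update(dets, emb, &tracks);
  for (const auto& t : tracks) {
    // t.id, t.score, t.ltrb = (left, top, right, bottom)
  }
}
```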
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.h
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#pragma once
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "opencv2/video/tracking.hpp"
namespace PaddleDetection {
typedef enum { New = 0, Tracked = 1, Lost = 2, Removed = 3 } TrajectoryState;
class Trajectory;
typedef std::vector<Trajectory> TrajectoryPool;
typedef std::vector<Trajectory>::iterator TrajectoryPoolIterator;
typedef std::vector<Trajectory *> TrajectoryPtrPool;
typedef std::vector<Trajectory *>::iterator TrajectoryPtrPoolIterator;
class TKalmanFilter : public cv::KalmanFilter {
public:
TKalmanFilter(void);
virtual ~TKalmanFilter(void) {}
virtual void init(const cv::Mat &measurement);
virtual const cv::Mat &predict();
virtual const cv::Mat &correct(const cv::Mat &measurement);
virtual void project(cv::Mat *mean, cv::Mat *covariance) const;
private:
float std_weight_position;
float std_weight_velocity;
};
inline TKalmanFilter::TKalmanFilter(void) : cv::KalmanFilter(8, 4) {
cv::KalmanFilter::transitionMatrix = cv::Mat::eye(8, 8, CV_32F);
for (int i = 0; i < 4; ++i)
cv::KalmanFilter::transitionMatrix.at<float>(i, i + 4) = 1;
cv::KalmanFilter::measurementMatrix = cv::Mat::eye(4, 8, CV_32F);
std_weight_position = 1 / 20.f;
std_weight_velocity = 1 / 160.f;
}
class Trajectory : public TKalmanFilter {
public:
Trajectory();
Trajectory(const cv::Vec4f &ltrb, float score, const cv::Mat &embedding);
Trajectory(const Trajectory &other);
Trajectory &operator=(const Trajectory &rhs);
virtual ~Trajectory(void) {}
static int next_id();
virtual const cv::Mat &predict(void);
virtual void update(Trajectory *traj,
int timestamp,
bool update_embedding = true);
virtual void activate(int timestamp);
virtual void reactivate(Trajectory *traj, int timestamp, bool newid = false);
virtual void mark_lost(void);
virtual void mark_removed(void);
friend TrajectoryPool operator+(const TrajectoryPool &a,
const TrajectoryPool &b);
friend TrajectoryPool operator+(const TrajectoryPool &a,
const TrajectoryPtrPool &b);
friend TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT
const TrajectoryPtrPool &b);
friend TrajectoryPool operator-(const TrajectoryPool &a,
const TrajectoryPool &b);
friend TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT
const TrajectoryPool &b);
friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend TrajectoryPtrPool operator+(const TrajectoryPtrPool &a,
TrajectoryPool *b);
friend TrajectoryPtrPool operator-(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend cv::Mat embedding_distance(const TrajectoryPool &a,
const TrajectoryPool &b);
friend cv::Mat embedding_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend cv::Mat embedding_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPool &a,
const TrajectoryPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b);
friend cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b);
friend cv::Mat iou_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b);
friend cv::Mat iou_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b);
private:
void update_embedding(const cv::Mat &embedding);
public:
TrajectoryState state;
cv::Vec4f ltrb;
cv::Mat smooth_embedding;
int id;
bool is_activated;
int timestamp;
int starttime;
float score;
private:
static int count;
cv::Vec4f xyah;
cv::Mat current_embedding;
float eta;
int length;
};
inline cv::Vec4f ltrb2xyah(const cv::Vec4f &ltrb) {
cv::Vec4f xyah;
xyah[0] = (ltrb[0] + ltrb[2]) * 0.5f;
xyah[1] = (ltrb[1] + ltrb[3]) * 0.5f;
xyah[3] = ltrb[3] - ltrb[1];
xyah[2] = (ltrb[2] - ltrb[0]) / xyah[3];
return xyah;
}
inline Trajectory::Trajectory()
: state(New),
ltrb(cv::Vec4f()),
smooth_embedding(cv::Mat()),
id(0),
is_activated(false),
timestamp(0),
starttime(0),
score(0),
eta(0.9),
length(0) {}
inline Trajectory::Trajectory(const cv::Vec4f &ltrb_,
float score_,
const cv::Mat &embedding)
: state(New),
ltrb(ltrb_),
smooth_embedding(cv::Mat()),
id(0),
is_activated(false),
timestamp(0),
starttime(0),
score(score_),
eta(0.9),
length(0) {
xyah = ltrb2xyah(ltrb);
update_embedding(embedding);
}
inline Trajectory::Trajectory(const Trajectory &other)
: state(other.state),
ltrb(other.ltrb),
id(other.id),
is_activated(other.is_activated),
timestamp(other.timestamp),
starttime(other.starttime),
xyah(other.xyah),
score(other.score),
eta(other.eta),
length(other.length) {
other.smooth_embedding.copyTo(smooth_embedding);
other.current_embedding.copyTo(current_embedding);
// copy state in KalmanFilter
other.statePre.copyTo(cv::KalmanFilter::statePre);
other.statePost.copyTo(cv::KalmanFilter::statePost);
other.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre);
other.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost);
}
inline Trajectory &Trajectory::operator=(const Trajectory &rhs) {
this->state = rhs.state;
this->ltrb = rhs.ltrb;
rhs.smooth_embedding.copyTo(this->smooth_embedding);
this->id = rhs.id;
this->is_activated = rhs.is_activated;
this->timestamp = rhs.timestamp;
this->starttime = rhs.starttime;
this->xyah = rhs.xyah;
this->score = rhs.score;
rhs.current_embedding.copyTo(this->current_embedding);
this->eta = rhs.eta;
this->length = rhs.length;
// copy state in KalmanFilter
rhs.statePre.copyTo(cv::KalmanFilter::statePre);
rhs.statePost.copyTo(cv::KalmanFilter::statePost);
rhs.errorCovPre.copyTo(cv::KalmanFilter::errorCovPre);
rhs.errorCovPost.copyTo(cv::KalmanFilter::errorCovPost);
return *this;
}
inline int Trajectory::next_id() {
++count;
return count;
}
inline void Trajectory::mark_lost(void) { state = Lost; }
inline void Trajectory::mark_removed(void) { state = Removed; }
} // namespace PaddleDetection
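A worked example of the `ltrb2xyah` conversion defined above: for a box with corners (0, 0) and (50, 100), the center is (25, 50), the height is 100, and the aspect ratio (width/height) is 0.5.
```cpp
// Worked example of ltrb2xyah for a box with corners (0, 0) and (50, 100).
cv::Vec4f ltrb(0.f, 0.f, 50.f, 100.f);
cv::Vec4f xyah = PaddleDetection::ltrb2xyah(ltrb);
// xyah = (25, 50, 0.5, 100): center x, center y, width/height, height
```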
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <ctime>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "include/tracker.h"
namespace PaddleDetection {
struct Rect {
float left;
float top;
float right;
float bottom;
};
struct MOTTrack {
int ids;
float score;
Rect rects;
int class_id = -1;
};
typedef std::vector<MOTTrack> MOTResult;
} // namespace PaddleDetection
# Whether to use GPU (i.e., whether to use CUDA)
WITH_GPU=OFF
# Whether to use MKL or OpenBLAS; must be set to OFF on TX2
WITH_MKL=ON
# Whether to integrate TensorRT (only effective when WITH_GPU=ON)
WITH_TENSORRT=OFF
# Name of the Paddle inference lib; it differs across platforms and library versions, so check the lib name under the `paddle_inference/lib/` folder of the downloaded inference library
PADDLE_LIB_NAME=libpaddle_inference
# TensorRT include path
TENSORRT_INC_DIR=/path/to/tensorrt/include
# TensorRT lib path
TENSORRT_LIB_DIR=/path/to/tensorrt/lib
# Paddle inference library path
PADDLE_DIR=/path/to/paddle_inference
# CUDA lib path
CUDA_LIB=/path/to/cuda/lib
# cuDNN lib path
CUDNN_LIB=/path/to/cudnn/lib
MACHINE_TYPE=`uname -m`
echo "MACHINE_TYPE: "${MACHINE_TYPE}
if [ "$MACHINE_TYPE" = "x86_64" ]
then
echo "set OPENCV_DIR for x86_64"
# On Linux, download the prebuilt OpenCV with the following commands
mkdir -p $(pwd)/deps && cd $(pwd)/deps
wget -c https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz
tar -xvf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz && cd ..
# set OPENCV_DIR
OPENCV_DIR=$(pwd)/deps/opencv-3.4.16_gcc8.2_ffmpeg
elif [ "$MACHINE_TYPE" = "aarch64" ]
then
echo "set OPENCV_DIR for aarch64"
# On TX2, download the prebuilt OpenCV with the following commands
mkdir -p $(pwd)/deps && cd $(pwd)/deps
wget -c https://bj.bcebos.com/v1/paddledet/data/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz
tar -xvf TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0.tar.gz && cd ..
# set OPENCV_DIR
OPENCV_DIR=$(pwd)/deps/TX2_JetPack4.3_opencv_3.4.6_gcc7.5.0/
else
echo "Please set OPENCV_DIR manually"
fi
echo "OPENCV_DIR: "$OPENCV_DIR
# No changes needed below this line
rm -rf build
mkdir -p build
cd build
cmake .. \
-DWITH_GPU=${WITH_GPU} \
-DWITH_MKL=${WITH_MKL} \
-DWITH_TENSORRT=${WITH_TENSORRT} \
-DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \
-DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
-DPADDLE_DIR=${PADDLE_DIR} \
-DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
-DCUDA_LIB=${CUDA_LIB} \
-DCUDNN_LIB=${CUDNN_LIB} \
-DOPENCV_DIR=${OPENCV_DIR} \
-DPADDLE_LIB_NAME=${PADDLE_LIB_NAME}
make
echo "make finished!"
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/jde_predictor.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
// Load Model and create model predictor
void JDEPredictor::LoadModel(const std::string& model_dir,
const std::string& run_mode) {
paddle_infer::Config config;
std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
config.SetModel(prog_file, params_file);
if (this->device_ == "GPU") {
config.EnableUseGpu(200, this->gpu_id_);
config.SwitchIrOptim(true);
// use tensorrt
if (run_mode != "fluid") {
auto precision = paddle_infer::Config::Precision::kFloat32;
if (run_mode == "trt_fp32") {
precision = paddle_infer::Config::Precision::kFloat32;
} else if (run_mode == "trt_fp16") {
precision = paddle_infer::Config::Precision::kHalf;
} else if (run_mode == "trt_int8") {
precision = paddle_infer::Config::Precision::kInt8;
} else {
printf(
"run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'");
}
// set tensorrt
config.EnableTensorRtEngine(1 << 30,
1,
this->min_subgraph_size_,
precision,
false,
this->trt_calib_mode_);
}
} else if (this->device_ == "XPU") {
config.EnableXpu(10 * 1024 * 1024);
} else {
config.DisableGpu();
if (this->use_mkldnn_) {
config.EnableMKLDNN();
// cache 10 different shapes for mkldnn to avoid memory leak
config.SetMkldnnCacheCapacity(10);
}
config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
}
config.SwitchUseFeedFetchOps(false);
config.SwitchIrOptim(true);
config.DisableGlogInfo();
// Memory optimization
config.EnableMemoryOptim();
predictor_ = std::move(CreatePredictor(config));
}
void FilterDets(const float conf_thresh,
const cv::Mat dets,
std::vector<int>* index) {
for (int i = 0; i < dets.rows; ++i) {
float score = *dets.ptr<float>(i, 4);
if (score > conf_thresh) {
index->push_back(i);
}
}
}
void JDEPredictor::Preprocess(const cv::Mat& ori_im) {
// Clone the image : keep the original mat for postprocess
cv::Mat im = ori_im.clone();
preprocessor_.Run(&im, &inputs_);
}
void JDEPredictor::Postprocess(const cv::Mat dets,
const cv::Mat emb,
MOTResult* result) {
result->clear();
std::vector<Track> tracks;
std::vector<int> valid;
FilterDets(conf_thresh_, dets, &valid);
cv::Mat new_dets, new_emb;
for (int i = 0; i < valid.size(); ++i) {
new_dets.push_back(dets.row(valid[i]));
new_emb.push_back(emb.row(valid[i]));
}
JDETracker::instance()->update(new_dets, new_emb, &tracks);
if (tracks.size() == 0) {
MOTTrack mot_track;
Rect ret = {*dets.ptr<float>(0, 0),
*dets.ptr<float>(0, 1),
*dets.ptr<float>(0, 2),
*dets.ptr<float>(0, 3)};
mot_track.ids = 1;
mot_track.score = *dets.ptr<float>(0, 4);
mot_track.rects = ret;
result->push_back(mot_track);
} else {
std::vector<Track>::iterator titer;
for (titer = tracks.begin(); titer != tracks.end(); ++titer) {
if (titer->score < threshold_) {
continue;
} else {
float w = titer->ltrb[2] - titer->ltrb[0];
float h = titer->ltrb[3] - titer->ltrb[1];
bool vertical = w / h > 1.6;
float area = w * h;
if (area > min_box_area_ && !vertical) {
MOTTrack mot_track;
Rect ret = {
titer->ltrb[0], titer->ltrb[1], titer->ltrb[2], titer->ltrb[3]};
mot_track.rects = ret;
mot_track.score = titer->score;
mot_track.ids = titer->id;
result->push_back(mot_track);
}
}
}
}
}
void JDEPredictor::Predict(const std::vector<cv::Mat> imgs,
const double threshold,
MOTResult* result,
std::vector<double>* times) {
auto preprocess_start = std::chrono::steady_clock::now();
int batch_size = imgs.size();
// in_data_batch
std::vector<float> in_data_all;
std::vector<float> im_shape_all(batch_size * 2);
std::vector<float> scale_factor_all(batch_size * 2);
// Preprocess image
for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
cv::Mat im = imgs.at(bs_idx);
Preprocess(im);
im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
in_data_all.insert(
in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
}
// Prepare input tensor
auto input_names = predictor_->GetInputNames();
for (const auto& tensor_name : input_names) {
auto in_tensor = predictor_->GetInputHandle(tensor_name);
if (tensor_name == "image") {
int rh = inputs_.in_net_shape_[0];
int rw = inputs_.in_net_shape_[1];
in_tensor->Reshape({batch_size, 3, rh, rw});
in_tensor->CopyFromCpu(in_data_all.data());
} else if (tensor_name == "im_shape") {
in_tensor->Reshape({batch_size, 2});
in_tensor->CopyFromCpu(im_shape_all.data());
} else if (tensor_name == "scale_factor") {
in_tensor->Reshape({batch_size, 2});
in_tensor->CopyFromCpu(scale_factor_all.data());
}
}
auto preprocess_end = std::chrono::steady_clock::now();
std::vector<int> bbox_shape;
std::vector<int> emb_shape;
// Run predictor
auto inference_start = std::chrono::steady_clock::now();
predictor_->Run();
// Get output tensor
auto output_names = predictor_->GetOutputNames();
auto bbox_tensor = predictor_->GetOutputHandle(output_names[0]);
bbox_shape = bbox_tensor->shape();
auto emb_tensor = predictor_->GetOutputHandle(output_names[1]);
emb_shape = emb_tensor->shape();
// Calculate bbox length
int bbox_size = 1;
for (int j = 0; j < bbox_shape.size(); ++j) {
bbox_size *= bbox_shape[j];
}
// Calculate emb length
int emb_size = 1;
for (int j = 0; j < emb_shape.size(); ++j) {
emb_size *= emb_shape[j];
}
bbox_data_.resize(bbox_size);
bbox_tensor->CopyToCpu(bbox_data_.data());
emb_data_.resize(emb_size);
emb_tensor->CopyToCpu(emb_data_.data());
auto inference_end = std::chrono::steady_clock::now();
// Postprocessing result
auto postprocess_start = std::chrono::steady_clock::now();
result->clear();
cv::Mat dets(bbox_shape[0], 6, CV_32FC1, bbox_data_.data());
cv::Mat emb(bbox_shape[0], emb_shape[1], CV_32FC1, emb_data_.data());
Postprocess(dets, emb, result);
auto postprocess_end = std::chrono::steady_clock::now();
std::chrono::duration<float> preprocess_diff =
preprocess_end - preprocess_start;
(*times)[0] += static_cast<double>(preprocess_diff.count() * 1000);
std::chrono::duration<float> inference_diff = inference_end - inference_start;
(*times)[1] += static_cast<double>(inference_diff.count() * 1000);
std::chrono::duration<float> postprocess_diff =
postprocess_end - postprocess_start;
(*times)[2] += static_cast<double>(postprocess_diff.count() * 1000);
}
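// Usage sketch (hypothetical, not part of the build): running the JDE
// predictor on a single frame. The constructor arguments mirror the
// Pipeline::InitPredictor call elsewhere in this PR; the model path is
// illustrative.
//
//   PaddleDetection::JDEPredictor predictor(
//       "GPU", "./jde_model_dir", 0.5, "fluid", 0, false, 1, false);
//   cv::Mat frame = cv::imread("frame.jpg");
//   PaddleDetection::MOTResult result;
//   std::vector<double> times(3, 0.);
//   predictor.Predict({frame}, 0.5, &result, &times);
//   // result now holds one MOTTrack (ids, score, rects) per tracked object.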
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/gatagat/lap/blob/master/lap/lapjv.cpp
// The copyright of gatagat/lap is as follows:
// MIT License
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "include/lapjv.h"
namespace PaddleDetection {
/** Column-reduction and reduction transfer for a dense cost matrix.
*/
int _ccrrt_dense(
const int n, float *cost[], int *free_rows, int *x, int *y, float *v) {
int n_free_rows;
bool *unique;
for (int i = 0; i < n; i++) {
x[i] = -1;
v[i] = LARGE;
y[i] = 0;
}
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
const float c = cost[i][j];
if (c < v[j]) {
v[j] = c;
y[j] = i;
}
}
}
NEW(unique, bool, n);
memset(unique, TRUE, n);
{
int j = n;
do {
j--;
const int i = y[j];
if (x[i] < 0) {
x[i] = j;
} else {
unique[i] = FALSE;
y[j] = -1;
}
} while (j > 0);
}
n_free_rows = 0;
for (int i = 0; i < n; i++) {
if (x[i] < 0) {
free_rows[n_free_rows++] = i;
} else if (unique[i]) {
const int j = x[i];
float min = LARGE;
for (int j2 = 0; j2 < n; j2++) {
if (j2 == static_cast<int>(j)) {
continue;
}
const float c = cost[i][j2] - v[j2];
if (c < min) {
min = c;
}
}
v[j] -= min;
}
}
FREE(unique);
return n_free_rows;
}
/** Augmenting row reduction for a dense cost matrix.
*/
int _carr_dense(const int n,
float *cost[],
const int n_free_rows,
int *free_rows,
int *x,
int *y,
float *v) {
int current = 0;
int new_free_rows = 0;
int rr_cnt = 0;
while (current < n_free_rows) {
int i0;
int j1, j2;
float v1, v2, v1_new;
bool v1_lowers;
rr_cnt++;
const int free_i = free_rows[current++];
j1 = 0;
v1 = cost[free_i][0] - v[0];
j2 = -1;
v2 = LARGE;
for (int j = 1; j < n; j++) {
const float c = cost[free_i][j] - v[j];
if (c < v2) {
if (c >= v1) {
v2 = c;
j2 = j;
} else {
v2 = v1;
v1 = c;
j2 = j1;
j1 = j;
}
}
}
i0 = y[j1];
v1_new = v[j1] - (v2 - v1);
v1_lowers = v1_new < v[j1];
if (rr_cnt < current * n) {
if (v1_lowers) {
v[j1] = v1_new;
} else if (i0 >= 0 && j2 >= 0) {
j1 = j2;
i0 = y[j2];
}
if (i0 >= 0) {
if (v1_lowers) {
free_rows[--current] = i0;
} else {
free_rows[new_free_rows++] = i0;
}
}
} else {
if (i0 >= 0) {
free_rows[new_free_rows++] = i0;
}
}
x[free_i] = j1;
y[j1] = free_i;
}
return new_free_rows;
}
/** Find columns with minimum d[j] and put them on the SCAN list.
*/
int _find_dense(const int n, int lo, float *d, int *cols, int *y) {
int hi = lo + 1;
float mind = d[cols[lo]];
for (int k = hi; k < n; k++) {
int j = cols[k];
if (d[j] <= mind) {
if (d[j] < mind) {
hi = lo;
mind = d[j];
}
cols[k] = cols[hi];
cols[hi++] = j;
}
}
return hi;
}
// Scan all columns in TODO starting from arbitrary column in SCAN
// and try to decrease d of the TODO columns using the SCAN column.
int _scan_dense(const int n,
float *cost[],
int *plo,
int *phi,
float *d,
int *cols,
int *pred,
int *y,
float *v) {
int lo = *plo;
int hi = *phi;
float h, cred_ij;
while (lo != hi) {
int j = cols[lo++];
const int i = y[j];
const float mind = d[j];
h = cost[i][j] - v[j] - mind;
// For all columns in TODO
for (int k = hi; k < n; k++) {
j = cols[k];
cred_ij = cost[i][j] - v[j] - h;
if (cred_ij < d[j]) {
d[j] = cred_ij;
pred[j] = i;
if (cred_ij == mind) {
if (y[j] < 0) {
return j;
}
cols[k] = cols[hi];
cols[hi++] = j;
}
}
}
}
*plo = lo;
*phi = hi;
return -1;
}
/** Single iteration of modified Dijkstra shortest path algorithm as explained
* in the JV paper.
*
* This is a dense matrix version.
*
* \return The closest free column index.
*/
int find_path_dense(const int n,
float *cost[],
const int start_i,
int *y,
float *v,
int *pred) {
int lo = 0, hi = 0;
int final_j = -1;
int n_ready = 0;
int *cols;
float *d;
NEW(cols, int, n);
NEW(d, float, n);
for (int i = 0; i < n; i++) {
cols[i] = i;
pred[i] = start_i;
d[i] = cost[start_i][i] - v[i];
}
while (final_j == -1) {
// No columns left on the SCAN list.
if (lo == hi) {
n_ready = lo;
hi = _find_dense(n, lo, d, cols, y);
for (int k = lo; k < hi; k++) {
const int j = cols[k];
if (y[j] < 0) {
final_j = j;
}
}
}
if (final_j == -1) {
final_j = _scan_dense(n, cost, &lo, &hi, d, cols, pred, y, v);
}
}
{
const float mind = d[cols[lo]];
for (int k = 0; k < n_ready; k++) {
const int j = cols[k];
v[j] += d[j] - mind;
}
}
FREE(cols);
FREE(d);
return final_j;
}
/** Augment for a dense cost matrix.
*/
int _ca_dense(const int n,
float *cost[],
const int n_free_rows,
int *free_rows,
int *x,
int *y,
float *v) {
int *pred;
NEW(pred, int, n);
for (int *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) {
int i = -1, j;
int k = 0;
j = find_path_dense(n, cost, *pfree_i, y, v, pred);
while (i != *pfree_i) {
i = pred[j];
y[j] = i;
SWAP_INDICES(j, x[i]);
k++;
}
}
FREE(pred);
return 0;
}
/** Solve the dense linear assignment problem (LAP).
 */
int lapjv_internal(const cv::Mat &cost,
const bool extend_cost,
const float cost_limit,
int *x,
int *y) {
int n_rows = cost.rows;
int n_cols = cost.cols;
int n;
if (n_rows == n_cols) {
n = n_rows;
} else if (!extend_cost) {
throw std::invalid_argument(
"Square cost array expected. If cost is intentionally non-square, pass "
"extend_cost=True.");
}
// Get extend cost
if (extend_cost || cost_limit < LARGE) {
n = n_rows + n_cols;
}
cv::Mat cost_expand(n, n, CV_32F);
float expand_value;
if (cost_limit < LARGE) {
expand_value = cost_limit / 2;
} else {
double max_v;
minMaxLoc(cost, nullptr, &max_v);
expand_value = static_cast<float>(max_v) + 1.;
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
cost_expand.at<float>(i, j) = expand_value;
if (i >= n_rows && j >= n_cols) {
cost_expand.at<float>(i, j) = 0;
} else if (i < n_rows && j < n_cols) {
cost_expand.at<float>(i, j) = cost.at<float>(i, j);
}
}
}
// Convert Mat to pointer array
float **cost_ptr;
NEW(cost_ptr, float *, n);
for (int i = 0; i < n; ++i) {
NEW(cost_ptr[i], float, n);
}
for (int i = 0; i < n; ++i) {
for (int j = 0; j < n; ++j) {
cost_ptr[i][j] = cost_expand.at<float>(i, j);
}
}
int ret;
int *free_rows;
float *v;
int *x_c;
int *y_c;
NEW(free_rows, int, n);
NEW(v, float, n);
NEW(x_c, int, n);
NEW(y_c, int, n);
ret = _ccrrt_dense(n, cost_ptr, free_rows, x_c, y_c, v);
int i = 0;
while (ret > 0 && i < 2) {
ret = _carr_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v);
i++;
}
if (ret > 0) {
ret = _ca_dense(n, cost_ptr, ret, free_rows, x_c, y_c, v);
}
FREE(v);
FREE(free_rows);
for (int i = 0; i < n; ++i) {
FREE(cost_ptr[i]);
}
FREE(cost_ptr);
if (ret != 0) {
if (ret == -1) {
throw "Out of memory.";
}
throw "Unknown error (lapjv_internal)";
}
// Get output of x, y, opt
for (int i = 0; i < n; ++i) {
if (i < n_rows) {
x[i] = x_c[i];
if (x[i] >= n_cols) {
x[i] = -1;
}
}
if (i < n_cols) {
y[i] = y_c[i];
if (y[i] >= n_rows) {
y[i] = -1;
}
}
}
FREE(x_c);
FREE(y_c);
return ret;
}
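// Usage sketch (hypothetical): solving a small square assignment problem.
// LARGE is the sentinel from include/lapjv.h; passing it as cost_limit
// disables the cost-limit extension.
//
//   cv::Mat cost = (cv::Mat_<float>(3, 3) << 4, 1, 3,
//                                            2, 0, 5,
//                                            3, 2, 2);
//   int x[3], y[3];
//   lapjv_internal(cost, /*extend_cost=*/false, LARGE, x, y);
//   // x[i] is the column assigned to row i, y[j] the row assigned to
//   // column j; the optimal assignment here is x = {1, 0, 2} (cost 5).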
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <glog/logging.h>
#include <math.h>
#include <sys/types.h>
#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#else
#include <stdarg.h>
#include <sys/stat.h>
#endif
#include <gflags/gflags.h>
#include "include/pipeline.h"
DEFINE_string(video_file, "", "Path of input video.");
DEFINE_string(video_other_file,
"",
"Path of other input video used for MTMCT.");
DEFINE_string(device,
              "CPU",
              "Choose the device you want to run on; it can be: CPU/GPU/XPU. "
              "Default is CPU.");
DEFINE_double(threshold, 0.5, "Threshold of score.");
DEFINE_string(output_dir, "output", "Directory of output visualization files.");
DEFINE_string(run_mode,
              "fluid",
              "Mode of running (fluid/trt_fp32/trt_fp16/trt_int8).");
DEFINE_int32(gpu_id, 0, "Device id of the GPU to execute on.");
DEFINE_bool(use_mkldnn, false, "Whether to use MKLDNN on CPU.");
DEFINE_int32(cpu_threads, 1, "Number of CPU threads.");
DEFINE_bool(trt_calib_mode,
            false,
            "If the model is produced by TRT offline quantitative "
            "calibration, trt_calib_mode needs to be set to True.");
DEFINE_bool(tiny_obj, false, "Whether to track tiny objects.");
DEFINE_bool(do_entrance_counting,
            false,
            "Whether to count the number of objects entering "
            "or leaving through the entrance.");
DEFINE_int32(secs_interval,
             10,
             "The interval in seconds for counting after tracking.");
DEFINE_bool(save_result, false, "Whether to save results after tracking.");
DEFINE_string(
    scene,
    "",
    "Scene of the tracking system; it can be: pedestrian/vehicle/multiclass.");
DEFINE_bool(is_mtmct,
            false,
            "Whether to use multi-target multi-camera tracking.");
DEFINE_string(track_model_dir, "", "Path of tracking model");
DEFINE_string(det_model_dir, "", "Path of detection model");
DEFINE_string(reid_model_dir, "", "Path of reid model");
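// Example invocations (paths are illustrative):
//   ./main --video_file=./demo.mp4 --scene=pedestrian --device=GPU \
//          --run_mode=trt_fp16 --save_result
// or, with an explicit model directory instead of a preset scene:
//   ./main --video_file=./demo.mp4 --track_model_dir=./fairmot_model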
static std::string DirName(const std::string& filepath) {
auto pos = filepath.rfind(OS_PATH_SEP);
if (pos == std::string::npos) {
return "";
}
return filepath.substr(0, pos);
}
static bool PathExists(const std::string& path) {
#ifdef _WIN32
struct _stat buffer;
return (_stat(path.c_str(), &buffer) == 0);
#else
struct stat buffer;
return (stat(path.c_str(), &buffer) == 0);
#endif // !_WIN32
}
static void MkDir(const std::string& path) {
if (PathExists(path)) return;
int ret = 0;
#ifdef _WIN32
ret = _mkdir(path.c_str());
#else
ret = mkdir(path.c_str(), 0755);
#endif // !_WIN32
if (ret != 0) {
std::string path_error(path);
path_error += " mkdir failed!";
throw std::runtime_error(path_error);
}
}
static void MkDirs(const std::string& path) {
if (path.empty()) return;
if (PathExists(path)) return;
MkDirs(DirName(path));
MkDir(path);
}
int main(int argc, char** argv) {
// Parsing command-line
google::ParseCommandLineFlags(&argc, &argv, true);
bool has_model_dir =
!(FLAGS_track_model_dir.empty() && FLAGS_det_model_dir.empty() &&
FLAGS_reid_model_dir.empty());
if (FLAGS_video_file.empty() || (FLAGS_scene.empty() && !has_model_dir)) {
LOG(ERROR) << "Usage: \n"
<< "1. ./main -video_file=/PATH/TO/INPUT/IMAGE/ "
<< "-scene=pedestrian/vehicle/multiclass\n"
<< "2. ./main -video_file=/PATH/TO/INPUT/IMAGE/ "
<< "-track_model_dir=/PATH/TO/MODEL_DIR" << std::endl;
return -1;
}
if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32" ||
FLAGS_run_mode == "trt_fp16" || FLAGS_run_mode == "trt_int8")) {
LOG(ERROR)
<< "run_mode should be 'fluid', 'trt_fp32', 'trt_fp16' or 'trt_int8'.";
return -1;
}
transform(FLAGS_device.begin(),
FLAGS_device.end(),
FLAGS_device.begin(),
::toupper);
if (!(FLAGS_device == "CPU" || FLAGS_device == "GPU" ||
FLAGS_device == "XPU")) {
LOG(ERROR) << "device should be 'CPU', 'GPU' or 'XPU'.";
return -1;
}
if (!PathExists(FLAGS_output_dir)) {
MkDirs(FLAGS_output_dir);
}
PaddleDetection::Pipeline pipeline(FLAGS_device,
FLAGS_threshold,
FLAGS_output_dir,
FLAGS_run_mode,
FLAGS_gpu_id,
FLAGS_use_mkldnn,
FLAGS_cpu_threads,
FLAGS_trt_calib_mode,
FLAGS_do_entrance_counting,
FLAGS_save_result,
FLAGS_scene,
FLAGS_tiny_obj,
FLAGS_is_mtmct,
FLAGS_secs_interval,
FLAGS_track_model_dir,
FLAGS_det_model_dir,
FLAGS_reid_model_dir);
pipeline.SetInput(FLAGS_video_file);
if (!FLAGS_video_other_file.empty()) {
pipeline.SetInput(FLAGS_video_other_file);
}
pipeline.Run();
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include <iostream>
#include <string>
#include "include/pipeline.h"
#include "include/postprocess.h"
#include "include/predictor.h"
namespace PaddleDetection {
void Pipeline::SetInput(const std::string& input_video) {
input_.push_back(input_video);
}
void Pipeline::ClearInput() {
input_.clear();
stream_.clear();
}
void Pipeline::SelectModel(const std::string& scene,
const bool tiny_obj,
const bool is_mtmct,
const std::string track_model_dir,
const std::string det_model_dir,
const std::string reid_model_dir) {
// model_dir has higher priority
if (!track_model_dir.empty()) {
track_model_dir_ = track_model_dir;
return;
}
if (!det_model_dir.empty() && !reid_model_dir.empty()) {
det_model_dir_ = det_model_dir;
reid_model_dir_ = reid_model_dir;
return;
}
  // Single-camera model, based on FairMOT
if (scene == "pedestrian") {
if (tiny_obj) {
track_model_dir_ = "../pedestrian_track_tiny";
} else {
track_model_dir_ = "../pedestrian_track";
}
} else if (scene != "vehicle") {
if (tiny_obj) {
track_model_dir_ = "../vehicle_track_tiny";
} else {
track_model_dir_ = "../vehicle_track";
}
} else if (scene == "multiclass") {
if (tiny_obj) {
track_model_dir_ = "../multiclass_track_tiny";
} else {
track_model_dir_ = "../multiclass_track";
}
}
// Multi-camera model, based on PicoDet & LCNet
if (is_mtmct && scene == "pedestrian") {
det_model_dir_ = "../pedestrian_det";
reid_model_dir_ = "../pedestrian_reid";
} else if (is_mtmct && scene == "vehicle") {
det_model_dir_ = "../vehicle_det";
reid_model_dir_ = "../vehicle_reid";
} else if (is_mtmct && scene == "multiclass") {
throw "Multi-camera tracking is not supported in multiclass scene now.";
}
}
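// For example, with the defaults wired above: scene="vehicle" and
// tiny_obj=true resolve track_model_dir_ to "../vehicle_track_tiny", while
// scene="pedestrian" with is_mtmct=true additionally selects
// "../pedestrian_det" and "../pedestrian_reid".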
void Pipeline::InitPredictor() {
if (track_model_dir_.empty() && det_model_dir_.empty()) {
throw "Predictor must receive track_model or det_model!";
}
if (!track_model_dir_.empty()) {
jde_sct_ = std::make_shared<PaddleDetection::JDEPredictor>(device_,
track_model_dir_,
threshold_,
run_mode_,
gpu_id_,
use_mkldnn_,
cpu_threads_,
trt_calib_mode_);
}
if (!det_model_dir_.empty()) {
sde_sct_ = std::make_shared<PaddleDetection::SDEPredictor>(device_,
det_model_dir_,
reid_model_dir_,
threshold_,
run_mode_,
gpu_id_,
use_mkldnn_,
cpu_threads_,
trt_calib_mode_);
}
}
void Pipeline::Run() {
if (track_model_dir_.empty() && det_model_dir_.empty()) {
LOG(ERROR) << "Pipeline must use SelectModel before Run";
return;
}
if (input_.size() == 0) {
LOG(ERROR) << "Pipeline must use SetInput before Run";
return;
}
if (!track_model_dir_.empty()) {
// single camera
    if (input_.size() > 1) {
      throw "Single camera tracking expects exactly one input video.";
    }
PredictMOT(input_[0]);
} else {
// multi cameras
    if (input_.size() != 2) {
      throw "Multi-camera tracking expects exactly two input videos.";
    }
PredictMTMCT(input_);
}
}
void Pipeline::PredictMOT(const std::string& video_path) {
// Open video
cv::VideoCapture capture;
capture.open(video_path.c_str());
if (!capture.isOpened()) {
printf("can not open video : %s\n", video_path.c_str());
return;
}
// Get Video info : resolution, fps
int video_width = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_WIDTH));
int video_height = static_cast<int>(capture.get(CV_CAP_PROP_FRAME_HEIGHT));
int video_fps = static_cast<int>(capture.get(CV_CAP_PROP_FPS));
LOG(INFO) << "----------------------- Input info -----------------------";
LOG(INFO) << "video_width: " << video_width;
LOG(INFO) << "video_height: " << video_height;
LOG(INFO) << "input fps: " << video_fps;
// Create VideoWriter for output
cv::VideoWriter video_out;
std::string video_out_path = output_dir_ + OS_PATH_SEP + "mot_output.mp4";
int fcc = cv::VideoWriter::fourcc('m', 'p', '4', 'v');
video_out.open(video_out_path.c_str(),
fcc, // 0x00000021,
video_fps,
cv::Size(video_width, video_height),
true);
if (!video_out.isOpened()) {
printf("create video writer failed!\n");
return;
}
PaddleDetection::MOTResult result;
std::vector<double> det_times(3);
std::set<int> id_set;
std::set<int> interval_id_set;
std::vector<int> in_id_list;
std::vector<int> out_id_list;
std::map<int, std::vector<float>> prev_center;
Rect entrance = {0,
static_cast<float>(video_height) / 2,
static_cast<float>(video_width),
static_cast<float>(video_height) / 2};
double times;
double total_time;
// Capture all frames and do inference
cv::Mat frame;
int frame_id = 0;
std::vector<std::string> records;
std::vector<std::string> flow_records;
records.push_back("result format: frame_id, track_id, x1, y1, w, h\n");
LOG(INFO) << "------------------- Predict info ------------------------";
while (capture.read(frame)) {
if (frame.empty()) {
break;
}
std::vector<cv::Mat> imgs;
imgs.push_back(frame);
jde_sct_->Predict(imgs, threshold_, &result, &det_times);
frame_id += 1;
total_time = std::accumulate(det_times.begin(), det_times.end(), 0.);
times = total_time / frame_id;
LOG(INFO) << "frame_id: " << frame_id
<< " predict time(s): " << total_time / 1000;
cv::Mat out_img = PaddleDetection::VisualizeTrackResult(
frame, result, 1000. / times, frame_id);
// TODO(qianhui): the entrance line can be set by users
PaddleDetection::FlowStatistic(result,
frame_id,
secs_interval_,
do_entrance_counting_,
video_fps,
entrance,
&id_set,
&interval_id_set,
&in_id_list,
&out_id_list,
&prev_center,
&flow_records);
if (save_result_) {
PaddleDetection::SaveMOTResult(result, frame_id, &records);
}
// Draw the entrance line
if (do_entrance_counting_) {
float line_thickness = std::max(1, static_cast<int>(video_width / 500.));
cv::Point pt1 = cv::Point(entrance.left, entrance.top);
cv::Point pt2 = cv::Point(entrance.right, entrance.bottom);
cv::line(out_img, pt1, pt2, cv::Scalar(0, 255, 255), line_thickness);
}
video_out.write(out_img);
}
capture.release();
video_out.release();
PrintBenchmarkLog(det_times, frame_id);
LOG(INFO) << "-------------------- Final Output info -------------------";
LOG(INFO) << "Total frame: " << frame_id;
LOG(INFO) << "Visualized output saved as " << video_out_path.c_str();
if (save_result_) {
FILE* fp;
std::string result_output_path =
output_dir_ + OS_PATH_SEP + "mot_output.txt";
if ((fp = fopen(result_output_path.c_str(), "w+")) == NULL) {
printf("Open %s error.\n", result_output_path.c_str());
return;
}
    for (size_t l = 0; l < records.size(); ++l) {
      fprintf(fp, "%s", records[l].c_str());
    }
fclose(fp);
LOG(INFO) << "txt result output saved as " << result_output_path.c_str();
result_output_path = output_dir_ + OS_PATH_SEP + "flow_statistic.txt";
if ((fp = fopen(result_output_path.c_str(), "w+")) == NULL) {
printf("Open %s error.\n", result_output_path);
return;
}
    for (size_t l = 0; l < flow_records.size(); ++l) {
      fprintf(fp, "%s", flow_records[l].c_str());
    }
fclose(fp);
LOG(INFO) << "txt flow statistic saved as " << result_output_path.c_str();
}
}
void Pipeline::PredictMTMCT(const std::vector<std::string> video_path) {
throw "Not Implement!";
}
void Pipeline::RunMOTStream(const cv::Mat img,
const int frame_id,
const int video_fps,
const Rect entrance,
cv::Mat out_img,
std::vector<std::string>* records,
std::set<int>* id_set,
std::set<int>* interval_id_set,
std::vector<int>* in_id_list,
std::vector<int>* out_id_list,
std::map<int, std::vector<float>>* prev_center,
std::vector<std::string>* flow_records) {
PaddleDetection::MOTResult result;
std::vector<double> det_times(3);
double times;
double total_time;
LOG(INFO) << "------------------- Predict info ------------------------";
std::vector<cv::Mat> imgs;
imgs.push_back(img);
jde_sct_->Predict(imgs, threshold_, &result, &det_times);
total_time = std::accumulate(det_times.begin(), det_times.end(), 0.);
times = total_time / frame_id;
LOG(INFO) << "frame_id: " << frame_id
<< " predict time(s): " << total_time / 1000;
out_img = PaddleDetection::VisualizeTrackResult(
img, result, 1000. / times, frame_id);
// Count total number
// Count in & out number
PaddleDetection::FlowStatistic(result,
frame_id,
secs_interval_,
do_entrance_counting_,
video_fps,
entrance,
id_set,
interval_id_set,
in_id_list,
out_id_list,
prev_center,
flow_records);
PrintBenchmarkLog(det_times, frame_id);
if (save_result_) {
PaddleDetection::SaveMOTResult(result, frame_id, records);
}
}
void Pipeline::RunMTMCTStream(const std::vector<cv::Mat> imgs,
std::vector<std::string>* records) {
throw "Not Implement!";
}
void Pipeline::PrintBenchmarkLog(const std::vector<double> det_time,
const int img_num) {
LOG(INFO) << "----------------------- Config info -----------------------";
LOG(INFO) << "runtime_device: " << device_;
LOG(INFO) << "ir_optim: "
<< "True";
LOG(INFO) << "enable_memory_optim: "
<< "True";
int has_trt = run_mode_.find("trt");
if (has_trt >= 0) {
LOG(INFO) << "enable_tensorrt: "
<< "True";
std::string precision = run_mode_.substr(4, 8);
LOG(INFO) << "precision: " << precision;
} else {
LOG(INFO) << "enable_tensorrt: "
<< "False";
LOG(INFO) << "precision: "
<< "fp32";
}
LOG(INFO) << "enable_mkldnn: " << (use_mkldnn_ ? "True" : "False");
LOG(INFO) << "cpu_math_library_num_threads: " << cpu_threads_;
LOG(INFO) << "----------------------- Perf info ------------------------";
LOG(INFO) << "Total number of predicted data: " << img_num
<< " and total time spent(s): "
<< std::accumulate(det_time.begin(), det_time.end(), 0.) / 1000;
int num = std::max(1, img_num);
LOG(INFO) << "preproce_time(ms): " << det_time[0] / num
<< ", inference_time(ms): " << det_time[1] / num
<< ", postprocess_time(ms): " << det_time[2] / num;
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include <iostream>
#include "include/postprocess.h"
namespace PaddleDetection {
cv::Scalar GetColor(int idx) {
idx = idx * 3;
cv::Scalar color =
cv::Scalar((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255);
return color;
}
cv::Mat VisualizeTrackResult(const cv::Mat& img,
const MOTResult& results,
const float fps,
const int frame_id) {
cv::Mat vis_img = img.clone();
int im_h = img.rows;
int im_w = img.cols;
float text_scale = std::max(1, static_cast<int>(im_w / 1600.));
float text_thickness = 2.;
float line_thickness = std::max(1, static_cast<int>(im_w / 500.));
std::ostringstream oss;
oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
oss << "frame: " << frame_id << " ";
oss << "fps: " << fps << " ";
oss << "num: " << results.size();
std::string text = oss.str();
cv::Point origin;
origin.x = 0;
origin.y = static_cast<int>(15 * text_scale);
  cv::putText(vis_img,
              text,
              origin,
              cv::FONT_HERSHEY_PLAIN,
              text_scale,
              cv::Scalar(0, 0, 255),
              2);
for (int i = 0; i < results.size(); ++i) {
const int obj_id = results[i].ids;
const float score = results[i].score;
cv::Scalar color = GetColor(obj_id);
cv::Point pt1 = cv::Point(results[i].rects.left, results[i].rects.top);
cv::Point pt2 = cv::Point(results[i].rects.right, results[i].rects.bottom);
cv::Point id_pt =
cv::Point(results[i].rects.left, results[i].rects.top + 10);
cv::Point score_pt =
cv::Point(results[i].rects.left, results[i].rects.top - 10);
cv::rectangle(vis_img, pt1, pt2, color, line_thickness);
std::ostringstream idoss;
idoss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
idoss << obj_id;
std::string id_text = idoss.str();
cv::putText(vis_img,
id_text,
id_pt,
cv::FONT_HERSHEY_PLAIN,
text_scale,
cv::Scalar(0, 255, 255),
text_thickness);
std::ostringstream soss;
soss << std::setiosflags(std::ios::fixed) << std::setprecision(2);
soss << score;
std::string score_text = soss.str();
cv::putText(vis_img,
score_text,
score_pt,
cv::FONT_HERSHEY_PLAIN,
text_scale,
cv::Scalar(0, 255, 255),
text_thickness);
}
return vis_img;
}
void FlowStatistic(const MOTResult& results,
const int frame_id,
const int secs_interval,
const bool do_entrance_counting,
const int video_fps,
const Rect entrance,
std::set<int>* id_set,
std::set<int>* interval_id_set,
std::vector<int>* in_id_list,
std::vector<int>* out_id_list,
std::map<int, std::vector<float>>* prev_center,
std::vector<std::string>* records) {
if (frame_id == 0) interval_id_set->clear();
if (do_entrance_counting) {
    // Count in and out numbers:
    // Use the horizontal center line as the entrance, just for simplicity.
    // If a person was above the horizontal center line at the previous frame
    // and is below the line at the current frame, the in count is increased
    // by one.
    // If a person was below the horizontal center line at the previous frame
    // and is above the line at the current frame, the out count is increased
    // by one.
    // TODO(qianhui): if the entrance is not the horizontal center line,
    // the counting method should be optimized.
float entrance_y = entrance.top;
for (const auto& result : results) {
float center_x = (result.rects.left + result.rects.right) / 2;
float center_y = (result.rects.top + result.rects.bottom) / 2;
int ids = result.ids;
std::map<int, std::vector<float>>::iterator iter;
iter = prev_center->find(ids);
if (iter != prev_center->end()) {
if (iter->second[1] <= entrance_y && center_y > entrance_y) {
in_id_list->push_back(ids);
}
if (iter->second[1] >= entrance_y && center_y < entrance_y) {
out_id_list->push_back(ids);
}
(*prev_center)[ids][0] = center_x;
(*prev_center)[ids][1] = center_y;
} else {
prev_center->insert(
std::pair<int, std::vector<float>>(ids, {center_x, center_y}));
}
}
}
  // Count the total number, and the number within a user-defined interval
for (const auto& result : results) {
id_set->insert(result.ids);
interval_id_set->insert(result.ids);
}
std::ostringstream os;
os << "Frame id: " << frame_id << ", Total count: " << id_set->size();
if (do_entrance_counting) {
os << ", In count: " << in_id_list->size()
<< ", Out count: " << out_id_list->size();
}
// Reset counting at the interval beginning
int curr_interval_count = -1;
if (frame_id % video_fps == 0 && frame_id / video_fps % secs_interval == 0) {
curr_interval_count = interval_id_set->size();
os << ", Count during " << secs_interval
<< " secs: " << curr_interval_count;
interval_id_set->clear();
}
os << "\n";
std::string record = os.str();
records->push_back(record);
LOG(INFO) << record;
}
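// Worked example of the crossing rule above: PredictMOT places the
// entrance line at half the frame height, so for a 720p video
// entrance_y = 360. An id whose previous center was at y = 350 (above the
// line) and whose current center is at y = 370 (below it) is appended to
// in_id_list; the reverse crossing is counted in out_id_list.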
void SaveMOTResult(const MOTResult& results,
const int frame_id,
std::vector<std::string>* records) {
// result format: frame_id, track_id, x1, y1, w, h
std::string record;
for (int i = 0; i < results.size(); ++i) {
MOTTrack mot_track = results[i];
int ids = mot_track.ids;
float score = mot_track.score;
Rect rects = mot_track.rects;
float x1 = rects.left;
float y1 = rects.top;
float x2 = rects.right;
float y2 = rects.bottom;
float w = x2 - x1;
float h = y2 - y1;
if (w == 0 || h == 0) {
continue;
}
std::ostringstream os;
os << frame_id << " " << ids << "" << x1 << " " << y1 << " " << w << " "
<< h << "\n";
record = os.str();
records->push_back(record);
}
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/predictor.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
void Predictor::Predict(const std::vector<cv::Mat> imgs,
const double threshold,
MOTResult* result,
std::vector<double>* times) {
if (use_jde_) {
jde_sct_->Predict(imgs, threshold, result, times);
} else {
sde_sct_->Predict(imgs, threshold, result, times);
}
}
} // namespace PaddleDetection
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <thread>
#include <vector>
#include "include/preprocess_op.h"
namespace PaddleDetection {
void InitInfo::Run(cv::Mat* im, ImageBlob* data) {
data->im_shape_ = {static_cast<float>(im->rows),
static_cast<float>(im->cols)};
data->scale_factor_ = {1., 1.};
data->in_net_shape_ = {static_cast<float>(im->rows),
static_cast<float>(im->cols)};
}
void NormalizeImage::Run(cv::Mat* im, ImageBlob* data) {
double e = 1.0;
if (is_scale_) {
e /= 255.0;
}
(*im).convertTo(*im, CV_32FC3, e);
for (int h = 0; h < im->rows; h++) {
for (int w = 0; w < im->cols; w++) {
im->at<cv::Vec3f>(h, w)[0] =
(im->at<cv::Vec3f>(h, w)[0] - mean_[0]) / scale_[0];
im->at<cv::Vec3f>(h, w)[1] =
(im->at<cv::Vec3f>(h, w)[1] - mean_[1]) / scale_[1];
im->at<cv::Vec3f>(h, w)[2] =
(im->at<cv::Vec3f>(h, w)[2] - mean_[2]) / scale_[2];
}
}
}
void Permute::Run(cv::Mat* im, ImageBlob* data) {
(*im).convertTo(*im, CV_32FC3);
int rh = im->rows;
int rw = im->cols;
int rc = im->channels();
(data->im_data_).resize(rc * rh * rw);
float* base = (data->im_data_).data();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(*im, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
}
}
void Resize::Run(cv::Mat* im, ImageBlob* data) {
auto resize_scale = GenerateScale(*im);
data->im_shape_ = {static_cast<float>(im->cols * resize_scale.first),
static_cast<float>(im->rows * resize_scale.second)};
data->in_net_shape_ = {static_cast<float>(im->cols * resize_scale.first),
static_cast<float>(im->rows * resize_scale.second)};
cv::resize(
*im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_);
data->im_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
data->scale_factor_ = {
resize_scale.second, resize_scale.first,
};
}
std::pair<float, float> Resize::GenerateScale(const cv::Mat& im) {
std::pair<float, float> resize_scale;
int origin_w = im.cols;
int origin_h = im.rows;
if (keep_ratio_) {
int im_size_max = std::max(origin_w, origin_h);
int im_size_min = std::min(origin_w, origin_h);
int target_size_max =
*std::max_element(target_size_.begin(), target_size_.end());
int target_size_min =
*std::min_element(target_size_.begin(), target_size_.end());
float scale_min =
static_cast<float>(target_size_min) / static_cast<float>(im_size_min);
float scale_max =
static_cast<float>(target_size_max) / static_cast<float>(im_size_max);
float scale_ratio = std::min(scale_min, scale_max);
resize_scale = {scale_ratio, scale_ratio};
} else {
resize_scale.first =
static_cast<float>(target_size_[1]) / static_cast<float>(origin_w);
resize_scale.second =
static_cast<float>(target_size_[0]) / static_cast<float>(origin_h);
}
return resize_scale;
}
void LetterBoxResize::Run(cv::Mat* im, ImageBlob* data) {
float resize_scale = GenerateScale(*im);
int new_shape_w = std::round(im->cols * resize_scale);
int new_shape_h = std::round(im->rows * resize_scale);
data->im_shape_ = {static_cast<float>(new_shape_h),
static_cast<float>(new_shape_w)};
float padw = (target_size_[1] - new_shape_w) / 2.;
float padh = (target_size_[0] - new_shape_h) / 2.;
int top = std::round(padh - 0.1);
int bottom = std::round(padh + 0.1);
int left = std::round(padw - 0.1);
int right = std::round(padw + 0.1);
cv::resize(
*im, *im, cv::Size(new_shape_w, new_shape_h), 0, 0, cv::INTER_AREA);
data->in_net_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
cv::copyMakeBorder(*im,
*im,
top,
bottom,
left,
right,
cv::BORDER_CONSTANT,
cv::Scalar(127.5));
data->in_net_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
data->scale_factor_ = {
resize_scale, resize_scale,
};
}
float LetterBoxResize::GenerateScale(const cv::Mat& im) {
int origin_w = im.cols;
int origin_h = im.rows;
int target_h = target_size_[0];
int target_w = target_size_[1];
float ratio_h = static_cast<float>(target_h) / static_cast<float>(origin_h);
float ratio_w = static_cast<float>(target_w) / static_cast<float>(origin_w);
float resize_scale = std::min(ratio_h, ratio_w);
return resize_scale;
}
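// Worked example: for a 1280 x 720 (w x h) frame and target_size_ =
// {608, 1088} (h, w), GenerateScale returns min(608 / 720, 1088 / 1280)
// = 0.8444, so Run resizes to 1081 x 608 and pads 3 / 4 pixels on the
// left / right with gray (127.5) borders to reach 1088 x 608.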
void PadStride::Run(cv::Mat* im, ImageBlob* data) {
if (stride_ <= 0) {
return;
}
int rc = im->channels();
int rh = im->rows;
int rw = im->cols;
int nh = (rh / stride_) * stride_ + (rh % stride_ != 0) * stride_;
int nw = (rw / stride_) * stride_ + (rw % stride_ != 0) * stride_;
cv::copyMakeBorder(
*im, *im, 0, nh - rh, 0, nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0));
data->in_net_shape_ = {
static_cast<float>(im->rows), static_cast<float>(im->cols),
};
}
// Preprocessor op running order
const std::vector<std::string> Preprocessor::RUN_ORDER = {"InitInfo",
"Resize",
"LetterBoxResize",
"NormalizeImage",
"PadStride",
"Permute"};
void Preprocessor::Run(cv::Mat* im, ImageBlob* data) {
for (const auto& name : RUN_ORDER) {
if (ops_.find(name) != ops_.end()) {
ops_[name]->Run(im, data);
}
}
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <sstream>
// for setprecision
#include <chrono>
#include <iomanip>
#include "include/sde_predictor.h"
using namespace paddle_infer; // NOLINT
namespace PaddleDetection {
// Load Model and create model predictor
void SDEPredictor::LoadModel(const std::string& det_model_dir,
                             const std::string& reid_model_dir,
                             const std::string& run_mode) {
  throw "Not implemented";
}
void SDEPredictor::Preprocess(const cv::Mat& ori_im) {
  throw "Not implemented";
}
void SDEPredictor::Postprocess(const cv::Mat dets,
                               const cv::Mat emb,
                               MOTResult* result) {
  throw "Not implemented";
}
void SDEPredictor::Predict(const std::vector<cv::Mat> imgs,
                           const double threshold,
                           MOTResult* result,
                           std::vector<double>* times) {
  throw "Not implemented";
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/jdetracker.cpp
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#include <limits.h>
#include <stdio.h>
#include <algorithm>
#include <map>
#include "include/lapjv.h"
#include "include/tracker.h"
#define mat2vec4f(m) \
cv::Vec4f(*m.ptr<float>(0, 0), \
*m.ptr<float>(0, 1), \
*m.ptr<float>(0, 2), \
*m.ptr<float>(0, 3))
namespace PaddleDetection {
static std::map<int, float> chi2inv95 = {{1, 3.841459f},
{2, 5.991465f},
{3, 7.814728f},
{4, 9.487729f},
{5, 11.070498f},
{6, 12.591587f},
{7, 14.067140f},
{8, 15.507313f},
{9, 16.918978f}};
JDETracker *JDETracker::me = new JDETracker;
JDETracker *JDETracker::instance(void) { return me; }
JDETracker::JDETracker(void)
: timestamp(0), max_lost_time(30), lambda(0.98f), det_thresh(0.3f) {}
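// Matching cascade, following the JDE/FairMOT association scheme:
// 1) match activated tracks (plus lost ones) against detections with the
//    fused embedding/motion cost, gated by the Kalman state (limit 0.7);
// 2) match the remaining Tracked-state trajectories by IoU (limit 0.5);
// 3) match unconfirmed trajectories by IoU (limit 0.7);
// 4) detections still unmatched spawn new trajectories if their score
//    reaches det_thresh; lost tracks older than max_lost_time are removed.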
bool JDETracker::update(const cv::Mat &dets,
const cv::Mat &emb,
std::vector<Track> *tracks) {
++timestamp;
TrajectoryPool candidates(dets.rows);
for (int i = 0; i < dets.rows; ++i) {
float score = *dets.ptr<float>(i, 4);
const cv::Mat &ltrb_ = dets(cv::Rect(0, i, 4, 1));
cv::Vec4f ltrb = mat2vec4f(ltrb_);
const cv::Mat &embedding = emb(cv::Rect(0, i, emb.cols, 1));
candidates[i] = Trajectory(ltrb, score, embedding);
}
TrajectoryPtrPool tracked_trajectories;
TrajectoryPtrPool unconfirmed_trajectories;
for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) {
if (this->tracked_trajectories[i].is_activated)
tracked_trajectories.push_back(&this->tracked_trajectories[i]);
else
unconfirmed_trajectories.push_back(&this->tracked_trajectories[i]);
}
TrajectoryPtrPool trajectory_pool =
tracked_trajectories + &(this->lost_trajectories);
for (size_t i = 0; i < trajectory_pool.size(); ++i)
trajectory_pool[i]->predict();
Match matches;
std::vector<int> mismatch_row;
std::vector<int> mismatch_col;
cv::Mat cost = motion_distance(trajectory_pool, candidates);
linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col);
MatchIterator miter;
TrajectoryPtrPool activated_trajectories;
TrajectoryPtrPool retrieved_trajectories;
for (miter = matches.begin(); miter != matches.end(); miter++) {
Trajectory *pt = trajectory_pool[miter->first];
Trajectory &ct = candidates[miter->second];
if (pt->state == Tracked) {
pt->update(&ct, timestamp);
activated_trajectories.push_back(pt);
} else {
pt->reactivate(&ct, timestamp);
retrieved_trajectories.push_back(pt);
}
}
TrajectoryPtrPool next_candidates(mismatch_col.size());
for (size_t i = 0; i < mismatch_col.size(); ++i)
next_candidates[i] = &candidates[mismatch_col[i]];
TrajectoryPtrPool next_trajectory_pool;
for (size_t i = 0; i < mismatch_row.size(); ++i) {
int j = mismatch_row[i];
if (trajectory_pool[j]->state == Tracked)
next_trajectory_pool.push_back(trajectory_pool[j]);
}
cost = iou_distance(next_trajectory_pool, next_candidates);
linear_assignment(cost, 0.5f, &matches, &mismatch_row, &mismatch_col);
for (miter = matches.begin(); miter != matches.end(); miter++) {
Trajectory *pt = next_trajectory_pool[miter->first];
Trajectory *ct = next_candidates[miter->second];
if (pt->state == Tracked) {
pt->update(ct, timestamp);
activated_trajectories.push_back(pt);
} else {
pt->reactivate(ct, timestamp);
retrieved_trajectories.push_back(pt);
}
}
TrajectoryPtrPool lost_trajectories;
for (size_t i = 0; i < mismatch_row.size(); ++i) {
Trajectory *pt = next_trajectory_pool[mismatch_row[i]];
if (pt->state != Lost) {
pt->mark_lost();
lost_trajectories.push_back(pt);
}
}
TrajectoryPtrPool nnext_candidates(mismatch_col.size());
for (size_t i = 0; i < mismatch_col.size(); ++i)
nnext_candidates[i] = next_candidates[mismatch_col[i]];
cost = iou_distance(unconfirmed_trajectories, nnext_candidates);
linear_assignment(cost, 0.7f, &matches, &mismatch_row, &mismatch_col);
for (miter = matches.begin(); miter != matches.end(); miter++) {
unconfirmed_trajectories[miter->first]->update(
nnext_candidates[miter->second], timestamp);
activated_trajectories.push_back(unconfirmed_trajectories[miter->first]);
}
TrajectoryPtrPool removed_trajectories;
for (size_t i = 0; i < mismatch_row.size(); ++i) {
unconfirmed_trajectories[mismatch_row[i]]->mark_removed();
removed_trajectories.push_back(unconfirmed_trajectories[mismatch_row[i]]);
}
for (size_t i = 0; i < mismatch_col.size(); ++i) {
if (nnext_candidates[mismatch_col[i]]->score < det_thresh) continue;
nnext_candidates[mismatch_col[i]]->activate(timestamp);
activated_trajectories.push_back(nnext_candidates[mismatch_col[i]]);
}
for (size_t i = 0; i < this->lost_trajectories.size(); ++i) {
Trajectory &lt = this->lost_trajectories[i];
if (timestamp - lt.timestamp > max_lost_time) {
lt.mark_removed();
removed_trajectories.push_back(&lt);
}
}
TrajectoryPoolIterator piter;
for (piter = this->tracked_trajectories.begin();
piter != this->tracked_trajectories.end();) {
if (piter->state != Tracked)
piter = this->tracked_trajectories.erase(piter);
else
++piter;
}
this->tracked_trajectories += activated_trajectories;
this->tracked_trajectories += retrieved_trajectories;
this->lost_trajectories -= this->tracked_trajectories;
this->lost_trajectories += lost_trajectories;
this->lost_trajectories -= this->removed_trajectories;
this->removed_trajectories += removed_trajectories;
remove_duplicate_trajectory(&this->tracked_trajectories,
&this->lost_trajectories);
tracks->clear();
for (size_t i = 0; i < this->tracked_trajectories.size(); ++i) {
if (this->tracked_trajectories[i].is_activated) {
Track track = {.id = this->tracked_trajectories[i].id,
.score = this->tracked_trajectories[i].score,
.ltrb = this->tracked_trajectories[i].ltrb};
tracks->push_back(track);
}
}
return 0;
}
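// Fused cost: lambda * embedding_distance + (1 - lambda) * mahalanobis
// distance. Entries whose squared Mahalanobis distance exceeds the
// chi-square 95% quantile for 4 degrees of freedom (chi2inv95[4] = 9.4877)
// are set to FLT_MAX so that gated pairs can never be matched.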
cv::Mat JDETracker::motion_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b) {
if (0 == a.size() || 0 == b.size())
return cv::Mat(a.size(), b.size(), CV_32F);
cv::Mat edists = embedding_distance(a, b);
cv::Mat mdists = mahalanobis_distance(a, b);
cv::Mat fdists = lambda * edists + (1 - lambda) * mdists;
const float gate_thresh = chi2inv95[4];
for (int i = 0; i < fdists.rows; ++i) {
for (int j = 0; j < fdists.cols; ++j) {
if (*mdists.ptr<float>(i, j) > gate_thresh)
*fdists.ptr<float>(i, j) = FLT_MAX;
}
}
return fdists;
}
void JDETracker::linear_assignment(const cv::Mat &cost,
float cost_limit,
Match *matches,
std::vector<int> *mismatch_row,
std::vector<int> *mismatch_col) {
matches->clear();
mismatch_row->clear();
mismatch_col->clear();
if (cost.empty()) {
for (int i = 0; i < cost.rows; ++i) mismatch_row->push_back(i);
for (int i = 0; i < cost.cols; ++i) mismatch_col->push_back(i);
return;
}
cv::Mat x(cost.rows, 1, CV_32S);
cv::Mat y(cost.cols, 1, CV_32S);
lapjv_internal(cost,
true,
cost_limit,
reinterpret_cast<int *>(x.data),
reinterpret_cast<int *>(y.data));
for (int i = 0; i < x.rows; ++i) {
int j = *x.ptr<int>(i);
if (j >= 0)
matches->insert({i, j});
else
mismatch_row->push_back(i);
}
for (int i = 0; i < y.rows; ++i) {
int j = *y.ptr<int>(i);
if (j < 0) mismatch_col->push_back(i);
}
return;
}
void JDETracker::remove_duplicate_trajectory(TrajectoryPool *a,
TrajectoryPool *b,
float iou_thresh) {
if (a->size() == 0 || b->size() == 0) return;
cv::Mat dist = iou_distance(*a, *b);
cv::Mat mask = dist < iou_thresh;
std::vector<cv::Point> idx;
cv::findNonZero(mask, idx);
std::vector<int> da;
std::vector<int> db;
for (size_t i = 0; i < idx.size(); ++i) {
int ta = (*a)[idx[i].y].timestamp - (*a)[idx[i].y].starttime;
int tb = (*b)[idx[i].x].timestamp - (*b)[idx[i].x].starttime;
if (ta > tb)
db.push_back(idx[i].x);
else
da.push_back(idx[i].y);
}
int id = 0;
TrajectoryPoolIterator piter;
for (piter = a->begin(); piter != a->end();) {
std::vector<int>::iterator iter = find(da.begin(), da.end(), id++);
if (iter != da.end())
piter = a->erase(piter);
else
++piter;
}
id = 0;
for (piter = b->begin(); piter != b->end();) {
std::vector<int>::iterator iter = find(db.begin(), db.end(), id++);
if (iter != db.end())
piter = b->erase(piter);
else
++piter;
}
}
} // namespace PaddleDetection
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// The code is based on:
// https://github.com/CnybTseng/JDE/blob/master/platforms/common/trajectory.cpp
// The copyright of CnybTseng/JDE is as follows:
// MIT License
#include "include/trajectory.h"
#include <algorithm>
namespace PaddleDetection {
void TKalmanFilter::init(const cv::Mat &measurement) {
measurement.copyTo(statePost(cv::Rect(0, 0, 1, 4)));
statePost(cv::Rect(0, 4, 1, 4)).setTo(0);
statePost.copyTo(statePre);
float varpos = 2 * std_weight_position * (*measurement.ptr<float>(3));
varpos *= varpos;
float varvel = 10 * std_weight_velocity * (*measurement.ptr<float>(3));
varvel *= varvel;
errorCovPost.setTo(0);
*errorCovPost.ptr<float>(0, 0) = varpos;
*errorCovPost.ptr<float>(1, 1) = varpos;
*errorCovPost.ptr<float>(2, 2) = 1e-4f;
*errorCovPost.ptr<float>(3, 3) = varpos;
*errorCovPost.ptr<float>(4, 4) = varvel;
*errorCovPost.ptr<float>(5, 5) = varvel;
*errorCovPost.ptr<float>(6, 6) = 1e-10f;
*errorCovPost.ptr<float>(7, 7) = varvel;
errorCovPost.copyTo(errorCovPre);
}
const cv::Mat &TKalmanFilter::predict() {
float varpos = std_weight_position * (*statePre.ptr<float>(3));
varpos *= varpos;
float varvel = std_weight_velocity * (*statePre.ptr<float>(3));
varvel *= varvel;
processNoiseCov.setTo(0);
*processNoiseCov.ptr<float>(0, 0) = varpos;
*processNoiseCov.ptr<float>(1, 1) = varpos;
*processNoiseCov.ptr<float>(2, 2) = 1e-4f;
*processNoiseCov.ptr<float>(3, 3) = varpos;
*processNoiseCov.ptr<float>(4, 4) = varvel;
*processNoiseCov.ptr<float>(5, 5) = varvel;
*processNoiseCov.ptr<float>(6, 6) = 1e-10f;
*processNoiseCov.ptr<float>(7, 7) = varvel;
return cv::KalmanFilter::predict();
}
const cv::Mat &TKalmanFilter::correct(const cv::Mat &measurement) {
float varpos = std_weight_position * (*measurement.ptr<float>(3));
varpos *= varpos;
measurementNoiseCov.setTo(0);
*measurementNoiseCov.ptr<float>(0, 0) = varpos;
*measurementNoiseCov.ptr<float>(1, 1) = varpos;
*measurementNoiseCov.ptr<float>(2, 2) = 1e-2f;
*measurementNoiseCov.ptr<float>(3, 3) = varpos;
return cv::KalmanFilter::correct(measurement);
}
void TKalmanFilter::project(cv::Mat *mean, cv::Mat *covariance) const {
float varpos = std_weight_position * (*statePost.ptr<float>(3));
varpos *= varpos;
cv::Mat measurementNoiseCov_ = cv::Mat::eye(4, 4, CV_32F);
*measurementNoiseCov_.ptr<float>(0, 0) = varpos;
*measurementNoiseCov_.ptr<float>(1, 1) = varpos;
*measurementNoiseCov_.ptr<float>(2, 2) = 1e-2f;
*measurementNoiseCov_.ptr<float>(3, 3) = varpos;
*mean = measurementMatrix * statePost;
cv::Mat temp = measurementMatrix * errorCovPost;
gemm(temp,
measurementMatrix,
1,
measurementNoiseCov_,
1,
*covariance,
cv::GEMM_2_T);
}
int Trajectory::count = 0;
const cv::Mat &Trajectory::predict(void) {
if (state != Tracked) *cv::KalmanFilter::statePost.ptr<float>(7) = 0;
return TKalmanFilter::predict();
}
void Trajectory::update(Trajectory *traj,
int timestamp_,
bool update_embedding_) {
timestamp = timestamp_;
++length;
ltrb = traj->ltrb;
xyah = traj->xyah;
TKalmanFilter::correct(cv::Mat(traj->xyah));
state = Tracked;
is_activated = true;
score = traj->score;
if (update_embedding_) update_embedding(traj->current_embedding);
}
void Trajectory::activate(int timestamp_) {
id = next_id();
TKalmanFilter::init(cv::Mat(xyah));
length = 0;
state = Tracked;
if (timestamp_ == 1) {
is_activated = true;
}
timestamp = timestamp_;
starttime = timestamp_;
}
void Trajectory::reactivate(Trajectory *traj, int timestamp_, bool newid) {
TKalmanFilter::correct(cv::Mat(traj->xyah));
update_embedding(traj->current_embedding);
length = 0;
state = Tracked;
is_activated = true;
timestamp = timestamp_;
if (newid) id = next_id();
}
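// Embedding smoothing: an exponential moving average over L2-normalized
// embeddings, smooth = eta * smooth + (1 - eta) * current, renormalized
// after each update (eta is a member declared in include/trajectory.h).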
void Trajectory::update_embedding(const cv::Mat &embedding) {
current_embedding = embedding / cv::norm(embedding);
if (smooth_embedding.empty()) {
smooth_embedding = current_embedding;
} else {
smooth_embedding = eta * smooth_embedding + (1 - eta) * current_embedding;
}
smooth_embedding = smooth_embedding / cv::norm(smooth_embedding);
}
TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPool &b) {
TrajectoryPool sum;
sum.insert(sum.end(), a.begin(), a.end());
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id;
for (size_t i = 0; i < b.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), b[i].id);
if (iter == ids.end()) {
sum.push_back(b[i]);
ids.push_back(b[i].id);
}
}
return sum;
}
TrajectoryPool operator+(const TrajectoryPool &a, const TrajectoryPtrPool &b) {
TrajectoryPool sum;
sum.insert(sum.end(), a.begin(), a.end());
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id;
for (size_t i = 0; i < b.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), b[i]->id);
if (iter == ids.end()) {
sum.push_back(*b[i]);
ids.push_back(b[i]->id);
}
}
return sum;
}
TrajectoryPool &operator+=(TrajectoryPool &a, // NOLINT
const TrajectoryPtrPool &b) {
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i].id;
for (size_t i = 0; i < b.size(); ++i) {
if (b[i]->smooth_embedding.empty()) continue;
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), b[i]->id);
if (iter == ids.end()) {
a.push_back(*b[i]);
ids.push_back(b[i]->id);
}
}
return a;
}
TrajectoryPool operator-(const TrajectoryPool &a, const TrajectoryPool &b) {
TrajectoryPool dif;
std::vector<int> ids(b.size());
for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i].id;
for (size_t i = 0; i < a.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), a[i].id);
if (iter == ids.end()) dif.push_back(a[i]);
}
return dif;
}
TrajectoryPool &operator-=(TrajectoryPool &a, // NOLINT
const TrajectoryPool &b) {
std::vector<int> ids(b.size());
for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i].id;
TrajectoryPoolIterator piter;
for (piter = a.begin(); piter != a.end();) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), piter->id);
if (iter == ids.end())
++piter;
else
piter = a.erase(piter);
}
return a;
}
TrajectoryPtrPool operator+(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b) {
TrajectoryPtrPool sum;
sum.insert(sum.end(), a.begin(), a.end());
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i]->id;
for (size_t i = 0; i < b.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), b[i]->id);
if (iter == ids.end()) {
sum.push_back(b[i]);
ids.push_back(b[i]->id);
}
}
return sum;
}
TrajectoryPtrPool operator+(const TrajectoryPtrPool &a, TrajectoryPool *b) {
TrajectoryPtrPool sum;
sum.insert(sum.end(), a.begin(), a.end());
std::vector<int> ids(a.size());
for (size_t i = 0; i < a.size(); ++i) ids[i] = a[i]->id;
for (size_t i = 0; i < b->size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), (*b)[i].id);
if (iter == ids.end()) {
sum.push_back(&(*b)[i]);
ids.push_back((*b)[i].id);
}
}
return sum;
}
TrajectoryPtrPool operator-(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b) {
TrajectoryPtrPool dif;
std::vector<int> ids(b.size());
for (size_t i = 0; i < b.size(); ++i) ids[i] = b[i]->id;
for (size_t i = 0; i < a.size(); ++i) {
std::vector<int>::iterator iter = find(ids.begin(), ids.end(), a[i]->id);
if (iter == ids.end()) dif.push_back(a[i]);
}
return dif;
}
cv::Mat embedding_distance(const TrajectoryPool &a, const TrajectoryPool &b) {
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
cv::Mat u = a[i].smooth_embedding;
cv::Mat v = b[j].smooth_embedding;
double uv = u.dot(v);
double uu = u.dot(u);
double vv = v.dot(v);
double dist = std::abs(1. - uv / std::sqrt(uu * vv));
// double dist = cv::norm(a[i].smooth_embedding, b[j].smooth_embedding,
// cv::NORM_L2);
distsi[j] = static_cast<float>(std::max(std::min(dist, 2.), 0.));
}
}
return dists;
}
cv::Mat embedding_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b) {
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
// double dist = cv::norm(a[i]->smooth_embedding, b[j]->smooth_embedding,
// cv::NORM_L2);
// distsi[j] = static_cast<float>(dist);
cv::Mat u = a[i]->smooth_embedding;
cv::Mat v = b[j]->smooth_embedding;
double uv = u.dot(v);
double uu = u.dot(u);
double vv = v.dot(v);
double dist = std::abs(1. - uv / std::sqrt(uu * vv));
distsi[j] = static_cast<float>(std::max(std::min(dist, 2.), 0.));
}
}
return dists;
}
cv::Mat embedding_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b) {
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
// double dist = cv::norm(a[i]->smooth_embedding, b[j].smooth_embedding,
// cv::NORM_L2);
// distsi[j] = static_cast<float>(dist);
cv::Mat u = a[i]->smooth_embedding;
cv::Mat v = b[j].smooth_embedding;
double uv = u.dot(v);
double uu = u.dot(u);
double vv = v.dot(v);
double dist = std::abs(1. - uv / std::sqrt(uu * vv));
distsi[j] = static_cast<float>(std::max(std::min(dist, 2.), 0.));
}
}
return dists;
}
cv::Mat mahalanobis_distance(const TrajectoryPool &a, const TrajectoryPool &b) {
std::vector<cv::Mat> means(a.size());
std::vector<cv::Mat> icovariances(a.size());
for (size_t i = 0; i < a.size(); ++i) {
cv::Mat covariance;
a[i].project(&means[i], &covariance);
cv::invert(covariance, icovariances[i]);
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Mat x(b[j].xyah);
float dist =
static_cast<float>(cv::Mahalanobis(x, means[i], icovariances[i]));
distsi[j] = dist * dist;
}
}
return dists;
}
cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a,
const TrajectoryPtrPool &b) {
std::vector<cv::Mat> means(a.size());
std::vector<cv::Mat> icovariances(a.size());
for (size_t i = 0; i < a.size(); ++i) {
cv::Mat covariance;
a[i]->project(&means[i], &covariance);
cv::invert(covariance, icovariances[i]);
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Mat x(b[j]->xyah);
float dist =
static_cast<float>(cv::Mahalanobis(x, means[i], icovariances[i]));
distsi[j] = dist * dist;
}
}
return dists;
}
cv::Mat mahalanobis_distance(const TrajectoryPtrPool &a,
const TrajectoryPool &b) {
std::vector<cv::Mat> means(a.size());
std::vector<cv::Mat> icovariances(a.size());
for (size_t i = 0; i < a.size(); ++i) {
cv::Mat covariance;
a[i]->project(&means[i], &covariance);
cv::invert(covariance, icovariances[i]);
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Mat x(b[j].xyah);
float dist =
static_cast<float>(cv::Mahalanobis(x, means[i], icovariances[i]));
distsi[j] = dist * dist;
}
}
return dists;
}
static inline float calc_inter_area(const cv::Vec4f &a, const cv::Vec4f &b) {
if (a[2] < b[0] || a[0] > b[2] || a[3] < b[1] || a[1] > b[3]) return 0.f;
float w = std::min(a[2], b[2]) - std::max(a[0], b[0]);
float h = std::min(a[3], b[3]) - std::max(a[1], b[1]);
return w * h;
}
cv::Mat iou_distance(const TrajectoryPool &a, const TrajectoryPool &b) {
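// Jaccard distance: 1 - IoU for every trajectory pair.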
std::vector<float> areaa(a.size());
for (size_t i = 0; i < a.size(); ++i) {
float w = a[i].ltrb[2] - a[i].ltrb[0];
float h = a[i].ltrb[3] - a[i].ltrb[1];
areaa[i] = w * h;
}
std::vector<float> areab(b.size());
for (size_t j = 0; j < b.size(); ++j) {
float w = b[j].ltrb[2] - b[j].ltrb[0];
float h = b[j].ltrb[3] - b[j].ltrb[1];
areab[j] = w * h;
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
const cv::Vec4f &boxa = a[i].ltrb;
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Vec4f &boxb = b[j].ltrb;
float inters = calc_inter_area(boxa, boxb);
distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters);
}
}
return dists;
}
cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPtrPool &b) {
std::vector<float> areaa(a.size());
for (size_t i = 0; i < a.size(); ++i) {
float w = a[i]->ltrb[2] - a[i]->ltrb[0];
float h = a[i]->ltrb[3] - a[i]->ltrb[1];
areaa[i] = w * h;
}
std::vector<float> areab(b.size());
for (size_t j = 0; j < b.size(); ++j) {
float w = b[j]->ltrb[2] - b[j]->ltrb[0];
float h = b[j]->ltrb[3] - b[j]->ltrb[1];
areab[j] = w * h;
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
const cv::Vec4f &boxa = a[i]->ltrb;
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Vec4f &boxb = b[j]->ltrb;
float inters = calc_inter_area(boxa, boxb);
distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters);
}
}
return dists;
}
cv::Mat iou_distance(const TrajectoryPtrPool &a, const TrajectoryPool &b) {
std::vector<float> areaa(a.size());
for (size_t i = 0; i < a.size(); ++i) {
float w = a[i]->ltrb[2] - a[i]->ltrb[0];
float h = a[i]->ltrb[3] - a[i]->ltrb[1];
areaa[i] = w * h;
}
std::vector<float> areab(b.size());
for (size_t j = 0; j < b.size(); ++j) {
float w = b[j].ltrb[2] - b[j].ltrb[0];
float h = b[j].ltrb[3] - b[j].ltrb[1];
areab[j] = w * h;
}
cv::Mat dists(a.size(), b.size(), CV_32F);
for (size_t i = 0; i < a.size(); ++i) {
const cv::Vec4f &boxa = a[i]->ltrb;
float *distsi = dists.ptr<float>(i);
for (size_t j = 0; j < b.size(); ++j) {
const cv::Vec4f &boxb = b[j].ltrb;
float inters = calc_inter_area(boxa, boxb);
distsi[j] = 1.f - inters / (areaa[i] + areab[j] - inters);
}
}
return dists;
}
} // namespace PaddleDetection
# Python-side Inference Deployment
In PaddlePaddle, the inference engine and the training engine use different underlying optimizations. The inference engine is AnalysisPredictor, a Python interface to the [C++ inference library](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html) that is optimized specifically for deployment: it applies a series of graph-level optimizations to the model and removes unnecessary memory copies. For users with strict performance requirements when deploying trained models, we provide inference scripts that are independent of PaddleDetection and can be integrated directly.
Deployment consists of two steps:
- Export the inference model
- Run inference with Python
During training, PaddleDetection keeps both the forward network and the optimizer parameters; for deployment only the forward parameters are needed. See [Export Model](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/deploy/EXPORT_MODEL.md) for details.
The exported directory contains four files: `infer_cfg.yml`, `model.pdiparams`, `model.pdiparams.info`, and `model.pdmodel`.
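As a sketch, for the FairMOT model exported in section 1.1 below, the output directory would look like this (the folder name follows the config name):
```
output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320/
├── infer_cfg.yml          # preprocessing and architecture config
├── model.pdmodel          # network structure
├── model.pdiparams        # forward parameters
└── model.pdiparams.info   # parameter metadata
```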
## 1. Exporting and Running the FairMOT Model
### 1.1 Export the inference model
```bash
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/fairmot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/fairmot_hrnetv2_w18_dlafpn_30e_576x320.pdparams
```
### 1.2 Run Python inference with the exported model
```bash
python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320 --video_file={your video name}.mp4 --device=GPU --save_mot_txts
```
**Note:**
- Tracking models take a video as input; single-image inference is not supported. By default the visualized tracking result is saved as a video; add `--save_mot_txts` to also save the tracking results as txt files, or `--save_images` to save the visualized frames as images.
- To export and run a multi-class or vehicle FairMOT model, simply switch to the corresponding config and weights. For example:
```
job_name=mcfairmot_hrnetv2_w18_dlafpn_30e_576x320_visdrone
model_type=mot/mcfairmot
config=configs/${model_type}/${job_name}.yml
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=https://paddledet.bj.bcebos.com/models/mot/${job_name}.pdparams
python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/${job_name} --video_file={your video name}.mp4 --device=GPU --save_mot_txts
```
## 2. Exporting and Running the DeepSORT Model
### 2.1 Export the inference models
Step 1: export the detection model
```bash
# Export the JDE YOLOv3 pedestrian detection model
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/jde_yolov3_darknet53_30e_1088x608_mix.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/jde_yolov3_darknet53_30e_1088x608_mix.pdparams
# Or export the PPYOLOv2 pedestrian detection model
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/detector/ppyolov2_r50vd_dcn_365e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/ppyolov2_r50vd_dcn_365e_640x640_mot17half.pdparams
```
Step 2: export the ReID model
```bash
# Export the PCB Pyramid ReID model
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/reid/deepsort_pcb_pyramid_r101.yml -o reid_weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pcb_pyramid_r101.pdparams
# Or export the PPLCNet ReID model
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/deepsort/reid/deepsort_pplcnet.yml -o reid_weights=https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet.pdparams
```
### 2.2 Run Python inference with the exported models
```bash
# Use the exported JDE YOLOv3 pedestrian detection model with the PCB Pyramid ReID model
python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/jde_yolov3_darknet53_30e_1088x608_mix/ --reid_model_dir=output_inference/deepsort_pcb_pyramid_r101/ --video_file={your video name}.mp4 --device=GPU --save_mot_txts
# Or use the exported PPYOLOv2 pedestrian detection model with the PPLCNet ReID model
python deploy/pptracking/python/mot_sde_infer.py --model_dir=output_inference/ppyolov2_r50vd_dcn_365e_640x640_mot17half/ --reid_model_dir=output_inference/deepsort_pplcnet/ --video_file={your video name}.mp4 --device=GPU --scaled=True --save_mot_txts
```
**Note:**
- Tracking models take a video as input; single-image inference is not supported. By default the visualized tracking result is saved as a video; add `--save_mot_txts` (one txt per video) or `--save_images` to save the visualized frames as images.
- `--scaled` indicates whether the coordinates in the model output have already been scaled back to the original image: set it to False for the JDE YOLOv3 detector and to True for general-purpose detection models.
## 3. Exporting and Running the Cross-camera (MTMCT) Tracking Model
### 3.1 Export the inference models
Step 1: download the exported detection model
```bash
wget https://paddledet.bj.bcebos.com/models/mot/deepsort/picodet_l_640_aic21mtmct_vehicle.tar
tar -xvf picodet_l_640_aic21mtmct_vehicle.tar
```
Step 2: download the exported ReID model
```bash
wget https://paddledet.bj.bcebos.com/models/mot/deepsort/deepsort_pplcnet_vehicle.tar
tar -xvf deepsort_pplcnet_vehicle.tar
```
### 3.2 Run Python inference with the exported models
```bash
# Use the exported PicoDet vehicle detection model with the PPLCNet vehicle ReID model
python deploy/pptracking/python/mot_sde_infer.py --model_dir=picodet_l_640_aic21mtmct_vehicle/ --reid_model_dir=deepsort_pplcnet_vehicle/ --mtmct_dir={your mtmct scene video folder} --mtmct_cfg=mtmct_cfg --device=GPU --scaled=True --save_mot_txts --save_images
```
**Note:**
- Tracking models take a video as input; single-image inference is not supported. By default the visualized tracking result is saved as a video; add `--save_mot_txts` (one txt per video) or `--save_images` to save the visualized frames as images.
- `--scaled` indicates whether the coordinates in the model output have already been scaled back to the original image: set it to False for the JDE YOLOv3 detector and to True for general-purpose detection models.
- `--mtmct_dir` is the folder for one MTMCT scene; it contains one image sub-folder per camera of that scene, and there must be at least two such sub-folders. A sketch of such a layout is shown below.
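As an illustration (the folder names here are hypothetical), a scene directory passed to `--mtmct_dir` could look like:
```
mtmct_scene_01/
├── camera_1/        # frames captured by the first camera
│   ├── 00001.jpg
│   └── ...
└── camera_2/        # frames captured by the second camera
    ├── 00001.jpg
    └── ...
```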
## Argument Description:
| Argument | Required | Description |
|-------|-------|----------|
| --model_dir | Yes | Path of the exported model described above |
| --image_file | Option | Image to predict |
| --image_dir | Option | Directory of images to predict |
| --video_file | Option | Video to predict |
| --camera_id | Option | ID of the camera used for prediction, default -1 (do not use a camera; valid values are 0 to number of cameras - 1). During prediction, press `q` in the visualization window to quit and write the result to output/output.mp4 |
| --device | Option | Runtime device, one of `CPU/GPU/XPU`, default `CPU` |
| --run_mode | Option | When using GPU, default is fluid; options are (fluid/trt_fp32/trt_fp16/trt_int8) |
| --batch_size | Option | Batch size for prediction, effective when `image_dir` is set, default 1 |
| --threshold | Option | Score threshold for predictions, default 0.5 |
| --output_dir | Option | Root directory for visualized results, default output/ |
| --run_benchmark | Option | Whether to run the benchmark; requires `--image_file` or `--image_dir`, default False |
| --enable_mkldnn | Option | Whether to enable MKLDNN acceleration for CPU inference, default False |
| --cpu_threads | Option | Number of CPU threads, default 1 |
| --trt_calib_mode | Option | Whether TensorRT calibration is enabled, default False. Set to True when using TensorRT int8, and to False for models quantized with PaddleSlim |
| --do_entrance_counting | Option | Whether to count entrance/exit traffic, default False |
| --draw_center_traj | Option | Whether to draw tracking trajectories, default False |
| --mtmct_dir | Option | Image folder for MTMCT cross-camera tracking, default None |
| --mtmct_cfg | Option | Config file for MTMCT cross-camera tracking, default None |
Notes:
- Argument precedence: `camera_id` > `video_file` > `image_dir` > `image_file`
- run_mode: fluid runs AnalysisPredictor at float32 precision; the other values run AnalysisPredictor with the corresponding TensorRT precision (see the example after this list).
- If the installed PaddlePaddle does not support TensorRT inference, you need to build it from source; see the [inference library build guide](https://paddleinference.paddlepaddle.org.cn/user_guides/source_compile.html)
- If --run_benchmark is set to True, install the dependencies first: `pip install pynvml psutil GPUtil`
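As a sketch (the video file name is a placeholder, and a TensorRT-enabled PaddlePaddle build is assumed), running the FairMOT pipeline with TensorRT FP16 would look like:
```bash
# Requires --device=GPU; trt_fp16 trades precision for higher throughput
python deploy/pptracking/python/mot_jde_infer.py --model_dir=output_inference/fairmot_hrnetv2_w18_dlafpn_30e_576x320 --video_file=test.mp4 --device=GPU --run_mode=trt_fp16 --save_mot_txts
```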
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import logging
import paddle
import paddle.inference as paddle_infer
from pathlib import Path
CUR_DIR = os.path.dirname(os.path.abspath(__file__))
LOG_PATH_ROOT = f"{CUR_DIR}/../../output"
class PaddleInferBenchmark(object):
def __init__(self,
config,
model_info: dict={},
data_info: dict={},
perf_info: dict={},
resource_info: dict={},
**kwargs):
"""
Construct PaddleInferBenchmark Class to format logs.
args:
config(paddle.inference.Config): paddle inference config
model_info(dict): basic model info
{'model_name': 'resnet50',
'precision': 'fp32'}
data_info(dict): input data info
{'batch_size': 1,
'shape': '3,224,224',
'data_num': 1000}
perf_info(dict): performance result
{'preprocess_time_s': 1.0,
'inference_time_s': 2.0,
'postprocess_time_s': 1.0,
'total_time_s': 4.0}
resource_info(dict):
cpu and gpu resources
{'cpu_rss': 100,
'gpu_rss': 100,
'gpu_util': 60}
"""
# PaddleInferBenchmark Log Version
self.log_version = "1.0.3"
# Paddle Version
self.paddle_version = paddle.__version__
self.paddle_commit = paddle.__git_commit__
paddle_infer_info = paddle_infer.get_version()
self.paddle_branch = paddle_infer_info.strip().split(': ')[-1]
# model info
self.model_info = model_info
# data info
self.data_info = data_info
# perf info
self.perf_info = perf_info
try:
# required value
self.model_name = model_info['model_name']
self.precision = model_info['precision']
self.batch_size = data_info['batch_size']
self.shape = data_info['shape']
self.data_num = data_info['data_num']
self.inference_time_s = round(perf_info['inference_time_s'], 4)
except (KeyError, TypeError):
self.print_help()
raise ValueError(
"Set argument wrong, please check input argument and its type")
self.preprocess_time_s = perf_info.get('preprocess_time_s', 0)
self.postprocess_time_s = perf_info.get('postprocess_time_s', 0)
self.total_time_s = perf_info.get('total_time_s', 0)
self.inference_time_s_90 = perf_info.get("inference_time_s_90", "")
self.inference_time_s_99 = perf_info.get("inference_time_s_99", "")
self.succ_rate = perf_info.get("succ_rate", "")
self.qps = perf_info.get("qps", "")
# conf info
self.config_status = self.parse_config(config)
# mem info
if isinstance(resource_info, dict):
self.cpu_rss_mb = int(resource_info.get('cpu_rss_mb', 0))
self.cpu_vms_mb = int(resource_info.get('cpu_vms_mb', 0))
self.cpu_shared_mb = int(resource_info.get('cpu_shared_mb', 0))
self.cpu_dirty_mb = int(resource_info.get('cpu_dirty_mb', 0))
self.cpu_util = round(resource_info.get('cpu_util', 0), 2)
self.gpu_rss_mb = int(resource_info.get('gpu_rss_mb', 0))
self.gpu_util = round(resource_info.get('gpu_util', 0), 2)
self.gpu_mem_util = round(resource_info.get('gpu_mem_util', 0), 2)
else:
self.cpu_rss_mb = 0
self.cpu_vms_mb = 0
self.cpu_shared_mb = 0
self.cpu_dirty_mb = 0
self.cpu_util = 0
self.gpu_rss_mb = 0
self.gpu_util = 0
self.gpu_mem_util = 0
# init benchmark logger
self.benchmark_logger()
def benchmark_logger(self):
"""
benchmark logger
"""
# remove other logging handler
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
# Init logger
FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
log_output = f"{LOG_PATH_ROOT}/{self.model_name}.log"
Path(f"{LOG_PATH_ROOT}").mkdir(parents=True, exist_ok=True)
logging.basicConfig(
level=logging.INFO,
format=FORMAT,
handlers=[
logging.FileHandler(
filename=log_output, mode='w'),
logging.StreamHandler(),
])
self.logger = logging.getLogger(__name__)
self.logger.info(
f"Paddle Inference benchmark log will be saved to {log_output}")
def parse_config(self, config) -> dict:
"""
parse paddle predictor config
args:
config(paddle.inference.Config): paddle inference config
return:
config_status(dict): dict style config info
"""
if isinstance(config, paddle_infer.Config):
config_status = {}
config_status['runtime_device'] = "gpu" if config.use_gpu(
) else "cpu"
config_status['ir_optim'] = config.ir_optim()
config_status['enable_tensorrt'] = config.tensorrt_engine_enabled()
config_status['precision'] = self.precision
config_status['enable_mkldnn'] = config.mkldnn_enabled()
config_status[
'cpu_math_library_num_threads'] = config.cpu_math_library_num_threads(
)
elif isinstance(config, dict):
config_status = {}
config_status['runtime_device'] = config.get('runtime_device', "")
config_status['ir_optim'] = config.get('ir_optim', "")
config_status['enable_tensorrt'] = config.get('enable_tensorrt', "")
config_status['precision'] = config.get('precision', "")
config_status['enable_mkldnn'] = config.get('enable_mkldnn', "")
config_status['cpu_math_library_num_threads'] = config.get(
'cpu_math_library_num_threads', "")
else:
self.print_help()
raise ValueError(
"Set argument config wrong, please check input argument and its type"
)
return config_status
def report(self, identifier=None):
"""
print log report
args:
identifier(string): identify log
"""
if identifier:
identifier = f"[{identifier}]"
else:
identifier = ""
self.logger.info("\n")
self.logger.info(
"---------------------- Paddle info ----------------------")
self.logger.info(f"{identifier} paddle_version: {self.paddle_version}")
self.logger.info(f"{identifier} paddle_commit: {self.paddle_commit}")
self.logger.info(f"{identifier} paddle_branch: {self.paddle_branch}")
self.logger.info(f"{identifier} log_api_version: {self.log_version}")
self.logger.info(
"----------------------- Conf info -----------------------")
self.logger.info(
f"{identifier} runtime_device: {self.config_status['runtime_device']}"
)
self.logger.info(
f"{identifier} ir_optim: {self.config_status['ir_optim']}")
self.logger.info(f"{identifier} enable_memory_optim: {True}")
self.logger.info(
f"{identifier} enable_tensorrt: {self.config_status['enable_tensorrt']}"
)
self.logger.info(
f"{identifier} enable_mkldnn: {self.config_status['enable_mkldnn']}")
self.logger.info(
f"{identifier} cpu_math_library_num_threads: {self.config_status['cpu_math_library_num_threads']}"
)
self.logger.info(
"----------------------- Model info ----------------------")
self.logger.info(f"{identifier} model_name: {self.model_name}")
self.logger.info(f"{identifier} precision: {self.precision}")
self.logger.info(
"----------------------- Data info -----------------------")
self.logger.info(f"{identifier} batch_size: {self.batch_size}")
self.logger.info(f"{identifier} input_shape: {self.shape}")
self.logger.info(f"{identifier} data_num: {self.data_num}")
self.logger.info(
"----------------------- Perf info -----------------------")
self.logger.info(
f"{identifier} cpu_rss(MB): {self.cpu_rss_mb}, cpu_vms: {self.cpu_vms_mb}, cpu_shared_mb: {self.cpu_shared_mb}, cpu_dirty_mb: {self.cpu_dirty_mb}, cpu_util: {self.cpu_util}%"
)
self.logger.info(
f"{identifier} gpu_rss(MB): {self.gpu_rss_mb}, gpu_util: {self.gpu_util}%, gpu_mem_util: {self.gpu_mem_util}%"
)
self.logger.info(
f"{identifier} total time spent(s): {self.total_time_s}")
self.logger.info(
f"{identifier} preprocess_time(ms): {round(self.preprocess_time_s*1000, 1)}, inference_time(ms): {round(self.inference_time_s*1000, 1)}, postprocess_time(ms): {round(self.postprocess_time_s*1000, 1)}"
)
if self.inference_time_s_90:
self.logger.info(
f"{identifier} 90%_cost: {self.inference_time_s_90}, 99%_cost: {self.inference_time_s_99}, succ_rate: {self.succ_rate}"
)
if self.qps:
self.logger.info(f"{identifier} QPS: {self.qps}")
def print_help(self):
"""
print function help
"""
print("""Usage:
==== Print inference benchmark logs. ====
config = paddle.inference.Config()
model_info = {'model_name': 'resnet50',
'precision': 'fp32'}
data_info = {'batch_size': 1,
'shape': '3,224,224',
'data_num': 1000}
perf_info = {'preprocess_time_s': 1.0,
'inference_time_s': 2.0,
'postprocess_time_s': 1.0,
'total_time_s': 4.0}
resource_info = {'cpu_rss_mb': 100,
'gpu_rss_mb': 100,
'gpu_util': 60}
log = PaddleInferBenchmark(config, model_info, data_info, perf_info, resource_info)
log('Test')
""")
def __call__(self, identifier=None):
"""
__call__
args:
identifier(string): identify log
"""
self.report(identifier)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import yaml
import glob
from functools import reduce
import cv2
import numpy as np
import math
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from benchmark_utils import PaddleInferBenchmark
from picodet_postprocess import PicoDetPostProcess
from preprocess import preprocess, Resize, NormalizeImage, Permute, PadStride, LetterBoxResize
from visualize import visualize_box_mask
from utils import argsparser, Timer, get_current_memory_mb
# Global dictionary
SUPPORT_MODELS = {
'YOLO',
'PicoDet',
'JDE',
'FairMOT',
'DeepSORT',
}
class Detector(object):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): batch size for inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantization
calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
self.pred_config = pred_config
self.predictor, self.config = load_predictor(
model_dir,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
def preprocess(self, image_list):
preprocess_ops = []
for op_info in self.pred_config.preprocess_infos:
new_op_info = op_info.copy()
op_type = new_op_info.pop('type')
preprocess_ops.append(eval(op_type)(**new_op_info))
input_im_lst = []
input_im_info_lst = []
for im_path in image_list:
im, im_info = preprocess(im_path, preprocess_ops)
input_im_lst.append(im)
input_im_info_lst.append(im_info)
inputs = create_inputs(input_im_lst, input_im_info_lst)
return inputs
def postprocess(self,
np_boxes,
np_masks,
inputs,
np_boxes_num,
threshold=0.5):
# postprocess output of predictor
results = {}
results['boxes'] = np_boxes
results['boxes_num'] = np_boxes_num
if np_masks is not None:
results['masks'] = np_masks
return results
def predict(self, image_list, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image_list (list): list of image
threshold (float): score threshold for predicted boxes
Returns:
results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of boxes,
matrix element: [class, score, x_min, y_min, x_max, y_max]
MaskRCNN's results include 'masks': np.ndarray:
shape: [N, im_h, im_w]
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_list)
self.det_times.preprocess_time_s.end()
np_boxes, np_masks = None, None
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
if self.pred_config.mask:
masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
boxes_num = self.predictor.get_output_handle(output_names[1])
np_boxes_num = boxes_num.copy_to_cpu()
if self.pred_config.mask:
masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start()
results = []
if reduce(lambda x, y: x * y, np_boxes.shape) < 6:
print('[WARNING] No object detected.')
results = {'boxes': np.zeros([0, 6]), 'boxes_num': [0]}
else:
results = self.postprocess(
np_boxes, np_masks, inputs, np_boxes_num, threshold=threshold)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += len(image_list)
return results
def get_timer(self):
return self.det_times
class DetectorPicoDet(Detector):
"""
Args:
config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): batch size for inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantization
calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
self.pred_config = pred_config
self.predictor, self.config = load_predictor(
model_dir,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
def predict(self, image, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image (str/np.ndarray): path of image/ np.ndarray read by cv2
threshold (float): score threshold for predicted boxes
Returns:
results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of boxes,
matrix element: [class, score, x_min, y_min, x_max, y_max]
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
np_score_list, np_boxes_list = [], []
for i in range(warmup):
self.predictor.run()
np_score_list.clear()
np_boxes_list.clear()
output_names = self.predictor.get_output_names()
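# The head emits two tensors per FPN level: the first num_outs outputs are
# class scores, the next num_outs outputs are the matching box regressions.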
num_outs = int(len(output_names) / 2)
for out_idx in range(num_outs):
np_score_list.append(
self.predictor.get_output_handle(output_names[out_idx])
.copy_to_cpu())
np_boxes_list.append(
self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu())
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
np_score_list.clear()
np_boxes_list.clear()
output_names = self.predictor.get_output_names()
num_outs = int(len(output_names) / 2)
for out_idx in range(num_outs):
np_score_list.append(
self.predictor.get_output_handle(output_names[out_idx])
.copy_to_cpu())
np_boxes_list.append(
self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu())
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.img_num += 1
self.det_times.postprocess_time_s.start()
self.postprocess = PicoDetPostProcess(
inputs['image'].shape[2:],
inputs['im_shape'],
inputs['scale_factor'],
strides=self.pred_config.fpn_stride,
nms_threshold=self.pred_config.nms['nms_threshold'])
np_boxes, np_boxes_num = self.postprocess(np_score_list, np_boxes_list)
self.det_times.postprocess_time_s.end()
return dict(boxes=np_boxes, boxes_num=np_boxes_num)
def create_inputs(imgs, im_info):
"""generate input for different model type
Args:
imgs (list(numpy)): list of images (np.ndarray)
im_info (list(dict)): list of image info
Returns:
inputs (dict): input of model
"""
inputs = {}
im_shape = []
scale_factor = []
if len(imgs) == 1:
inputs['image'] = np.array((imgs[0], )).astype('float32')
inputs['im_shape'] = np.array(
(im_info[0]['im_shape'], )).astype('float32')
inputs['scale_factor'] = np.array(
(im_info[0]['scale_factor'], )).astype('float32')
return inputs
for e in im_info:
im_shape.append(np.array((e['im_shape'], )).astype('float32'))
scale_factor.append(np.array((e['scale_factor'], )).astype('float32'))
inputs['im_shape'] = np.concatenate(im_shape, axis=0)
inputs['scale_factor'] = np.concatenate(scale_factor, axis=0)
imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs]
max_shape_h = max([e[0] for e in imgs_shape])
max_shape_w = max([e[1] for e in imgs_shape])
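# Pad every image in the batch to the max height/width so they can be
# stacked into a single contiguous tensor.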
padding_imgs = []
for img in imgs:
im_c, im_h, im_w = img.shape[:]
padding_im = np.zeros(
(im_c, max_shape_h, max_shape_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = img
padding_imgs.append(padding_im)
inputs['image'] = np.stack(padding_imgs, axis=0)
return inputs
class PredictConfig():
"""set config of preprocess, postprocess and visualize
Args:
model_dir (str): root path of model.yml
"""
def __init__(self, model_dir):
# parsing Yaml config for Preprocess
deploy_file = os.path.join(model_dir, 'infer_cfg.yml')
with open(deploy_file) as f:
yml_conf = yaml.safe_load(f)
self.check_model(yml_conf)
self.arch = yml_conf['arch']
self.preprocess_infos = yml_conf['Preprocess']
self.min_subgraph_size = yml_conf['min_subgraph_size']
self.labels = yml_conf['label_list']
self.mask = False
self.use_dynamic_shape = yml_conf['use_dynamic_shape']
if 'mask' in yml_conf:
self.mask = yml_conf['mask']
self.tracker = None
if 'tracker' in yml_conf:
self.tracker = yml_conf['tracker']
if 'NMS' in yml_conf:
self.nms = yml_conf['NMS']
if 'fpn_stride' in yml_conf:
self.fpn_stride = yml_conf['fpn_stride']
self.print_config()
def check_model(self, yml_conf):
"""
Raises:
ValueError: loaded model not in supported model type
"""
for support_model in SUPPORT_MODELS:
if support_model in yml_conf['arch']:
return True
raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[
'arch'], SUPPORT_MODELS))
def print_config(self):
print('----------- Model Configuration -----------')
print('%s: %s' % ('Model Arch', self.arch))
print('%s: ' % ('Transform Order'))
for op_info in self.preprocess_infos:
print('--%s: %s' % ('transform op', op_info['type']))
print('--------------------------------------------')
def load_predictor(model_dir,
run_mode='fluid',
batch_size=1,
device='CPU',
min_subgraph_size=3,
use_dynamic_shape=False,
trt_min_shape=1,
trt_max_shape=1280,
trt_opt_shape=640,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
"""set AnalysisConfig, generate AnalysisPredictor
Args:
model_dir (str): root path of __model__ and __params__
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16/trt_int8)
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
trt_calib_mode (bool): If the model is produced by TRT offline quantization
calibration, trt_calib_mode needs to be set to True
Returns:
predictor (PaddlePredictor): AnalysisPredictor
Raises:
ValueError: predict by TensorRT need device == 'GPU'.
"""
if device != 'GPU' and run_mode != 'fluid':
raise ValueError(
"Predict by TensorRT mode: {}, expect device=='GPU', but device == {}"
.format(run_mode, device))
config = Config(
os.path.join(model_dir, 'model.pdmodel'),
os.path.join(model_dir, 'model.pdiparams'))
if device == 'GPU':
# initial GPU memory(M), device ID
config.enable_use_gpu(200, 0)
# optimize graph and fuse op
config.switch_ir_optim(True)
elif device == 'XPU':
config.enable_xpu(10 * 1024 * 1024)
else:
config.disable_gpu()
config.set_cpu_math_library_num_threads(cpu_threads)
if enable_mkldnn:
try:
# cache 10 different shapes for mkldnn to avoid memory leak
config.set_mkldnn_cache_capacity(10)
config.enable_mkldnn()
except Exception as e:
print(
"The current environment does not support `mkldnn`, so disable mkldnn."
)
pass
precision_map = {
'trt_int8': Config.Precision.Int8,
'trt_fp32': Config.Precision.Float32,
'trt_fp16': Config.Precision.Half
}
if run_mode in precision_map.keys():
config.enable_tensorrt_engine(
workspace_size=1 << 10,
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
use_static=False,
use_calib_mode=trt_calib_mode)
if use_dynamic_shape:
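# Register min/opt/max input shapes so TensorRT can build a dynamic-shape
# engine covering this resolution range.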
min_input_shape = {
'image': [batch_size, 3, trt_min_shape, trt_min_shape]
}
max_input_shape = {
'image': [batch_size, 3, trt_max_shape, trt_max_shape]
}
opt_input_shape = {
'image': [batch_size, 3, trt_opt_shape, trt_opt_shape]
}
config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
opt_input_shape)
print('trt set dynamic shape done!')
# disable print log when predict
config.disable_glog_info()
# enable shared memory
config.enable_memory_optim()
# disable feed, fetch OP, needed by zero_copy_run
config.switch_use_feed_fetch_ops(False)
predictor = create_predictor(config)
return predictor, config
def get_test_images(infer_dir, infer_img):
"""
Get image path list in TEST mode
"""
assert infer_img is not None or infer_dir is not None, \
"--infer_img or --infer_dir should be set"
assert infer_img is None or os.path.isfile(infer_img), \
"{} is not a file".format(infer_img)
assert infer_dir is None or os.path.isdir(infer_dir), \
"{} is not a directory".format(infer_dir)
# infer_img has a higher priority
if infer_img and os.path.isfile(infer_img):
return [infer_img]
images = set()
infer_dir = os.path.abspath(infer_dir)
assert os.path.isdir(infer_dir), \
"infer_dir {} is not a directory".format(infer_dir)
exts = ['jpg', 'jpeg', 'png', 'bmp']
exts += [ext.upper() for ext in exts]
for ext in exts:
images.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
images = list(images)
assert len(images) > 0, "no image found in {}".format(infer_dir)
print("Found {} inference images in total.".format(len(images)))
return images
def visualize(image_list, results, labels, output_dir='output/', threshold=0.5):
# visualize the predict result
start_idx = 0
for idx, image_file in enumerate(image_list):
im_bboxes_num = results['boxes_num'][idx]
im_results = {}
if 'boxes' in results:
im_results['boxes'] = results['boxes'][start_idx:start_idx +
im_bboxes_num, :]
if 'label' in results:
im_results['label'] = results['label'][start_idx:start_idx +
im_bboxes_num]
if 'score' in results:
im_results['score'] = results['score'][start_idx:start_idx +
im_bboxes_num]
start_idx += im_bboxes_num
im = visualize_box_mask(
image_file, im_results, labels, threshold=threshold)
img_name = os.path.split(image_file)[-1]
if not os.path.exists(output_dir):
os.makedirs(output_dir)
out_path = os.path.join(output_dir, img_name)
im.save(out_path, quality=95)
print("save result to: " + out_path)
def print_arguments(args):
print('----------- Running Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------')
def predict_image(detector, image_list, batch_size=1):
batch_loop_cnt = math.ceil(float(len(image_list)) / batch_size)
for i in range(batch_loop_cnt):
start_index = i * batch_size
end_index = min((i + 1) * batch_size, len(image_list))
batch_image_list = image_list[start_index:end_index]
if FLAGS.run_benchmark:
detector.predict(
batch_image_list, FLAGS.threshold, warmup=10, repeats=10)
cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm
detector.gpu_mem += gm
detector.gpu_util += gu
print('Test iter {}'.format(i))
else:
results = detector.predict(batch_image_list, FLAGS.threshold)
visualize(
batch_image_list,
results,
detector.pred_config.labels,
output_dir=FLAGS.output_dir,
threshold=FLAGS.threshold)
def predict_video(detector, camera_id):
video_out_name = 'output.mp4'
if camera_id != -1:
capture = cv2.VideoCapture(camera_id)
else:
capture = cv2.VideoCapture(FLAGS.video_file)
video_out_name = os.path.split(FLAGS.video_file)[-1]
# Get Video info : resolution, fps, frame count
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("fps: %d, frame_count: %d" % (fps, frame_count))
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_out_name)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
index = 1
while True:
ret, frame = capture.read()
if not ret:
break
print('detect frame: %d' % (index))
index += 1
results = detector.predict([frame], FLAGS.threshold)
im = visualize_box_mask(
frame,
results,
detector.pred_config.labels,
threshold=FLAGS.threshold)
im = np.array(im)
writer.write(im)
if camera_id != -1:
cv2.imshow('Mask Detection', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
writer.release()
def main():
pred_config = PredictConfig(FLAGS.model_dir)
detector_func = 'Detector'
if pred_config.arch == 'PicoDet':
detector_func = 'DetectorPicoDet'
detector = eval(detector_func)(pred_config,
FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
predict_video(detector, FLAGS.camera_id)
else:
# predict from image
if FLAGS.image_dir is None and FLAGS.image_file is not None:
assert FLAGS.batch_size == 1, "batch_size should be 1, when image_file is not None"
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
predict_image(detector, img_list, FLAGS.batch_size)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
else:
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
model_dir = FLAGS.model_dir
mode = FLAGS.run_mode
model_info = {
'model_name': model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
data_info = {
'batch_size': FLAGS.batch_size,
'shape': "dynamic_shape",
'data_num': perf_info['img_num']
}
det_log = PaddleInferBenchmark(detector.config, model_info,
data_info, perf_info, mems)
det_log('Det')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import matching
from . import tracker
from . import motion
from . import utils
from . import mtmct
from .matching import *
from .tracker import *
from .motion import *
from .utils import *
from .mtmct import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import jde_matching
from . import deepsort_matching
from .jde_matching import *
from .deepsort_matching import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/tree/master/deep_sort
"""
import numpy as np
from scipy.optimize import linear_sum_assignment
from ..motion import kalman_filter
INFTY_COST = 1e+5
__all__ = [
'iou_1toN',
'iou_cost',
'_nn_euclidean_distance',
'_nn_cosine_distance',
'NearestNeighborDistanceMetric',
'min_cost_matching',
'matching_cascade',
'gate_cost_matrix',
]
def iou_1toN(bbox, candidates):
"""
Compute intersection over union (IoU) of one box against N candidates.
Args:
bbox (ndarray): A bounding box in format `(top left x, top left y, width, height)`.
candidates (ndarray): A matrix of candidate bounding boxes (one per row) in the
same format as `bbox`.
Returns:
ious (ndarray): The intersection over union in [0, 1] between the `bbox`
and each candidate. A higher score means a larger fraction of the
`bbox` is occluded by the candidate.
"""
bbox_tl = bbox[:2]
bbox_br = bbox[:2] + bbox[2:]
candidates_tl = candidates[:, :2]
candidates_br = candidates[:, :2] + candidates[:, 2:]
tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
wh = np.maximum(0., br - tl)
area_intersection = wh.prod(axis=1)
area_bbox = bbox[2:].prod()
area_candidates = candidates[:, 2:].prod(axis=1)
ious = area_intersection / (area_bbox + area_candidates - area_intersection)
return ious
def iou_cost(tracks, detections, track_indices=None, detection_indices=None):
"""
IoU distance metric.
Args:
tracks (list[Track]): A list of tracks.
detections (list[Detection]): A list of detections.
track_indices (Optional[list[int]]): A list of indices to tracks that
should be matched. Defaults to all `tracks`.
detection_indices (Optional[list[int]]): A list of indices to detections
that should be matched. Defaults to all `detections`.
Returns:
cost_matrix (ndarray): A cost matrix of shape len(track_indices),
len(detection_indices) where entry (i, j) is
`1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
for row, track_idx in enumerate(track_indices):
if tracks[track_idx].time_since_update > 1:
cost_matrix[row, :] = 1e+5
continue
bbox = tracks[track_idx].to_tlwh()
candidates = np.asarray([detections[i].tlwh for i in detection_indices])
cost_matrix[row, :] = 1. - iou_1toN(bbox, candidates)
return cost_matrix
def _nn_euclidean_distance(s, q):
"""
Compute pair-wise squared (Euclidean) distance between points in `s` and `q`.
Args:
s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M.
q (ndarray): Query points: an LxM matrix of L samples of dimensionality M.
Returns:
distances (ndarray): A vector of length L that contains for each entry in `q` the
smallest squared Euclidean distance to a sample in `s`.
"""
s, q = np.asarray(s), np.asarray(q)
if len(s) == 0 or len(q) == 0:
return np.zeros((len(s), len(q)))
s2, q2 = np.square(s).sum(axis=1), np.square(q).sum(axis=1)
distances = -2. * np.dot(s, q.T) + s2[:, None] + q2[None, :]
distances = np.clip(distances, 0., float(np.inf))
return np.maximum(0.0, distances.min(axis=0))
def _nn_cosine_distance(s, q):
"""
Compute pair-wise cosine distance between points in `s` and `q`.
Args:
s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M.
q (ndarray): Query points: an LxM matrix of L samples of dimensionality M.
Returns:
distances (ndarray): A vector of length L that contains for each entry in `q` the
smallest cosine distance to a sample in `s`.
"""
s = np.asarray(s) / np.linalg.norm(s, axis=1, keepdims=True)
q = np.asarray(q) / np.linalg.norm(q, axis=1, keepdims=True)
distances = 1. - np.dot(s, q.T)
return distances.min(axis=0)
class NearestNeighborDistanceMetric(object):
"""
A nearest neighbor distance metric that, for each target, returns
the closest distance to any sample that has been observed so far.
Args:
metric (str): Either "euclidean" or "cosine".
matching_threshold (float): The matching threshold. Samples with larger
distance are considered an invalid match.
budget (Optional[int]): If not None, fix samples per class to at most
this number. Removes the oldest samples when the budget is reached.
Attributes:
samples (Dict[int -> List[ndarray]]): A dictionary that maps from target
identities to the list of samples that have been observed so far.
"""
def __init__(self, metric, matching_threshold, budget=None):
if metric == "euclidean":
self._metric = _nn_euclidean_distance
elif metric == "cosine":
self._metric = _nn_cosine_distance
else:
raise ValueError(
"Invalid metric; must be either 'euclidean' or 'cosine'")
self.matching_threshold = matching_threshold
self.budget = budget
self.samples = {}
def partial_fit(self, features, targets, active_targets):
"""
Update the distance metric with new data.
Args:
features (ndarray): An NxM matrix of N features of dimensionality M.
targets (ndarray): An integer array of associated target identities.
active_targets (List[int]): A list of targets that are currently
present in the scene.
"""
for feature, target in zip(features, targets):
self.samples.setdefault(target, []).append(feature)
if self.budget is not None:
self.samples[target] = self.samples[target][-self.budget:]
self.samples = {k: self.samples[k] for k in active_targets}
def distance(self, features, targets):
"""
Compute distance between features and targets.
Args:
features (ndarray): An NxM matrix of N features of dimensionality M.
targets (list[int]): A list of targets to match the given `features` against.
Returns:
cost_matrix (ndarray): a cost matrix of shape len(targets), len(features),
where element (i, j) contains the closest squared distance between
`targets[i]` and `features[j]`.
"""
cost_matrix = np.zeros((len(targets), len(features)))
for i, target in enumerate(targets):
cost_matrix[i, :] = self._metric(self.samples[target], features)
return cost_matrix
def min_cost_matching(distance_metric,
max_distance,
tracks,
detections,
track_indices=None,
detection_indices=None):
"""
Solve linear assignment problem.
Args:
distance_metric :
Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
The distance metric is given a list of tracks and detections as
well as a list of N track indices and M detection indices. The
metric should return the NxM dimensional cost matrix, where element
(i, j) is the association cost between the i-th track in the given
track indices and the j-th detection in the given detection_indices.
max_distance (float): Gating threshold. Associations with cost larger
than this value are disregarded.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (list[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
Returns:
A tuple (List[(int, int)], List[int], List[int]) with the following
three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = np.arange(len(tracks))
if detection_indices is None:
detection_indices = np.arange(len(detections))
if len(detection_indices) == 0 or len(track_indices) == 0:
return [], track_indices, detection_indices # Nothing to match.
cost_matrix = distance_metric(tracks, detections, track_indices,
detection_indices)
cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
indices = linear_sum_assignment(cost_matrix)
matches, unmatched_tracks, unmatched_detections = [], [], []
for col, detection_idx in enumerate(detection_indices):
if col not in indices[1]:
unmatched_detections.append(detection_idx)
for row, track_idx in enumerate(track_indices):
if row not in indices[0]:
unmatched_tracks.append(track_idx)
for row, col in zip(indices[0], indices[1]):
track_idx = track_indices[row]
detection_idx = detection_indices[col]
if cost_matrix[row, col] > max_distance:
unmatched_tracks.append(track_idx)
unmatched_detections.append(detection_idx)
else:
matches.append((track_idx, detection_idx))
return matches, unmatched_tracks, unmatched_detections
def matching_cascade(distance_metric,
max_distance,
cascade_depth,
tracks,
detections,
track_indices=None,
detection_indices=None):
"""
Run matching cascade.
Args:
distance_metric :
Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray
The distance metric is given a list of tracks and detections as
well as a list of N track indices and M detection indices. The
metric should return the NxM dimensional cost matrix, where element
(i, j) is the association cost between the i-th track in the given
track indices and the j-th detection in the given detection_indices.
max_distance (float): Gating threshold. Associations with cost larger
than this value are disregarded.
cascade_depth (int): The cascade depth, should be set to the maximum
track age.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (list[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
Returns:
A tuple (List[(int, int)], List[int], List[int]) with the following
three entries:
* A list of matched track and detection indices.
* A list of unmatched track indices.
* A list of unmatched detection indices.
"""
if track_indices is None:
track_indices = list(range(len(tracks)))
if detection_indices is None:
detection_indices = list(range(len(detections)))
unmatched_detections = detection_indices
matches = []
for level in range(cascade_depth):
if len(unmatched_detections) == 0: # No detections left
break
track_indices_l = [
k for k in track_indices if tracks[k].time_since_update == 1 + level
]
if len(track_indices_l) == 0: # Nothing to match at this level
continue
matches_l, _, unmatched_detections = \
min_cost_matching(
distance_metric, max_distance, tracks, detections,
track_indices_l, unmatched_detections)
matches += matches_l
unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
return matches, unmatched_tracks, unmatched_detections
def gate_cost_matrix(kf,
cost_matrix,
tracks,
detections,
track_indices,
detection_indices,
gated_cost=INFTY_COST,
only_position=False):
"""
Invalidate infeasible entries in cost matrix based on the state
distributions obtained by Kalman filtering.
Args:
kf (object): The Kalman filter.
cost_matrix (ndarray): The NxM dimensional cost matrix, where N is the
number of track indices and M is the number of detection indices,
such that entry (i, j) is the association cost between
`tracks[track_indices[i]]` and `detections[detection_indices[j]]`.
tracks (list[Track]): A list of predicted tracks at the current time
step.
detections (list[Detection]): A list of detections at the current time
step.
track_indices (List[int]): List of track indices that maps rows in
`cost_matrix` to tracks in `tracks`.
detection_indices (List[int]): List of detection indices that maps
columns in `cost_matrix` to detections in `detections`.
gated_cost (Optional[float]): Entries in the cost matrix corresponding
to infeasible associations are set this value. Defaults to a very
large value.
only_position (Optional[bool]): If True, only the x, y position of the
state distribution is considered during gating. Default False.
"""
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray(
[detections[i].to_xyah() for i in detection_indices])
for row, track_idx in enumerate(track_indices):
track = tracks[track_idx]
gating_distance = kf.gating_distance(track.mean, track.covariance,
measurements, only_position)
cost_matrix[row, gating_distance > gating_threshold] = gated_cost
return cost_matrix
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py
"""
import lap
import scipy
import numpy as np
from scipy.spatial.distance import cdist
from ..motion import kalman_filter
__all__ = [
'merge_matches',
'linear_assignment',
'cython_bbox_ious',
'iou_distance',
'embedding_distance',
'fuse_motion',
]
def merge_matches(m1, m2, shape):
O, P, Q = shape
m1 = np.asarray(m1)
m2 = np.asarray(m2)
M1 = scipy.sparse.coo_matrix(
(np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
M2 = scipy.sparse.coo_matrix(
(np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
mask = M1 * M2
match = mask.nonzero()
match = list(zip(match[0], match[1]))
unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
return match, unmatched_O, unmatched_Q
def linear_assignment(cost_matrix, thresh):
if cost_matrix.size == 0:
return np.empty(
(0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(
range(cost_matrix.shape[1]))
matches, unmatched_a, unmatched_b = [], [], []
cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
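# lapjv returns (cost, x, y): x[i] is the column assigned to row i (-1 if
# unassigned above cost_limit), y[j] is the row assigned to column j.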
for ix, mx in enumerate(x):
if mx >= 0:
matches.append([ix, mx])
unmatched_a = np.where(x < 0)[0]
unmatched_b = np.where(y < 0)[0]
matches = np.asarray(matches)
return matches, unmatched_a, unmatched_b
def cython_bbox_ious(atlbrs, btlbrs):
ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
if ious.size == 0:
return ious
try:
import cython_bbox
except Exception as e:
print('cython_bbox not found, please install cython_bbox. '
'For example: `pip install cython_bbox`.')
exit()
ious = cython_bbox.bbox_overlaps(
np.ascontiguousarray(
atlbrs, dtype=np.float64),
np.ascontiguousarray(
btlbrs, dtype=np.float64))
return ious
def iou_distance(atracks, btracks):
"""
Compute cost based on IoU between two list[STrack].
"""
if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
atlbrs = atracks
btlbrs = btracks
else:
atlbrs = [track.tlbr for track in atracks]
btlbrs = [track.tlbr for track in btracks]
_ious = cython_bbox_ious(atlbrs, btlbrs)
cost_matrix = 1 - _ious
return cost_matrix
def embedding_distance(tracks, detections, metric='euclidean'):
"""
Compute cost based on features between two list[STrack].
"""
cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
if cost_matrix.size == 0:
return cost_matrix
det_features = np.asarray(
[track.curr_feat for track in detections], dtype=np.float64)
track_features = np.asarray(
[track.smooth_feat for track in tracks], dtype=np.float64)
cost_matrix = np.maximum(0.0, cdist(track_features, det_features,
metric))  # Normalized features
return cost_matrix
def fuse_motion(kf,
cost_matrix,
tracks,
detections,
only_position=False,
lambda_=0.98):
if cost_matrix.size == 0:
return cost_matrix
gating_dim = 2 if only_position else 4
gating_threshold = kalman_filter.chi2inv95[gating_dim]
measurements = np.asarray([det.to_xyah() for det in detections])
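# Gate infeasible pairs to inf via the chi-square threshold, then fuse the
# appearance cost with the Mahalanobis motion distance:
# cost = lambda_ * appearance + (1 - lambda_) * motion.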
for row, track in enumerate(tracks):
gating_distance = kf.gating_distance(
track.mean,
track.covariance,
measurements,
only_position,
metric='maha')
cost_matrix[row, gating_distance > gating_threshold] = np.inf
cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_
) * gating_distance
return cost_matrix
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import kalman_filter
from .kalman_filter import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/kalman_filter.py
"""
import numpy as np
import scipy.linalg
__all__ = ['KalmanFilter']
"""
Table for the 0.95 quantile of the chi-square distribution with N degrees of
freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
function and used as Mahalanobis gating threshold.
"""
chi2inv95 = {
1: 3.8415,
2: 5.9915,
3: 7.8147,
4: 9.4877,
5: 11.070,
6: 12.592,
7: 14.067,
8: 15.507,
9: 16.919
}
class KalmanFilter(object):
"""
A simple Kalman filter for tracking bounding boxes in image space.
The 8-dimensional state space
x, y, a, h, vx, vy, va, vh
contains the bounding box center position (x, y), aspect ratio a, height h,
and their respective velocities.
Object motion follows a constant velocity model. The bounding box location
(x, y, a, h) is taken as direct observation of the state space (linear
observation model).
"""
def __init__(self):
ndim, dt = 4, 1.
# Create Kalman filter model matrices.
self._motion_mat = np.eye(2 * ndim, 2 * ndim)
for i in range(ndim):
self._motion_mat[i, ndim + i] = dt
self._update_mat = np.eye(ndim, 2 * ndim)
# Motion and observation uncertainty are chosen relative to the current
# state estimate. These weights control the amount of uncertainty in
# the model. This is a bit hacky.
self._std_weight_position = 1. / 20
self._std_weight_velocity = 1. / 160
def initiate(self, measurement):
"""
Create track from unassociated measurement.
Args:
measurement (ndarray): Bounding box coordinates (x, y, a, h) with
center position (x, y), aspect ratio a, and height h.
Returns:
The mean vector (8 dimensional) and covariance matrix (8x8
dimensional) of the new track. Unobserved velocities are
initialized to 0 mean.
"""
mean_pos = measurement
mean_vel = np.zeros_like(mean_pos)
mean = np.r_[mean_pos, mean_vel]
std = [
2 * self._std_weight_position * measurement[3],
2 * self._std_weight_position * measurement[3], 1e-2,
2 * self._std_weight_position * measurement[3],
10 * self._std_weight_velocity * measurement[3],
10 * self._std_weight_velocity * measurement[3], 1e-5,
10 * self._std_weight_velocity * measurement[3]
]
covariance = np.diag(np.square(std))
return mean, covariance
def predict(self, mean, covariance):
"""
Run Kalman filter prediction step.
Args:
mean (ndarray): The 8 dimensional mean vector of the object state
at the previous time step.
covariance (ndarray): The 8x8 dimensional covariance matrix of the
object state at the previous time step.
Returns:
The mean vector and covariance matrix of the predicted state.
Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[3], self._std_weight_position *
mean[3], 1e-2, self._std_weight_position * mean[3]
]
std_vel = [
self._std_weight_velocity * mean[3], self._std_weight_velocity *
mean[3], 1e-5, self._std_weight_velocity * mean[3]
]
motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
#mean = np.dot(self._motion_mat, mean)
mean = np.dot(mean, self._motion_mat.T)
covariance = np.linalg.multi_dot(
(self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
return mean, covariance
def project(self, mean, covariance):
"""
Project state distribution to measurement space.
        Args:
mean (ndarray): The state's mean vector (8 dimensional array).
covariance (ndarray): The state's covariance matrix (8x8 dimensional).
Returns:
The projected mean and covariance matrix of the given state estimate.
"""
std = [
self._std_weight_position * mean[3], self._std_weight_position *
mean[3], 1e-1, self._std_weight_position * mean[3]
]
innovation_cov = np.diag(np.square(std))
mean = np.dot(self._update_mat, mean)
covariance = np.linalg.multi_dot((self._update_mat, covariance,
self._update_mat.T))
return mean, covariance + innovation_cov
def multi_predict(self, mean, covariance):
"""
Run Kalman filter prediction step (Vectorized version).
Args:
mean (ndarray): The Nx8 dimensional mean matrix of the object states
at the previous time step.
        covariance (ndarray): The Nx8x8 dimensional covariance matrices of the
object states at the previous time step.
Returns:
        The mean matrix and covariance matrices of the predicted states.
Unobserved velocities are initialized to 0 mean.
"""
std_pos = [
self._std_weight_position * mean[:, 3], self._std_weight_position *
mean[:, 3], 1e-2 * np.ones_like(mean[:, 3]),
self._std_weight_position * mean[:, 3]
]
std_vel = [
self._std_weight_velocity * mean[:, 3], self._std_weight_velocity *
mean[:, 3], 1e-5 * np.ones_like(mean[:, 3]),
self._std_weight_velocity * mean[:, 3]
]
sqr = np.square(np.r_[std_pos, std_vel]).T
motion_cov = []
for i in range(len(mean)):
motion_cov.append(np.diag(sqr[i]))
motion_cov = np.asarray(motion_cov)
mean = np.dot(mean, self._motion_mat.T)
left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
covariance = np.dot(left, self._motion_mat.T) + motion_cov
return mean, covariance
def update(self, mean, covariance, measurement):
"""
Run Kalman filter correction step.
Args:
mean (ndarray): The predicted state's mean vector (8 dimensional).
covariance (ndarray): The state's covariance matrix (8x8 dimensional).
measurement (ndarray): The 4 dimensional measurement vector
(x, y, a, h), where (x, y) is the center position, a the aspect
ratio, and h the height of the bounding box.
Returns:
The measurement-corrected state distribution.
"""
projected_mean, projected_cov = self.project(mean, covariance)
chol_factor, lower = scipy.linalg.cho_factor(
projected_cov, lower=True, check_finite=False)
kalman_gain = scipy.linalg.cho_solve(
(chol_factor, lower),
np.dot(covariance, self._update_mat.T).T,
check_finite=False).T
innovation = measurement - projected_mean
new_mean = mean + np.dot(innovation, kalman_gain.T)
new_covariance = covariance - np.linalg.multi_dot(
(kalman_gain, projected_cov, kalman_gain.T))
return new_mean, new_covariance
def gating_distance(self,
mean,
covariance,
measurements,
only_position=False,
metric='maha'):
"""
Compute gating distance between state distribution and measurements.
A suitable distance threshold can be obtained from `chi2inv95`. If
`only_position` is False, the chi-square distribution has 4 degrees of
freedom, otherwise 2.
Args:
mean (ndarray): Mean vector over the state distribution (8
dimensional).
covariance (ndarray): Covariance of the state distribution (8x8
dimensional).
measurements (ndarray): An Nx4 dimensional matrix of N measurements,
each in format (x, y, a, h) where (x, y) is the bounding box center
position, a the aspect ratio, and h the height.
only_position (Optional[bool]): If True, distance computation is
done with respect to the bounding box center position only.
metric (str): Metric type, 'gaussian' or 'maha'.
        Returns:
An array of length N, where the i-th element contains the squared
Mahalanobis distance between (mean, covariance) and `measurements[i]`.
"""
mean, covariance = self.project(mean, covariance)
if only_position:
mean, covariance = mean[:2], covariance[:2, :2]
measurements = measurements[:, :2]
d = measurements - mean
if metric == 'gaussian':
return np.sum(d * d, axis=1)
elif metric == 'maha':
cholesky_factor = np.linalg.cholesky(covariance)
z = scipy.linalg.solve_triangular(
cholesky_factor,
d.T,
lower=True,
check_finite=False,
overwrite_b=True)
squared_maha = np.sum(z * z, axis=0)
return squared_maha
else:
raise ValueError('invalid distance metric')
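# Example usage (illustrative sketch): one initiate/predict/update cycle on a
# measurement in (x, y, a, h) format, then gate a candidate at 0.95 confidence.
#   >>> kf = KalmanFilter()
#   >>> mean, cov = kf.initiate(np.array([100., 50., 0.5, 80.]))
#   >>> mean, cov = kf.predict(mean, cov)  # constant-velocity step
#   >>> mean, cov = kf.update(mean, cov, np.array([102., 51., 0.5, 81.]))
#   >>> d = kf.gating_distance(mean, cov, np.array([[102., 51., 0.5, 81.]]))
#   >>> bool(d[0] < chi2inv95[4])  # inside the 4-DOF gate
#   True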
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import utils
from . import postprocess
from .utils import *
from .postprocess import *
# The following codes are strongly related to zone and camera parameters
from . import camera_utils
from . import zone
from .camera_utils import *
from .zone import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from .utils import get_dire, get_match, get_cid_tid, combin_feature, combin_cluster
from .utils import normalize, intracam_ignore, visual_rerank
__all__ = [
'st_filter',
'get_labels_with_camera',
]
CAM_DIST = [[0, 40, 55, 100, 120, 145], [40, 0, 15, 60, 80, 105],
[55, 15, 0, 40, 65, 90], [100, 60, 40, 0, 20, 45],
[120, 80, 65, 20, 0, 25], [145, 105, 90, 45, 25, 0]]
def st_filter(st_mask, cid_tids, cid_tid_dict):
count = len(cid_tids)
for i in range(count):
i_tracklet = cid_tid_dict[cid_tids[i]]
i_cid = i_tracklet['cam']
i_dire = get_dire(i_tracklet['zone_list'], i_cid)
i_iot = i_tracklet['io_time']
for j in range(count):
j_tracklet = cid_tid_dict[cid_tids[j]]
j_cid = j_tracklet['cam']
j_dire = get_dire(j_tracklet['zone_list'], j_cid)
j_iot = j_tracklet['io_time']
match_dire = True
cam_dist = CAM_DIST[i_cid - 41][j_cid - 41]
            # if time intervals overlap
if i_iot[0] - cam_dist < j_iot[0] and j_iot[0] < i_iot[
1] + cam_dist:
match_dire = False
if i_iot[0] - cam_dist < j_iot[1] and j_iot[1] < i_iot[
1] + cam_dist:
match_dire = False
            # do not match after the track leaves
if i_dire[1] in [1, 2]: # i out
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_dire[1] in [1, 2]:
if i_dire[0] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
match_dire = False
if i_cid in [41] and i_dire[1] in [4]:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_iot[1] > 199:
match_dire = False
if i_cid in [46] and i_dire[1] in [3]:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
            # match after the track enters
if i_dire[0] in [1, 2]:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
if i_dire[0] in [1, 2]:
if i_dire[1] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
match_dire = False
is_ignore = False
if ((i_dire[0] == i_dire[1] and i_dire[0] in [3, 4]) or
(j_dire[0] == j_dire[1] and j_dire[0] in [3, 4])):
is_ignore = True
if not is_ignore:
# direction conflict
if (i_dire[0] in [3] and j_dire[0] in [4]) or (
i_dire[1] in [3] and j_dire[1] in [4]):
match_dire = False
                # filter before going to the next scene
if i_dire[1] in [3] and i_cid < j_cid:
if i_iot[1] > j_iot[1] - cam_dist:
match_dire = False
if i_dire[1] in [4] and i_cid > j_cid:
if i_iot[1] > j_iot[1] - cam_dist:
match_dire = False
if i_dire[0] in [3] and i_cid < j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid > j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
## 3-30
## 4-1
if i_dire[0] in [3] and i_cid > j_cid:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
if i_iot[1] > j_iot[0] - cam_dist:
match_dire = False
                # filter before going to the next scene
## 4-7
if i_dire[1] in [3] and i_cid > j_cid:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
if i_iot[0] < j_iot[1] + cam_dist:
match_dire = False
else:
if i_iot[1] > 199:
if i_dire[0] in [3] and i_cid < j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [4] and i_cid > j_cid:
if i_iot[0] < j_iot[0] + cam_dist:
match_dire = False
if i_dire[0] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[0] in [4] and i_cid < j_cid:
match_dire = False
if i_iot[0] < 1:
if i_dire[1] in [3] and i_cid > j_cid:
match_dire = False
if i_dire[1] in [4] and i_cid < j_cid:
match_dire = False
if not match_dire:
st_mask[i, j] = 0.0
st_mask[j, i] = 0.0
return st_mask
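# Note (summary): st_filter zeroes st_mask entries for (i, j) pairs whose
# camera distance, in/out zones and entry/exit times make a cross-camera match
# infeasible in the AIC21 S06 scene (cameras 41-46, hence the cid - 41 index).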
def subcam_list(cid_tid_dict, cid_tids):
sub_3_4 = dict()
sub_4_3 = dict()
for cid_tid in cid_tids:
cid, tid = cid_tid
tracklet = cid_tid_dict[cid_tid]
zs, ze = get_dire(tracklet['zone_list'], cid)
if zs in [3] and cid not in [46]: # 4 to 3
if not cid + 1 in sub_4_3:
sub_4_3[cid + 1] = []
sub_4_3[cid + 1].append(cid_tid)
if ze in [4] and cid not in [41]: # 4 to 3
if not cid in sub_4_3:
sub_4_3[cid] = []
sub_4_3[cid].append(cid_tid)
if zs in [4] and cid not in [41]: # 3 to 4
if not cid - 1 in sub_3_4:
sub_3_4[cid - 1] = []
sub_3_4[cid - 1].append(cid_tid)
if ze in [3] and cid not in [46]: # 3 to 4
if not cid in sub_3_4:
sub_3_4[cid] = []
sub_3_4[cid].append(cid_tid)
sub_cid_tids = dict()
for i in sub_3_4:
sub_cid_tids[(i, i + 1)] = sub_3_4[i]
for i in sub_4_3:
sub_cid_tids[(i, i - 1)] = sub_4_3[i]
return sub_cid_tids
def subcam_list2(cid_tid_dict, cid_tids):
sub_dict = dict()
for cid_tid in cid_tids:
cid, tid = cid_tid
if cid not in [41]:
if not cid in sub_dict:
sub_dict[cid] = []
sub_dict[cid].append(cid_tid)
if cid not in [46]:
if not cid + 1 in sub_dict:
sub_dict[cid + 1] = []
sub_dict[cid + 1].append(cid_tid)
return sub_dict
def get_sim_matrix(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
    # Note: camera-related get_sim_matrix function,
    # which is different from the one in utils.py.
count = len(cid_tids)
q_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
g_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
q_arr = normalize(q_arr, axis=1)
g_arr = normalize(g_arr, axis=1)
st_mask = np.ones((count, count), dtype=np.float32)
st_mask = intracam_ignore(st_mask, cid_tids)
# different from utils.py
if use_st_filter:
st_mask = st_filter(st_mask, cid_tids, cid_tid_dict)
visual_sim_matrix = visual_rerank(
q_arr, g_arr, cid_tids, use_ff=use_ff, use_rerank=use_rerank)
visual_sim_matrix = visual_sim_matrix.astype('float32')
np.set_printoptions(precision=3)
sim_matrix = visual_sim_matrix * st_mask
np.fill_diagonal(sim_matrix, 0)
return sim_matrix
def get_labels_with_camera(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
# 1st cluster
sub_cid_tids = subcam_list(cid_tid_dict, cid_tids)
sub_labels = dict()
dis_thrs = [0.7, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5]
for i, sub_c_to_c in enumerate(sub_cid_tids):
sim_matrix = get_sim_matrix(
cid_tid_dict,
sub_cid_tids[sub_c_to_c],
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=1 - dis_thrs[i],
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
cluster_cid_tids = get_cid_tid(labels, sub_cid_tids[sub_c_to_c])
sub_labels[sub_c_to_c] = cluster_cid_tids
labels, sub_cluster = combin_cluster(sub_labels, cid_tids)
# 2nd cluster
cid_tid_dict_new = combin_feature(cid_tid_dict, sub_cluster)
sub_cid_tids = subcam_list2(cid_tid_dict_new, cid_tids)
sub_labels = dict()
for i, sub_c_to_c in enumerate(sub_cid_tids):
sim_matrix = get_sim_matrix(
cid_tid_dict_new,
sub_cid_tids[sub_c_to_c],
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=1 - 0.1,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
cluster_cid_tids = get_cid_tid(labels, sub_cid_tids[sub_c_to_c])
sub_labels[sub_c_to_c] = cluster_cid_tids
labels, sub_cluster = combin_cluster(sub_labels, cid_tids)
return labels
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import re
import cv2
from tqdm import tqdm
import pickle
import os
import os.path as osp
from os.path import join as opj
import numpy as np
import motmetrics as mm
from functools import reduce
from .utils import parse_pt_gt, parse_pt, compare_dataframes_mtmc
from .utils import get_labels, getData, gen_new_mot
from .camera_utils import get_labels_with_camera
from .zone import Zone
from ..utils import plot_tracking
__all__ = [
'trajectory_fusion',
'sub_cluster',
'gen_res',
'print_mtmct_result',
'get_mtmct_matching_results',
'save_mtmct_crops',
'save_mtmct_vis_results',
]
def trajectory_fusion(mot_feature, cid, cid_bias, use_zone=False, zone_path=''):
cur_bias = cid_bias[cid]
mot_list_break = {}
if use_zone:
zones = Zone(zone_path=zone_path)
zones.set_cam(cid)
mot_list = parse_pt(mot_feature, zones)
else:
mot_list = parse_pt(mot_feature)
if use_zone:
mot_list = zones.break_mot(mot_list, cid)
mot_list = zones.filter_mot(mot_list, cid) # filter by zone
mot_list = zones.filter_bbox(mot_list, cid) # filter bbox
mot_list_break = gen_new_mot(mot_list) # save break feature for gen result
tid_data = dict()
for tid in mot_list:
tracklet = mot_list[tid]
if len(tracklet) <= 1:
continue
frame_list = list(tracklet.keys())
frame_list.sort()
        # keep features only from boxes with area > 2000
zone_list = [tracklet[f]['zone'] for f in frame_list]
feature_list = [
tracklet[f]['feat'] for f in frame_list
if (tracklet[f]['bbox'][3] - tracklet[f]['bbox'][1]
) * (tracklet[f]['bbox'][2] - tracklet[f]['bbox'][0]) > 2000
]
if len(feature_list) < 2:
feature_list = [tracklet[f]['feat'] for f in frame_list]
io_time = [
cur_bias + frame_list[0] / 10., cur_bias + frame_list[-1] / 10.
]
all_feat = np.array([feat for feat in feature_list])
mean_feat = np.mean(all_feat, axis=0)
tid_data[tid] = {
'cam': cid,
'tid': tid,
'mean_feat': mean_feat,
'zone_list': zone_list,
'frame_list': frame_list,
'tracklet': tracklet,
'io_time': io_time
}
return tid_data, mot_list_break
def sub_cluster(cid_tid_dict,
scene_cluster,
use_ff=True,
use_rerank=True,
use_camera=False,
use_st_filter=False):
'''
cid_tid_dict: all camera_id and track_id
scene_cluster: like [41, 42, 43, 44, 45, 46] in AIC21 MTMCT S06 test videos
'''
    assert (len(scene_cluster) != 0), "Error: scene_cluster must not be empty"
cid_tids = sorted(
[key for key in cid_tid_dict.keys() if key[0] in scene_cluster])
if use_camera:
clu = get_labels_with_camera(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
else:
clu = get_labels(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
new_clu = list()
for c_list in clu:
if len(c_list) <= 1: continue
cam_list = [cid_tids[c][0] for c in c_list]
if len(cam_list) != len(set(cam_list)): continue
new_clu.append([cid_tids[c] for c in c_list])
all_clu = new_clu
cid_tid_label = dict()
for i, c_list in enumerate(all_clu):
for c in c_list:
cid_tid_label[c] = i + 1
return cid_tid_label
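# Example (illustrative sketch of the expected inputs): cid_tid_dict keys are
# (camera_id, track_id) tuples built from the trajectory_fusion outputs, e.g.
#   {(41, 1): {'cam': 41, 'tid': 1, 'mean_feat': ..., 'io_time': ...}, ...}
# and the returned cid_tid_label maps each matched (cid, tid) to a global id
# starting from 1.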
def gen_res(output_dir_filename,
scene_cluster,
map_tid,
mot_list_breaks,
use_roi=False,
roi_dir=''):
f_w = open(output_dir_filename, 'w')
for idx, mot_feature in enumerate(mot_list_breaks):
cid = scene_cluster[idx]
img_rects = parse_pt_gt(mot_feature)
if use_roi:
            assert (roi_dir != ''), "Error: roi_dir should not be empty!"
roi = cv2.imread(os.path.join(roi_dir, f'c{cid:03d}/roi.jpg'), 0)
height, width = roi.shape
for fid in img_rects:
tid_rects = img_rects[fid]
fid = int(fid) + 1
for tid_rect in tid_rects:
tid = tid_rect[0]
rect = tid_rect[1:]
cx = 0.5 * rect[0] + 0.5 * rect[2]
cy = 0.5 * rect[1] + 0.5 * rect[3]
w = rect[2] - rect[0]
w = min(w * 1.2, w + 40)
h = rect[3] - rect[1]
h = min(h * 1.2, h + 40)
rect[2] -= rect[0]
rect[3] -= rect[1]
rect[0] = max(0, rect[0])
rect[1] = max(0, rect[1])
x1, y1 = max(0, cx - 0.5 * w), max(0, cy - 0.5 * h)
if use_roi:
x2, y2 = min(width, cx + 0.5 * w), min(height, cy + 0.5 * h)
else:
x2, y2 = cx + 0.5 * w, cy + 0.5 * h
w, h = x2 - x1, y2 - y1
new_rect = list(map(int, [x1, y1, w, h]))
rect = list(map(int, rect))
if (cid, tid) in map_tid:
new_tid = map_tid[(cid, tid)]
f_w.write(
str(cid) + ' ' + str(new_tid) + ' ' + str(fid) + ' ' +
' '.join(map(str, new_rect)) + ' -1 -1'
'\n')
print('gen_res: write file in {}'.format(output_dir_filename))
f_w.close()
def print_mtmct_result(gt_file, pred_file):
names = [
'CameraId', 'Id', 'FrameId', 'X', 'Y', 'Width', 'Height', 'Xworld',
'Yworld'
]
gt = getData(gt_file, names=names)
pred = getData(pred_file, names=names)
summary = compare_dataframes_mtmc(gt, pred)
print('MTMCT summary: ', summary.columns.tolist())
formatters = {
'idf1': '{:2.2f}'.format,
'idp': '{:2.2f}'.format,
'idr': '{:2.2f}'.format,
'mota': '{:2.2f}'.format
}
summary = summary[['idf1', 'idp', 'idr', 'mota']]
summary.loc[:, 'idp'] *= 100
summary.loc[:, 'idr'] *= 100
summary.loc[:, 'idf1'] *= 100
summary.loc[:, 'mota'] *= 100
print(
mm.io.render_summary(
summary,
formatters=formatters,
namemap=mm.io.motchallenge_metric_names))
def get_mtmct_matching_results(pred_mtmct_file, secs_interval=0.5,
video_fps=20):
res = np.loadtxt(pred_mtmct_file) # 'cid, tid, fid, x1, y1, w, h, -1, -1'
carame_ids = list(map(int, np.unique(res[:, 0])))
num_track_ids = int(np.max(res[:, 1]))
num_frames = int(np.max(res[:, 2]))
res = res[:, :7]
# each line in res: 'cid, tid, fid, x1, y1, w, h'
carame_tids = []
carame_results = dict()
for c_id in carame_ids:
carame_results[c_id] = res[res[:, 0] == c_id]
tids = np.unique(carame_results[c_id][:, 1])
tids = list(map(int, tids))
carame_tids.append(tids)
# select common tids throughout each video
common_tids = reduce(np.intersect1d, carame_tids)
if len(common_tids) == 0:
print(
'No common tracked ids in these videos, please check your MOT result or select new videos.'
)
return None
# get mtmct matching results by cid_tid_fid_results[c_id][t_id][f_id]
cid_tid_fid_results = dict()
cid_tid_to_fids = dict()
interval = int(secs_interval * video_fps) # preferably less than 10
for c_id in carame_ids:
cid_tid_fid_results[c_id] = dict()
cid_tid_to_fids[c_id] = dict()
for t_id in common_tids:
tid_mask = carame_results[c_id][:, 1] == t_id
cid_tid_fid_results[c_id][t_id] = dict()
carame_trackid_results = carame_results[c_id][tid_mask]
fids = np.unique(carame_trackid_results[:, 2])
fids = fids[fids % interval == 0]
fids = list(map(int, fids))
cid_tid_to_fids[c_id][t_id] = fids
for f_id in fids:
st_frame = f_id
ed_frame = f_id + interval
st_mask = carame_trackid_results[:, 2] >= st_frame
ed_mask = carame_trackid_results[:, 2] < ed_frame
frame_mask = np.logical_and(st_mask, ed_mask)
cid_tid_fid_results[c_id][t_id][f_id] = carame_trackid_results[
frame_mask]
return carame_results, cid_tid_fid_results
def save_mtmct_crops(cid_tid_fid_res,
images_dir,
crops_dir,
width=300,
height=200):
carame_ids = cid_tid_fid_res.keys()
seqs_folder = os.listdir(images_dir)
seqs = []
for x in seqs_folder:
if os.path.isdir(os.path.join(images_dir, x)):
seqs.append(x)
assert len(seqs) == len(carame_ids)
seqs.sort()
if not os.path.exists(crops_dir):
os.makedirs(crops_dir)
common_tids = list(cid_tid_fid_res[list(carame_ids)[0]].keys())
    # get crops by name 'tid_cid_fid.jpg'
for t_id in common_tids:
for i, c_id in enumerate(carame_ids):
infer_dir = os.path.join(images_dir, seqs[i])
if os.path.exists(os.path.join(infer_dir, 'img1')):
infer_dir = os.path.join(infer_dir, 'img1')
all_images = os.listdir(infer_dir)
all_images.sort()
for f_id in cid_tid_fid_res[c_id][t_id].keys():
frame_idx = f_id - 1 if f_id > 0 else 0
im_path = os.path.join(infer_dir, all_images[frame_idx])
im = cv2.imread(im_path) # (H, W, 3)
track = cid_tid_fid_res[c_id][t_id][f_id][
0] # only select one track
cid, tid, fid, x1, y1, w, h = [int(v) for v in track]
clip = im[y1:(y1 + h), x1:(x1 + w)]
clip = cv2.resize(clip, (width, height))
cv2.imwrite(
os.path.join(crops_dir,
'tid{:06d}_cid{:06d}_fid{:06d}.jpg'.format(
tid, cid, fid)), clip)
            print("Finished cropping images of tracked id {} in camera {}".
                  format(t_id, c_id))
def save_mtmct_vis_results(carame_results,
images_dir,
save_dir,
save_videos=False):
# carame_results: 'cid, tid, fid, x1, y1, w, h'
carame_ids = carame_results.keys()
seqs_folder = os.listdir(images_dir)
seqs = []
for x in seqs_folder:
if os.path.isdir(os.path.join(images_dir, x)):
seqs.append(x)
assert len(seqs) == len(carame_ids)
seqs.sort()
if not os.path.exists(save_dir):
os.makedirs(save_dir)
for i, c_id in enumerate(carame_ids):
print("Start visualization for camera {} of sequence {}.".format(
c_id, seqs[i]))
cid_save_dir = os.path.join(save_dir, '{}'.format(seqs[i]))
if not os.path.exists(cid_save_dir):
os.makedirs(cid_save_dir)
infer_dir = os.path.join(images_dir, seqs[i])
if os.path.exists(os.path.join(infer_dir, 'img1')):
infer_dir = os.path.join(infer_dir, 'img1')
all_images = os.listdir(infer_dir)
all_images.sort()
for f_id, im_path in enumerate(all_images):
img = cv2.imread(os.path.join(infer_dir, im_path))
tracks = carame_results[c_id][carame_results[c_id][:, 2] == f_id]
if tracks.shape[0] > 0:
tracked_ids = tracks[:, 1]
xywhs = tracks[:, 3:]
online_im = plot_tracking(
img, xywhs, tracked_ids, scores=None, frame_id=f_id)
else:
online_im = img
print('Frame {} of seq {} has no tracking results'.format(
f_id, seqs[i]))
cv2.imwrite(
os.path.join(cid_save_dir, '{:05d}.jpg'.format(f_id)),
online_im)
if f_id % 40 == 0:
print('Processing frame {}'.format(f_id))
if save_videos:
output_video_path = os.path.join(cid_save_dir, '..',
'{}_mtmct_vis.mp4'.format(seqs[i]))
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(
cid_save_dir, output_video_path)
os.system(cmd_str)
            print('Saved video of sequence {} in {}.'.format(
                seqs[i], output_video_path))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import os
import re
import cv2
import paddle
import numpy as np
from sklearn import preprocessing
from sklearn.cluster import AgglomerativeClustering
import gc
import motmetrics as mm
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")
__all__ = [
'parse_pt', 'parse_bias', 'get_dire', 'parse_pt_gt',
'compare_dataframes_mtmc', 'get_sim_matrix', 'get_labels', 'getData',
'gen_new_mot'
]
def parse_pt(mot_feature, zones=None):
mot_list = dict()
for line in mot_feature:
fid = int(re.sub('[a-z,A-Z]', "", mot_feature[line]['frame']))
tid = mot_feature[line]['id']
bbox = list(map(lambda x: int(float(x)), mot_feature[line]['bbox']))
if tid not in mot_list:
mot_list[tid] = dict()
out_dict = mot_feature[line]
if zones is not None:
out_dict['zone'] = zones.get_zone(bbox)
else:
out_dict['zone'] = None
mot_list[tid][fid] = out_dict
return mot_list
def gen_new_mot(mot_list):
out_dict = dict()
for tracklet in mot_list:
tracklet = mot_list[tracklet]
for f in tracklet:
out_dict[tracklet[f]['imgname']] = tracklet[f]
return out_dict
def mergesetfeat1_notrk(P, neg_vector, in_feats, in_labels):
out_feats = []
for i in range(in_feats.shape[0]):
camera_id = in_labels[i, 1]
feat = in_feats[i] - neg_vector[camera_id]
feat = P[camera_id].dot(feat)
feat = feat / np.linalg.norm(feat, ord=2)
out_feats.append(feat)
out_feats = np.vstack(out_feats)
return out_feats
def compute_P2(prb_feats, gal_feats, gal_labels, la=3.0):
X = gal_feats
neg_vector = {}
u_labels = np.unique(gal_labels[:, 1])
P = {}
for label in u_labels:
curX = gal_feats[gal_labels[:, 1] == label, :]
neg_vector[label] = np.mean(curX, axis=0)
P[label] = np.linalg.inv(
curX.T.dot(curX) + curX.shape[0] * la * np.eye(X.shape[1]))
return P, neg_vector
def parse_bias(cameras_bias):
cid_bias = dict()
for cameras in cameras_bias.keys():
cameras_id = re.sub('[a-z,A-Z]', "", cameras)
cameras_id = int(cameras_id)
bias = cameras_bias[cameras]
cid_bias[cameras_id] = float(bias)
return cid_bias
def get_dire(zone_list, cid):
zs, ze = zone_list[0], zone_list[-1]
return (zs, ze)
def intracam_ignore(st_mask, cid_tids):
count = len(cid_tids)
for i in range(count):
for j in range(count):
if cid_tids[i][0] == cid_tids[j][0]:
st_mask[i, j] = 0.
return st_mask
def mergesetfeat(in_feats, in_labels, in_tracks):
trackset = list(set(list(in_tracks)))
out_feats = []
out_labels = []
for track in trackset:
feat = np.mean(in_feats[in_tracks == track], axis=0)
feat = feat / np.linalg.norm(feat, ord=2)
label = in_labels[in_tracks == track][0]
out_feats.append(feat)
out_labels.append(label)
out_feats = np.vstack(out_feats)
out_labels = np.vstack(out_labels)
return out_feats, out_labels
def mergesetfeat3(X, labels, gX, glabels, beta=0.08, knn=20, lr=0.5):
for i in range(0, X.shape[0]):
if i % 1000 == 0:
print('feat3:%d/%d' % (i, X.shape[0]))
knnX = gX[glabels[:, 1] != labels[i, 1], :]
sim = knnX.dot(X[i, :])
knnX = knnX[sim > 0, :]
sim = sim[sim > 0]
if len(sim) > 0:
idx = np.argsort(-sim)
if len(sim) > 2 * knn:
sim = sim[idx[:2 * knn]]
knnX = knnX[idx[:2 * knn], :]
else:
sim = sim[idx]
knnX = knnX[idx, :]
knn = min(knn, len(sim))
knn_pos_weight = np.exp((sim[:knn] - 1) / beta)
knn_neg_weight = np.ones(len(sim) - knn)
knn_pos_prob = knn_pos_weight / np.sum(knn_pos_weight)
knn_neg_prob = knn_neg_weight / np.sum(knn_neg_weight)
X[i, :] += lr * (knn_pos_prob.dot(knnX[:knn, :]) -
knn_neg_prob.dot(knnX[knn:, :]))
X[i, :] /= np.linalg.norm(X[i, :])
return X
def run_fic(prb_feats, gal_feats, prb_labels, gal_labels, la=3.0):
P, neg_vector = compute_P2(prb_feats, gal_feats, gal_labels, la)
prb_feats_new = mergesetfeat1_notrk(P, neg_vector, prb_feats, prb_labels)
gal_feats_new = mergesetfeat1_notrk(P, neg_vector, gal_feats, gal_labels)
return prb_feats_new, gal_feats_new
def run_fac(prb_feats,
gal_feats,
prb_labels,
gal_labels,
beta=0.08,
knn=20,
lr=0.5,
prb_epoch=2,
gal_epoch=3):
gal_feats_new = gal_feats.copy()
for i in range(prb_epoch):
gal_feats_new = mergesetfeat3(gal_feats_new, gal_labels, gal_feats,
gal_labels, beta, knn, lr)
prb_feats_new = prb_feats.copy()
for i in range(gal_epoch):
prb_feats_new = mergesetfeat3(prb_feats_new, prb_labels, gal_feats_new,
gal_labels, beta, knn, lr)
return prb_feats_new, gal_feats_new
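# Note (illustrative summary): run_fic whitens features per camera by
# subtracting the camera mean and applying the ridge-regularized projection
# P = (X^T X + n * la * I)^{-1} from compute_P2; run_fac then smooths each
# feature toward its top-knn positive neighbors (weighted by exp((sim-1)/beta))
# and away from the remaining negatives, for a few epochs.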
def euclidean_distance(qf, gf):
    # assumes L2-normalized rows; see the note below
    dist_mat = 2 - 2 * paddle.matmul(qf, gf.t())
    return dist_mat
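# Note: the 2 - 2 * <q, g> form equals the squared Euclidean distance only for
# L2-normalized rows, since ||q - g||^2 = ||q||^2 + ||g||^2 - 2 * <q, g>.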
def batch_paddle_topk(qf, gf, k1, N=6000):
m = qf.shape[0]
n = gf.shape[0]
    initial_rank = []
for j in range(n // N + 1):
temp_gf = gf[j * N:j * N + N]
temp_qd = []
for i in range(m // N + 1):
temp_qf = qf[i * N:i * N + N]
temp_d = euclidean_distance(temp_qf, temp_gf)
temp_qd.append(temp_d)
temp_qd = paddle.concat(temp_qd, axis=0)
        temp_qd = temp_qd / paddle.max(temp_qd, axis=0)  # per-column max
temp_qd = temp_qd.t()
initial_rank.append(
paddle.topk(
temp_qd, k=k1, axis=1, largest=False, sorted=True)[1])
del temp_qd
del temp_gf
del temp_qf
del temp_d
initial_rank = paddle.concat(initial_rank, axis=0).cpu().numpy()
return initial_rank
def batch_euclidean_distance(qf, gf, N=6000):
m = qf.shape[0]
n = gf.shape[0]
dist_mat = []
for j in range(n // N + 1):
temp_gf = gf[j * N:j * N + N]
temp_qd = []
for i in range(m // N + 1):
temp_qf = qf[i * N:i * N + N]
temp_d = euclidean_distance(temp_qf, temp_gf)
temp_qd.append(temp_d)
temp_qd = paddle.concat(temp_qd, axis=0)
        temp_qd = temp_qd / paddle.max(temp_qd, axis=0)  # per-column max
dist_mat.append(temp_qd.t()) # transpose
del temp_qd
del temp_gf
del temp_qf
del temp_d
dist_mat = paddle.concat(dist_mat, axis=0)
return dist_mat
def batch_v(feat, R, all_num):
V = np.zeros((all_num, all_num), dtype=np.float32)
m = feat.shape[0]
for i in tqdm(range(m)):
temp_gf = feat[i].unsqueeze(0)
temp_qd = euclidean_distance(temp_gf, feat)
temp_qd = temp_qd / (paddle.max(temp_qd))
temp_qd = temp_qd.squeeze()
temp_qd = temp_qd.numpy()[R[i].tolist()]
temp_qd = paddle.to_tensor(temp_qd)
weight = paddle.exp(-temp_qd)
weight = (weight / paddle.sum(weight)).numpy()
V[i, R[i]] = weight.astype(np.float32)
return V
def k_reciprocal_neigh(initial_rank, i, k1):
forward_k_neigh_index = initial_rank[i, :k1 + 1]
backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
fi = np.where(backward_k_neigh_index == i)[0]
return forward_k_neigh_index[fi]
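# Example (illustrative sketch): with ranking lists
#   >>> ranks = np.array([[0, 1, 2], [1, 0, 2], [2, 1, 0]])
#   >>> k_reciprocal_neigh(ranks, 0, 1)  # -> array([0, 1])
# samples 0 and 1 each appear in the other's top-(k1+1) list, so they are
# k-reciprocal neighbors.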
def ReRank2(probFea, galFea, k1=20, k2=6, lambda_value=0.3):
    # Note: the internal naming below (e.g. query_num) differs from the outer
    # scope; it follows the original re-ranking implementation.
query_num = probFea.shape[0]
all_num = query_num + galFea.shape[0]
feat = paddle.concat([probFea, galFea], axis=0)
initial_rank = batch_paddle_topk(feat, feat, k1 + 1, N=6000)
# del feat
del probFea
del galFea
gc.collect() # empty memory
R = []
for i in tqdm(range(all_num)):
# k-reciprocal neighbors
k_reciprocal_index = k_reciprocal_neigh(initial_rank, i, k1)
k_reciprocal_expansion_index = k_reciprocal_index
for j in range(len(k_reciprocal_index)):
candidate = k_reciprocal_index[j]
candidate_k_reciprocal_index = k_reciprocal_neigh(
initial_rank, candidate, int(np.around(k1 / 2)))
if len(
np.intersect1d(candidate_k_reciprocal_index,
k_reciprocal_index)) > 2. / 3 * len(
candidate_k_reciprocal_index):
k_reciprocal_expansion_index = np.append(
k_reciprocal_expansion_index, candidate_k_reciprocal_index)
k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
R.append(k_reciprocal_expansion_index)
gc.collect() # empty memory
V = batch_v(feat, R, all_num)
del R
gc.collect() # empty memory
initial_rank = initial_rank[:, :k2]
### Faster version
if k2 != 1:
V_qe = np.zeros_like(V, dtype=np.float16)
for i in range(all_num):
V_qe[i, :] = np.mean(V[initial_rank[i], :], axis=0)
V = V_qe
del V_qe
del initial_rank
gc.collect() # empty memory
invIndex = []
for i in range(all_num):
invIndex.append(np.where(V[:, i] != 0)[0])
jaccard_dist = np.zeros((query_num, all_num), dtype=np.float32)
for i in tqdm(range(query_num)):
temp_min = np.zeros(shape=[1, all_num], dtype=np.float32)
indNonZero = np.where(V[i, :] != 0)[0]
indImages = [invIndex[ind] for ind in indNonZero]
for j in range(len(indNonZero)):
temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(
V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
jaccard_dist[i] = 1 - temp_min / (2. - temp_min)
del V
gc.collect() # empty memory
original_dist = batch_euclidean_distance(feat, feat[:query_num, :]).numpy()
final_dist = jaccard_dist * (1 - lambda_value
) + original_dist * lambda_value
del original_dist
del jaccard_dist
final_dist = final_dist[:query_num, query_num:]
return final_dist
def visual_rerank(prb_feats,
gal_feats,
cid_tids,
use_ff=False,
use_rerank=False):
"""Rerank by visual cures."""
gal_labels = np.array([[0, item[0]] for item in cid_tids])
prb_labels = gal_labels.copy()
    if use_ff:
        print('currently using ff finetuned parameters...')
        # Step1-1: fic. finetuned parameters: [la]
        prb_feats, gal_feats = run_fic(prb_feats, gal_feats, prb_labels,
                                       gal_labels, 3.0)
        # Step1-2: fac. finetuned parameters: [beta,knn,lr,prb_epoch,gal_epoch]
        prb_feats, gal_feats = run_fac(prb_feats, gal_feats, prb_labels,
                                       gal_labels, 0.08, 20, 0.5, 1, 1)
    if use_rerank:
        paddle.enable_static()
        print('currently using rerank finetuned parameters...')
        # Step2: k-reciprocal. finetuned parameters: [k1,k2,lambda_value]
        sims = ReRank2(
            paddle.to_tensor(prb_feats),
            paddle.to_tensor(gal_feats), 20, 3, 0.3)
else:
# sims = ComputeEuclid(prb_feats, gal_feats, 1)
sims = 1.0 - np.dot(prb_feats, gal_feats.T)
    # NOTE: `sims` here is actually a distance; the smaller, the more similar.
return 1.0 - sims
# helper functions used by sub_cluster
def normalize(nparray, axis=0):
nparray = preprocessing.normalize(nparray, norm='l2', axis=axis)
return nparray
def get_match(cluster_labels):
cluster_dict = dict()
cluster = list()
    for i, l in enumerate(cluster_labels):
        if l in cluster_dict:
            cluster_dict[l].append(i)
        else:
            cluster_dict[l] = [i]
for idx in cluster_dict:
cluster.append(cluster_dict[idx])
return cluster
def get_cid_tid(cluster_labels, cid_tids):
cluster = list()
for labels in cluster_labels:
cid_tid_list = list()
for label in labels:
cid_tid_list.append(cid_tids[label])
cluster.append(cid_tid_list)
return cluster
def combin_feature(cid_tid_dict, sub_cluster):
for sub_ct in sub_cluster:
if len(sub_ct) < 2: continue
mean_feat = np.array([cid_tid_dict[i]['mean_feat'] for i in sub_ct])
for i in sub_ct:
cid_tid_dict[i]['mean_feat'] = mean_feat.mean(axis=0)
return cid_tid_dict
def combin_cluster(sub_labels, cid_tids):
cluster = list()
for sub_c_to_c in sub_labels:
if len(cluster) < 1:
cluster = sub_labels[sub_c_to_c]
continue
for c_ts in sub_labels[sub_c_to_c]:
is_add = False
for i_c, c_set in enumerate(cluster):
if len(set(c_ts) & set(c_set)) > 0:
new_list = list(set(c_ts) | set(c_set))
cluster[i_c] = new_list
is_add = True
break
if not is_add:
cluster.append(c_ts)
labels = list()
num_tr = 0
for c_ts in cluster:
label_list = list()
for c_t in c_ts:
label_list.append(cid_tids.index(c_t))
num_tr += 1
label_list.sort()
labels.append(label_list)
return labels, cluster
def parse_pt_gt(mot_feature):
img_rects = dict()
for line in mot_feature:
fid = int(re.sub('[a-z,A-Z]', "", mot_feature[line]['frame']))
tid = mot_feature[line]['id']
rect = list(map(lambda x: int(float(x)), mot_feature[line]['bbox']))
if fid not in img_rects:
img_rects[fid] = list()
rect.insert(0, tid)
img_rects[fid].append(rect)
return img_rects
# eval result
def compare_dataframes_mtmc(gts, ts):
"""Compute ID-based evaluation metrics for MTMCT
Return:
df (pandas.DataFrame): Results of the evaluations in a df with only the 'idf1', 'idp', and 'idr' columns.
"""
gtds = []
tsds = []
gtcams = gts['CameraId'].drop_duplicates().tolist()
tscams = ts['CameraId'].drop_duplicates().tolist()
maxFrameId = 0
for k in sorted(gtcams):
gtd = gts.query('CameraId == %d' % k)
gtd = gtd[['FrameId', 'Id', 'X', 'Y', 'Width', 'Height']]
# max FrameId in gtd only
mfid = gtd['FrameId'].max()
gtd['FrameId'] += maxFrameId
gtd = gtd.set_index(['FrameId', 'Id'])
gtds.append(gtd)
if k in tscams:
tsd = ts.query('CameraId == %d' % k)
tsd = tsd[['FrameId', 'Id', 'X', 'Y', 'Width', 'Height']]
# max FrameId among both gtd and tsd
mfid = max(mfid, tsd['FrameId'].max())
tsd['FrameId'] += maxFrameId
tsd = tsd.set_index(['FrameId', 'Id'])
tsds.append(tsd)
maxFrameId += mfid
# compute multi-camera tracking evaluation stats
multiCamAcc = mm.utils.compare_to_groundtruth(
pd.concat(gtds), pd.concat(tsds), 'iou')
metrics = list(mm.metrics.motchallenge_metrics)
metrics.extend(['num_frames', 'idfp', 'idfn', 'idtp'])
mh = mm.metrics.create()
summary = mh.compute(multiCamAcc, metrics=metrics, name='MultiCam')
return summary
def get_sim_matrix(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
    # Note: camera-independent get_sim_matrix function,
    # which is different from the one in camera_utils.py.
count = len(cid_tids)
q_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
g_arr = np.array(
[cid_tid_dict[cid_tids[i]]['mean_feat'] for i in range(count)])
q_arr = normalize(q_arr, axis=1)
g_arr = normalize(g_arr, axis=1)
st_mask = np.ones((count, count), dtype=np.float32)
st_mask = intracam_ignore(st_mask, cid_tids)
visual_sim_matrix = visual_rerank(
q_arr, g_arr, cid_tids, use_ff=use_ff, use_rerank=use_rerank)
visual_sim_matrix = visual_sim_matrix.astype('float32')
np.set_printoptions(precision=3)
sim_matrix = visual_sim_matrix * st_mask
np.fill_diagonal(sim_matrix, 0)
return sim_matrix
def get_labels(cid_tid_dict,
cid_tids,
use_ff=True,
use_rerank=True,
use_st_filter=False):
# 1st cluster
sub_cid_tids = list(cid_tid_dict.keys())
sub_labels = dict()
dis_thrs = [0.7, 0.5, 0.5, 0.5, 0.5, 0.7, 0.5, 0.5, 0.5, 0.5]
sim_matrix = get_sim_matrix(
cid_tid_dict,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.5,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
sub_cluster = get_cid_tid(labels, cid_tids)
# 2nd cluster
cid_tid_dict_new = combin_feature(cid_tid_dict, sub_cluster)
sub_labels = dict()
sim_matrix = get_sim_matrix(
cid_tid_dict_new,
cid_tids,
use_ff=use_ff,
use_rerank=use_rerank,
use_st_filter=use_st_filter)
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.9,
affinity='precomputed',
linkage='complete').fit_predict(1 - sim_matrix)
labels = get_match(cluster_labels)
sub_cluster = get_cid_tid(labels, cid_tids)
return labels
def getData(fpath, names=None, sep=r'\s+|\t+|,'):
    """ Get the necessary track data from a file path.
Args:
fpath (str) : Original path of file reading from.
names (list[str]): List of column names for the data.
sep (str): Allowed separators regular expression string.
Return:
df (pandas.DataFrame): Data frame containing the data loaded from the
stream with optionally assigned column names. No index is set on the data.
"""
try:
df = pd.read_csv(
fpath,
sep=sep,
index_col=None,
skipinitialspace=True,
header=None,
names=names,
engine='python')
return df
except Exception as e:
raise ValueError("Could not read input from %s. Error: %s" %
(fpath, repr(e)))
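# Example usage (illustrative; 'pred.txt' is a hypothetical whitespace- or
# comma-separated MTMCT result file):
#   >>> df = getData('pred.txt', names=['CameraId', 'Id', 'FrameId', 'X', 'Y',
#   ...                                 'Width', 'Height', 'Xworld', 'Yworld'])
#   >>> df.columns.tolist()[:3]
#   ['CameraId', 'Id', 'FrameId']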
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
https://github.com/LCFractal/AIC21-MTMC/tree/main/reid/reid-matching/tools
"""
import os
import cv2
import numpy as np
from sklearn.cluster import AgglomerativeClustering
BBOX_B = 10 / 15
class Zone(object):
def __init__(self, zone_path='datasets/zone'):
        # zone colors (see get_zone): 1: white, 2: red, 3: green, 4: blue
        # w/r mark non-high-speed zones; b/g mark high-speed zones
        assert zone_path != '', "Error: zone_path should not be empty!"
zones = {}
for img_name in os.listdir(zone_path):
camnum = int(img_name.split('.')[0][-3:])
zone_img = cv2.imread(os.path.join(zone_path, img_name))
zones[camnum] = zone_img
self.zones = zones
self.current_cam = 0
def set_cam(self, cam):
self.current_cam = cam
def get_zone(self, bbox):
cx = int((bbox[0] + bbox[2]) / 2)
cy = int((bbox[1] + bbox[3]) / 2)
pix = self.zones[self.current_cam][max(cy - 1, 0), max(cx - 1, 0), :]
zone_num = 0
if pix[0] > 50 and pix[1] > 50 and pix[2] > 50: # w
zone_num = 1
if pix[0] < 50 and pix[1] < 50 and pix[2] > 50: # r
zone_num = 2
if pix[0] < 50 and pix[1] > 50 and pix[2] < 50: # g
zone_num = 3
if pix[0] > 50 and pix[1] < 50 and pix[2] < 50: # b
zone_num = 4
return zone_num
def is_ignore(self, zone_list, frame_list, cid):
        # 0: not in any crossroad; 1: white; 2: red; 3: green; 4: blue
zs, ze = zone_list[0], zone_list[-1]
fs, fe = frame_list[0], frame_list[-1]
if zs == ze:
            # the track stays in a single zone the whole time
if ze in [1, 2]:
return 2
if zs != 0 and 0 in zone_list:
return 0
if fe - fs > 1500:
return 2
if fs < 2:
if cid in [45]:
if ze in [3, 4]:
return 1
else:
return 2
if fe > 1999:
if cid in [41]:
if ze not in [3]:
return 2
else:
return 0
if fs < 2 or fe > 1999:
if ze in [3, 4]:
return 0
if ze in [3, 4]:
return 1
return 2
else:
# if camera section change
if cid in [41, 42, 43, 44, 45, 46]:
                # comes from a road extension, excluding
if zs == 1 and ze == 2:
return 2
if zs == 2 and ze == 1:
return 2
if cid in [41]:
                # on camera 41, no vehicle goes into camera 42
if (zs in [1, 2]) and ze == 4:
return 2
if zs == 4 and (ze in [1, 2]):
return 2
if cid in [46]:
                # on camera 46, no vehicle goes into camera 45
if (zs in [1, 2]) and ze == 3:
return 2
if zs == 3 and (ze in [1, 2]):
return 2
return 0
def filter_mot(self, mot_list, cid):
new_mot_list = dict()
sub_mot_list = dict()
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
zone_list = []
for f in frame_list:
zone_list.append(tracklet_dict[f]['zone'])
            flag = self.is_ignore(zone_list, frame_list, cid)
            if flag == 0:
                new_mot_list[tracklet] = tracklet_dict
            elif flag == 1:
                sub_mot_list[tracklet] = tracklet_dict
return new_mot_list
def filter_bbox(self, mot_list, cid):
new_mot_list = dict()
yh = self.zones[cid].shape[0]
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
bbox_list = []
for f in frame_list:
bbox_list.append(tracklet_dict[f]['bbox'])
bbox_x = [b[0] for b in bbox_list]
bbox_y = [b[1] for b in bbox_list]
bbox_w = [b[2] - b[0] for b in bbox_list]
bbox_h = [b[3] - b[1] for b in bbox_list]
new_frame_list = list()
if 0 in bbox_x or 0 in bbox_y:
b0 = [
i for i, f in enumerate(frame_list)
if bbox_x[i] < 5 or bbox_y[i] + bbox_h[i] > yh - 5
]
if len(b0) == len(frame_list):
if cid in [41, 42, 44, 45, 46]:
continue
max_w = max(bbox_w)
max_h = max(bbox_h)
for i, f in enumerate(frame_list):
if bbox_w[i] > max_w * BBOX_B and bbox_h[
i] > max_h * BBOX_B:
new_frame_list.append(f)
else:
l_i, r_i = 0, len(frame_list) - 1
if len(b0) == 0:
continue
if b0[0] == 0:
for i in range(len(b0) - 1):
if b0[i] + 1 == b0[i + 1]:
l_i = b0[i + 1]
else:
break
if b0[-1] == len(frame_list) - 1:
for i in range(len(b0) - 1):
i = len(b0) - 1 - i
if b0[i] - 1 == b0[i - 1]:
r_i = b0[i - 1]
else:
break
max_lw, max_lh = bbox_w[l_i], bbox_h[l_i]
max_rw, max_rh = bbox_w[r_i], bbox_h[r_i]
for i, f in enumerate(frame_list):
if i < l_i:
if bbox_w[i] > max_lw * BBOX_B and bbox_h[
i] > max_lh * BBOX_B:
new_frame_list.append(f)
elif i > r_i:
if bbox_w[i] > max_rw * BBOX_B and bbox_h[
i] > max_rh * BBOX_B:
new_frame_list.append(f)
else:
new_frame_list.append(f)
new_tracklet_dict = dict()
for f in new_frame_list:
new_tracklet_dict[f] = tracklet_dict[f]
new_mot_list[tracklet] = new_tracklet_dict
else:
new_mot_list[tracklet] = tracklet_dict
return new_mot_list
def break_mot(self, mot_list, cid):
new_mot_list = dict()
new_num_tracklets = max(mot_list) + 1
for tracklet in mot_list:
tracklet_dict = mot_list[tracklet]
frame_list = list(tracklet_dict.keys())
frame_list.sort()
zone_list = []
back_tracklet = False
new_zone_f = 0
pre_frame = frame_list[0]
time_break = False
for f in frame_list:
if f - pre_frame > 100:
if cid in [44, 45]:
time_break = True
break
if not cid in [41, 44, 45, 46]:
break
pre_frame = f
new_zone = tracklet_dict[f]['zone']
if len(zone_list) > 0 and zone_list[-1] == new_zone:
continue
if new_zone_f > 1:
if len(zone_list) > 1 and new_zone in zone_list:
back_tracklet = True
zone_list.append(new_zone)
new_zone_f = 0
else:
new_zone_f += 1
if back_tracklet:
new_tracklet_dict = dict()
pre_bbox = -1
pre_arrow = 0
have_break = False
for f in frame_list:
now_bbox = tracklet_dict[f]['bbox']
                    if isinstance(pre_bbox, int) and pre_bbox == -1:
                        pre_bbox = now_bbox
now_arrow = now_bbox[0] - pre_bbox[0]
if pre_arrow * now_arrow < 0 and len(
new_tracklet_dict) > 15 and not have_break:
new_mot_list[tracklet] = new_tracklet_dict
new_tracklet_dict = dict()
have_break = True
if have_break:
tracklet_dict[f]['id'] = new_num_tracklets
new_tracklet_dict[f] = tracklet_dict[f]
pre_bbox, pre_arrow = now_bbox, now_arrow
if have_break:
new_mot_list[new_num_tracklets] = new_tracklet_dict
new_num_tracklets += 1
else:
new_mot_list[tracklet] = new_tracklet_dict
elif time_break:
new_tracklet_dict = dict()
have_break = False
pre_frame = frame_list[0]
for f in frame_list:
if f - pre_frame > 100:
new_mot_list[tracklet] = new_tracklet_dict
new_tracklet_dict = dict()
have_break = True
new_tracklet_dict[f] = tracklet_dict[f]
pre_frame = f
if have_break:
new_mot_list[new_num_tracklets] = new_tracklet_dict
new_num_tracklets += 1
else:
new_mot_list[tracklet] = new_tracklet_dict
else:
new_mot_list[tracklet] = tracklet_dict
return new_mot_list
    def intra_matching(self, mot_list, sub_mot_list):
        new_mot_list, new_sub_mot_list = self.do_intra_matching2(mot_list,
                                                                 sub_mot_list)
        return new_mot_list
def do_intra_matching2(self, mot_list, sub_list):
new_zone_dict = dict()
def get_trac_info(tracklet1):
t1_f = list(tracklet1)
t1_f.sort()
t1_fs = t1_f[0]
t1_fe = t1_f[-1]
t1_zs = tracklet1[t1_fs]['zone']
t1_ze = tracklet1[t1_fe]['zone']
t1_boxs = tracklet1[t1_fs]['bbox']
t1_boxe = tracklet1[t1_fe]['bbox']
t1_boxs = [(t1_boxs[2] + t1_boxs[0]) / 2,
(t1_boxs[3] + t1_boxs[1]) / 2]
t1_boxe = [(t1_boxe[2] + t1_boxe[0]) / 2,
(t1_boxe[3] + t1_boxe[1]) / 2]
return t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe
for t1id in sub_list:
tracklet1 = sub_list[t1id]
if tracklet1 == -1:
continue
t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe = get_trac_info(
tracklet1)
sim_dict = dict()
for t2id in mot_list:
tracklet2 = mot_list[t2id]
t2_fs, t2_fe, t2_zs, t2_ze, t2_boxs, t2_boxe = get_trac_info(
tracklet2)
if t1_ze == t2_zs:
if abs(t2_fs - t1_fe) < 5 and abs(t2_boxe[0] - t1_boxs[
0]) < 50 and abs(t2_boxe[1] - t1_boxs[1]) < 50:
t1_feat = tracklet1[t1_fe]['feat']
t2_feat = tracklet2[t2_fs]['feat']
sim_dict[t2id] = np.matmul(t1_feat, t2_feat)
if t1_zs == t2_ze:
if abs(t2_fe - t1_fs) < 5 and abs(t2_boxs[0] - t1_boxe[
0]) < 50 and abs(t2_boxs[1] - t1_boxe[1]) < 50:
t1_feat = tracklet1[t1_fs]['feat']
t2_feat = tracklet2[t2_fe]['feat']
sim_dict[t2id] = np.matmul(t1_feat, t2_feat)
if len(sim_dict) > 0:
max_sim = 0
max_id = 0
for t2id in sim_dict:
if sim_dict[t2id] > max_sim:
                        max_sim = sim_dict[t2id]
max_id = t2id
if max_sim > 0.5:
t2 = mot_list[max_id]
for t1f in tracklet1:
if t1f not in t2:
tracklet1[t1f]['id'] = max_id
t2[t1f] = tracklet1[t1f]
mot_list[max_id] = t2
sub_list[t1id] = -1
return mot_list, sub_list
def do_intra_matching(self, sub_zone_dict, sub_zone):
new_zone_dict = dict()
id_list = list(sub_zone_dict)
id2index = dict()
for index, id in enumerate(id_list):
id2index[id] = index
def get_trac_info(tracklet1):
t1_f = list(tracklet1)
t1_f.sort()
t1_fs = t1_f[0]
t1_fe = t1_f[-1]
t1_zs = tracklet1[t1_fs]['zone']
t1_ze = tracklet1[t1_fe]['zone']
t1_boxs = tracklet1[t1_fs]['bbox']
t1_boxe = tracklet1[t1_fe]['bbox']
t1_boxs = [(t1_boxs[2] + t1_boxs[0]) / 2,
(t1_boxs[3] + t1_boxs[1]) / 2]
t1_boxe = [(t1_boxe[2] + t1_boxe[0]) / 2,
(t1_boxe[3] + t1_boxe[1]) / 2]
return t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe
sim_matrix = np.zeros([len(id_list), len(id_list)])
for t1id in sub_zone_dict:
tracklet1 = sub_zone_dict[t1id]
t1_fs, t1_fe, t1_zs, t1_ze, t1_boxs, t1_boxe = get_trac_info(
tracklet1)
t1_feat = tracklet1[t1_fe]['feat']
for t2id in sub_zone_dict:
if t1id == t2id:
continue
tracklet2 = sub_zone_dict[t2id]
t2_fs, t2_fe, t2_zs, t2_ze, t2_boxs, t2_boxe = get_trac_info(
tracklet2)
if t1_zs != t1_ze and t2_ze != t2_zs or t1_fe > t2_fs:
continue
if abs(t1_boxe[0] - t2_boxs[0]) > 50 or abs(t1_boxe[1] -
t2_boxs[1]) > 50:
continue
if t2_fs - t1_fe > 5:
continue
t2_feat = tracklet2[t2_fs]['feat']
sim_matrix[id2index[t1id], id2index[t2id]] = np.matmul(t1_feat,
t2_feat)
sim_matrix[id2index[t2id], id2index[t1id]] = np.matmul(t1_feat,
t2_feat)
sim_matrix = 1 - sim_matrix
cluster_labels = AgglomerativeClustering(
n_clusters=None,
distance_threshold=0.7,
affinity='precomputed',
linkage='complete').fit_predict(sim_matrix)
new_zone_dict = dict()
label2id = dict()
for index, label in enumerate(cluster_labels):
tracklet = sub_zone_dict[id_list[index]]
if label not in label2id:
new_id = tracklet[list(tracklet)[0]]
new_tracklet = dict()
else:
new_id = label2id[label]
new_tracklet = new_zone_dict[label2id[label]]
for tf in tracklet:
tracklet[tf]['id'] = new_id
new_tracklet[tf] = tracklet[tf]
new_zone_dict[label] = new_tracklet
return new_zone_dict
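# Example usage (illustrative sketch; assumes zone mask images such as
# 'c041.png' exist under zone_path, which is not bundled with this file):
#   >>> zones = Zone(zone_path='datasets/zone')
#   >>> zones.set_cam(41)
#   >>> zones.get_zone([100, 100, 200, 200])  # zone id at the bbox center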
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import base_jde_tracker
from . import base_sde_tracker
from . import jde_tracker
from . import deepsort_tracker
from .base_jde_tracker import *
from .base_sde_tracker import *
from .jde_tracker import *
from .deepsort_tracker import *
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import numpy as np
from collections import defaultdict
from collections import deque, OrderedDict
from ..matching import jde_matching as matching
__all__ = [
'TrackState',
'BaseTrack',
'STrack',
'joint_stracks',
'sub_stracks',
'remove_duplicate_stracks',
]
class TrackState(object):
New = 0
Tracked = 1
Lost = 2
Removed = 3
class BaseTrack(object):
_count_dict = defaultdict(int) # support single class and multi classes
track_id = 0
is_activated = False
state = TrackState.New
history = OrderedDict()
features = []
curr_feature = None
score = 0
start_frame = 0
frame_id = 0
time_since_update = 0
# multi-camera
location = (np.inf, np.inf)
@property
def end_frame(self):
return self.frame_id
@staticmethod
def next_id(cls_id):
BaseTrack._count_dict[cls_id] += 1
return BaseTrack._count_dict[cls_id]
# @even: reset track id
@staticmethod
def init_count(num_classes):
"""
Initiate _count for all object classes
:param num_classes:
"""
for cls_id in range(num_classes):
BaseTrack._count_dict[cls_id] = 0
@staticmethod
def reset_track_count(cls_id):
BaseTrack._count_dict[cls_id] = 0
def activate(self, *args):
raise NotImplementedError
def predict(self):
raise NotImplementedError
def update(self, *args, **kwargs):
raise NotImplementedError
def mark_lost(self):
self.state = TrackState.Lost
def mark_removed(self):
self.state = TrackState.Removed
class STrack(BaseTrack):
def __init__(self,
tlwh,
score,
temp_feat,
num_classes,
cls_id,
buff_size=30):
# object class id
self.cls_id = cls_id
# wait activate
        self._tlwh = np.asarray(tlwh, dtype=np.float64)
self.kalman_filter = None
self.mean, self.covariance = None, None
self.is_activated = False
self.score = score
self.track_len = 0
self.smooth_feat = None
self.update_features(temp_feat)
self.features = deque([], maxlen=buff_size)
self.alpha = 0.9
def update_features(self, feat):
# L2 normalizing
feat /= np.linalg.norm(feat)
self.curr_feat = feat
if self.smooth_feat is None:
self.smooth_feat = feat
else:
self.smooth_feat = self.alpha * self.smooth_feat + (1.0 - self.alpha
) * feat
self.features.append(feat)
self.smooth_feat /= np.linalg.norm(self.smooth_feat)
def predict(self):
mean_state = self.mean.copy()
if self.state != TrackState.Tracked:
mean_state[7] = 0
self.mean, self.covariance = self.kalman_filter.predict(mean_state,
self.covariance)
@staticmethod
def multi_predict(tracks, kalman_filter):
if len(tracks) > 0:
multi_mean = np.asarray([track.mean.copy() for track in tracks])
multi_covariance = np.asarray(
[track.covariance for track in tracks])
for i, st in enumerate(tracks):
if st.state != TrackState.Tracked:
multi_mean[i][7] = 0
multi_mean, multi_covariance = kalman_filter.multi_predict(
multi_mean, multi_covariance)
for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
tracks[i].mean = mean
tracks[i].covariance = cov
def reset_track_id(self):
self.reset_track_count(self.cls_id)
def activate(self, kalman_filter, frame_id):
"""Start a new track"""
self.kalman_filter = kalman_filter
# update track id for the object class
self.track_id = self.next_id(self.cls_id)
self.mean, self.covariance = self.kalman_filter.initiate(
self.tlwh_to_xyah(self._tlwh))
self.track_len = 0
self.state = TrackState.Tracked # set flag 'tracked'
if frame_id == 1: # to record the first frame's detection result
self.is_activated = True
self.frame_id = frame_id
self.start_frame = frame_id
def re_activate(self, new_track, frame_id, new_id=False):
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh))
self.update_features(new_track.curr_feat)
self.track_len = 0
self.state = TrackState.Tracked
self.is_activated = True
self.frame_id = frame_id
if new_id: # update track id for the object class
self.track_id = self.next_id(self.cls_id)
def update(self, new_track, frame_id, update_feature=True):
self.frame_id = frame_id
self.track_len += 1
new_tlwh = new_track.tlwh
self.mean, self.covariance = self.kalman_filter.update(
self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
self.state = TrackState.Tracked # set flag 'tracked'
self.is_activated = True # set flag 'activated'
self.score = new_track.score
if update_feature:
self.update_features(new_track.curr_feat)
@property
def tlwh(self):
"""Get current position in bounding box format `(top left x, top left y,
width, height)`.
"""
if self.mean is None:
return self._tlwh.copy()
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
@property
def tlbr(self):
"""Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
@staticmethod
def tlwh_to_xyah(tlwh):
"""Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = np.asarray(tlwh).copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
def to_xyah(self):
return self.tlwh_to_xyah(self.tlwh)
@staticmethod
def tlbr_to_tlwh(tlbr):
ret = np.asarray(tlbr).copy()
ret[2:] -= ret[:2]
return ret
@staticmethod
def tlwh_to_tlbr(tlwh):
ret = np.asarray(tlwh).copy()
ret[2:] += ret[:2]
return ret
def __repr__(self):
return 'OT_({}-{})_({}-{})'.format(self.cls_id, self.track_id,
self.start_frame, self.end_frame)
def joint_stracks(tlista, tlistb):
exists = {}
res = []
for t in tlista:
exists[t.track_id] = 1
res.append(t)
for t in tlistb:
tid = t.track_id
if not exists.get(tid, 0):
exists[tid] = 1
res.append(t)
return res
def sub_stracks(tlista, tlistb):
stracks = {}
for t in tlista:
stracks[t.track_id] = t
for t in tlistb:
tid = t.track_id
if stracks.get(tid, 0):
del stracks[tid]
return list(stracks.values())
def remove_duplicate_stracks(stracksa, stracksb):
pdist = matching.iou_distance(stracksa, stracksb)
pairs = np.where(pdist < 0.15)
dupa, dupb = list(), list()
for p, q in zip(*pairs):
timep = stracksa[p].frame_id - stracksa[p].start_frame
timeq = stracksb[q].frame_id - stracksb[q].start_frame
if timep > timeq:
dupb.append(q)
else:
dupa.append(p)
    resa = [t for i, t in enumerate(stracksa) if i not in dupa]
    resb = [t for i, t in enumerate(stracksb) if i not in dupb]
return resa, resb
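# A minimal sketch of how the list helpers above compose (track ids are set
# by hand here; in practice they are assigned by STrack.activate):
#
#   t1 = STrack(np.array([0., 0., 10., 20.]), 0.9, np.ones(128, 'float32'), 1, 0)
#   t1.track_id = 1
#   t2 = STrack(np.array([5., 5., 10., 20.]), 0.8, np.ones(128, 'float32'), 1, 0)
#   t2.track_id = 2
#   pool = joint_stracks([t1], [t2])  # union by track_id -> [t1, t2]
#   rest = sub_stracks(pool, [t2])    # remove t2 by id   -> [t1]
#   keep_a, keep_b = remove_duplicate_stracks([t1], [t2])  # drop IoU > 0.85 pairs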
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/track.py
"""
import datetime
__all__ = ['TrackState', 'Track']
class TrackState(object):
"""
Enumeration type for the single target track state. Newly created tracks are
classified as `tentative` until enough evidence has been collected. Then,
the track state is changed to `confirmed`. Tracks that are no longer alive
are classified as `deleted` to mark them for removal from the set of active
tracks.
"""
Tentative = 1
Confirmed = 2
Deleted = 3
class Track(object):
"""
A single target track with state space `(x, y, a, h)` and associated
velocities, where `(x, y)` is the center of the bounding box, `a` is the
aspect ratio and `h` is the height.
Args:
mean (ndarray): Mean vector of the initial state distribution.
covariance (ndarray): Covariance matrix of the initial state distribution.
track_id (int): A unique track identifier.
n_init (int): Number of consecutive detections before the track is confirmed.
The track state is set to `Deleted` if a miss occurs within the first
`n_init` frames.
max_age (int): The maximum number of consecutive misses before the track
state is set to `Deleted`.
cls_id (int): The category id of the tracked box.
score (float): The confidence score of the tracked box.
feature (Optional[ndarray]): Feature vector of the detection this track
originates from. If not None, this feature is added to the `features` cache.
Attributes:
hits (int): Total number of measurement updates.
        age (int): Total number of frames since first occurrence.
time_since_update (int): Total number of frames since last measurement
update.
state (TrackState): The current track state.
features (List[ndarray]): A cache of features. On each measurement update,
the associated feature vector is added to this list.
"""
def __init__(self,
mean,
covariance,
track_id,
n_init,
max_age,
cls_id,
score,
feature=None):
self.mean = mean
self.covariance = covariance
self.track_id = track_id
self.hits = 1
self.age = 1
self.time_since_update = 0
self.cls_id = cls_id
self.score = score
self.start_time = datetime.datetime.now()
self.state = TrackState.Tentative
self.features = []
self.feat = feature
if feature is not None:
self.features.append(feature)
self._n_init = n_init
self._max_age = max_age
def to_tlwh(self):
"""Get position in format `(top left x, top left y, width, height)`."""
ret = self.mean[:4].copy()
ret[2] *= ret[3]
ret[:2] -= ret[2:] / 2
return ret
def to_tlbr(self):
"""Get position in bounding box format `(min x, miny, max x, max y)`."""
ret = self.to_tlwh()
ret[2:] = ret[:2] + ret[2:]
return ret
def predict(self, kalman_filter):
"""
Propagate the state distribution to the current time step using a Kalman
filter prediction step.
"""
self.mean, self.covariance = kalman_filter.predict(self.mean,
self.covariance)
self.age += 1
self.time_since_update += 1
def update(self, kalman_filter, detection):
"""
Perform Kalman filter measurement update step and update the associated
detection feature cache.
"""
self.mean, self.covariance = kalman_filter.update(self.mean,
self.covariance,
detection.to_xyah())
self.features.append(detection.feature)
self.feat = detection.feature
self.cls_id = detection.cls_id
self.score = detection.score
self.hits += 1
self.time_since_update = 0
if self.state == TrackState.Tentative and self.hits >= self._n_init:
self.state = TrackState.Confirmed
def mark_missed(self):
"""Mark this track as missed (no association at the current time step).
"""
if self.state == TrackState.Tentative:
self.state = TrackState.Deleted
elif self.time_since_update > self._max_age:
self.state = TrackState.Deleted
def is_tentative(self):
"""Returns True if this track is tentative (unconfirmed)."""
return self.state == TrackState.Tentative
def is_confirmed(self):
"""Returns True if this track is confirmed."""
return self.state == TrackState.Confirmed
def is_deleted(self):
"""Returns True if this track is dead and should be deleted."""
return self.state == TrackState.Deleted
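# A minimal sketch of the track lifecycle (assumes a KalmanFilter instance
# `kf` from ..motion and a Detection `det` providing to_xyah() and .feature):
#
#   mean, cov = kf.initiate(det.to_xyah())
#   track = Track(mean, cov, track_id=1, n_init=3, max_age=70,
#                 cls_id=0, score=0.9, feature=det.feature)
#   track.predict(kf)      # age += 1, time_since_update += 1
#   track.update(kf, det)  # hits += 1; Confirmed once hits >= n_init
#   track.mark_missed()    # Tentative tracks are Deleted immediately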
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/nwojke/deep_sort/blob/master/deep_sort/tracker.py
"""
import numpy as np
from ..motion import KalmanFilter
from ..matching.deepsort_matching import NearestNeighborDistanceMetric
from ..matching.deepsort_matching import iou_cost, min_cost_matching, matching_cascade, gate_cost_matrix
from .base_sde_tracker import Track
from ..utils import Detection
__all__ = ['DeepSORTTracker']
class DeepSORTTracker(object):
"""
DeepSORT tracker
Args:
        input_size (list): input image size for the reid model, [w, h]
            format, [64, 192] as default.
min_box_area (int): min box area to filter out low quality boxes
        vertical_ratio (float): w/h, the vertical ratio of the bbox used
            to filter bad results, 1.6 as default for pedestrian tracking.
            If set <=0, no filtering is applied.
budget (int): If not None, fix samples per class to at most this number.
Removes the oldest samples when the budget is reached.
        max_age (int): maximum number of consecutive misses before a track is deleted
        n_init (int): Number of consecutive detections before the track is
            confirmed. The track state is set to `Deleted` if a miss occurs
            within the first `n_init` frames.
metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
matching_threshold (float): samples with larger distance are
considered an invalid match.
max_iou_distance (float): max iou distance threshold
        motion (str): motion model, use KalmanFilter as default
"""
def __init__(self,
input_size=[64, 192],
min_box_area=0,
vertical_ratio=-1,
budget=100,
max_age=70,
n_init=3,
metric_type='cosine',
matching_threshold=0.2,
max_iou_distance=0.9,
motion='KalmanFilter'):
self.input_size = input_size
self.min_box_area = min_box_area
self.vertical_ratio = vertical_ratio
self.max_age = max_age
self.n_init = n_init
self.metric = NearestNeighborDistanceMetric(metric_type,
matching_threshold, budget)
self.max_iou_distance = max_iou_distance
if motion == 'KalmanFilter':
self.motion = KalmanFilter()
self.tracks = []
self._next_id = 1
def predict(self):
"""
Propagate track state distributions one time step forward.
This function should be called once every time step, before `update`.
"""
for track in self.tracks:
track.predict(self.motion)
def update(self, pred_dets, pred_embs):
"""
Perform measurement update and track management.
Args:
            pred_dets (np.array): Detection results of the image, the shape
                is [N, 6], means 'x0, y0, w, h, score, cls_id' (tlwh box).
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128], usually pred_embs.shape[1] is a multiple of 128.
"""
pred_tlwhs = pred_dets[:, :4]
pred_scores = pred_dets[:, 4:5]
pred_cls_ids = pred_dets[:, 5:]
detections = [
Detection(tlwh, score, feat, cls_id)
for tlwh, score, feat, cls_id in zip(pred_tlwhs, pred_scores,
pred_embs, pred_cls_ids)
]
# Run matching cascade.
matches, unmatched_tracks, unmatched_detections = \
self._match(detections)
# Update track set.
for track_idx, detection_idx in matches:
self.tracks[track_idx].update(self.motion,
detections[detection_idx])
for track_idx in unmatched_tracks:
self.tracks[track_idx].mark_missed()
for detection_idx in unmatched_detections:
self._initiate_track(detections[detection_idx])
self.tracks = [t for t in self.tracks if not t.is_deleted()]
# Update distance metric.
active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
features, targets = [], []
for track in self.tracks:
if not track.is_confirmed():
continue
features += track.features
targets += [track.track_id for _ in track.features]
track.features = []
self.metric.partial_fit(
np.asarray(features), np.asarray(targets), active_targets)
output_stracks = self.tracks
return output_stracks
def _match(self, detections):
def gated_metric(tracks, dets, track_indices, detection_indices):
features = np.array([dets[i].feature for i in detection_indices])
targets = np.array([tracks[i].track_id for i in track_indices])
cost_matrix = self.metric.distance(features, targets)
cost_matrix = gate_cost_matrix(self.motion, cost_matrix, tracks,
dets, track_indices,
detection_indices)
return cost_matrix
# Split track set into confirmed and unconfirmed tracks.
confirmed_tracks = [
i for i, t in enumerate(self.tracks) if t.is_confirmed()
]
unconfirmed_tracks = [
i for i, t in enumerate(self.tracks) if not t.is_confirmed()
]
# Associate confirmed tracks using appearance features.
matches_a, unmatched_tracks_a, unmatched_detections = \
matching_cascade(
gated_metric, self.metric.matching_threshold, self.max_age,
self.tracks, detections, confirmed_tracks)
# Associate remaining tracks together with unconfirmed tracks using IOU.
iou_track_candidates = unconfirmed_tracks + [
k for k in unmatched_tracks_a
if self.tracks[k].time_since_update == 1
]
unmatched_tracks_a = [
k for k in unmatched_tracks_a
if self.tracks[k].time_since_update != 1
]
matches_b, unmatched_tracks_b, unmatched_detections = \
min_cost_matching(
iou_cost, self.max_iou_distance, self.tracks,
detections, iou_track_candidates, unmatched_detections)
matches = matches_a + matches_b
unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
return matches, unmatched_tracks, unmatched_detections
def _initiate_track(self, detection):
mean, covariance = self.motion.initiate(detection.to_xyah())
self.tracks.append(
Track(mean, covariance, self._next_id, self.n_init, self.max_age,
detection.cls_id, detection.score, detection.feature))
self._next_id += 1
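# A minimal per-frame driver sketch with dummy inputs (pred_dets columns are
# 'x0, y0, w, h, score, cls_id', matching the update() docstring above):
#
#   tracker = DeepSORTTracker()
#   pred_dets = np.array([[10., 20., 40., 60., 0.9, 0.]], dtype=np.float32)
#   pred_embs = np.random.rand(1, 128).astype(np.float32)
#   tracker.predict()  # propagate Kalman states first
#   tracks = tracker.update(pred_dets, pred_embs)
#   confirmed = [t for t in tracks if t.is_confirmed()]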
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
"""
import numpy as np
from collections import defaultdict
from ..matching import jde_matching as matching
from ..motion import KalmanFilter
from .base_jde_tracker import TrackState, STrack
from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks
__all__ = ['JDETracker']
class JDETracker(object):
    """
JDE tracker, support single class and multi classes
Args:
num_classes (int): the number of classes
det_thresh (float): threshold of detection score
track_buffer (int): buffer for tracker
min_box_area (int): min box area to filter out low quality boxes
        vertical_ratio (float): w/h, the vertical ratio of the bbox used
            to filter bad results. If set <=0, no filtering is applied;
            usually set to 1.6 for pedestrian tracking.
tracked_thresh (float): linear assignment threshold of tracked
stracks and detections
r_tracked_thresh (float): linear assignment threshold of
tracked stracks and unmatched detections
unconfirmed_thresh (float): linear assignment threshold of
unconfirmed stracks and unmatched detections
motion (str): motion model, KalmanFilter as default
conf_thres (float): confidence threshold for tracking
metric_type (str): either "euclidean" or "cosine", the distance metric
used for measurement to track association.
"""
def __init__(self,
num_classes=1,
det_thresh=0.3,
track_buffer=30,
min_box_area=200,
vertical_ratio=1.6,
tracked_thresh=0.7,
r_tracked_thresh=0.5,
unconfirmed_thresh=0.7,
motion='KalmanFilter',
conf_thres=0,
metric_type='euclidean'):
self.num_classes = num_classes
self.det_thresh = det_thresh
self.track_buffer = track_buffer
self.min_box_area = min_box_area
self.vertical_ratio = vertical_ratio
self.tracked_thresh = tracked_thresh
self.r_tracked_thresh = r_tracked_thresh
self.unconfirmed_thresh = unconfirmed_thresh
if motion == 'KalmanFilter':
self.motion = KalmanFilter()
self.conf_thres = conf_thres
self.metric_type = metric_type
self.frame_id = 0
self.tracked_tracks_dict = defaultdict(list) # dict(list[STrack])
self.lost_tracks_dict = defaultdict(list) # dict(list[STrack])
self.removed_tracks_dict = defaultdict(list) # dict(list[STrack])
self.max_time_lost = 0
# max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)
def update(self, pred_dets, pred_embs):
"""
        Processes the image frame and finds bounding boxes (detections).
        Associates the detections with corresponding tracklets and also
        handles lost, removed, refound and active tracklets.
Args:
pred_dets (np.array): Detection results of the image, the shape is
[N, 6], means 'x0, y0, x1, y1, score, cls_id'.
pred_embs (np.array): Embedding results of the image, the shape is
[N, 128] or [N, 512].
        Returns:
            output_tracks_dict (dict(list)): Each list contains the online
                tracklets of one class for the received image.
"""
self.frame_id += 1
if self.frame_id == 1:
STrack.init_count(self.num_classes)
activated_tracks_dict = defaultdict(list)
refined_tracks_dict = defaultdict(list)
lost_tracks_dict = defaultdict(list)
removed_tracks_dict = defaultdict(list)
output_tracks_dict = defaultdict(list)
pred_dets_dict = defaultdict(list)
pred_embs_dict = defaultdict(list)
# unify single and multi classes detection and embedding results
for cls_id in range(self.num_classes):
cls_idx = (pred_dets[:, 5:] == cls_id).squeeze(-1)
pred_dets_dict[cls_id] = pred_dets[cls_idx]
pred_embs_dict[cls_id] = pred_embs[cls_idx]
for cls_id in range(self.num_classes):
""" Step 1: Get detections by class"""
pred_dets_cls = pred_dets_dict[cls_id]
pred_embs_cls = pred_embs_dict[cls_id]
remain_inds = (pred_dets_cls[:, 4:5] > self.conf_thres).squeeze(-1)
if remain_inds.sum() > 0:
pred_dets_cls = pred_dets_cls[remain_inds]
pred_embs_cls = pred_embs_cls[remain_inds]
detections = [
STrack(
STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f,
self.num_classes, cls_id, 30)
for (tlbrs, f) in zip(pred_dets_cls, pred_embs_cls)
]
else:
detections = []
''' Add newly detected tracklets to tracked_stracks'''
unconfirmed_dict = defaultdict(list)
tracked_tracks_dict = defaultdict(list)
for track in self.tracked_tracks_dict[cls_id]:
if not track.is_activated:
# previous tracks which are not active in the current frame are added in unconfirmed list
unconfirmed_dict[cls_id].append(track)
else:
# Active tracks are added to the local list 'tracked_stracks'
tracked_tracks_dict[cls_id].append(track)
""" Step 2: First association, with embedding"""
# building tracking pool for the current frame
track_pool_dict = defaultdict(list)
track_pool_dict[cls_id] = joint_stracks(
tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id])
# Predict the current location with KalmanFilter
STrack.multi_predict(track_pool_dict[cls_id], self.motion)
dists = matching.embedding_distance(
track_pool_dict[cls_id], detections, metric=self.metric_type)
dists = matching.fuse_motion(self.motion, dists,
track_pool_dict[cls_id], detections)
matches, u_track, u_detection = matching.linear_assignment(
dists, thresh=self.tracked_thresh)
for i_tracked, idet in matches:
# i_tracked is the id of the track and idet is the detection
track = track_pool_dict[cls_id][i_tracked]
det = detections[idet]
if track.state == TrackState.Tracked:
# If the track is active, add the detection to the track
track.update(detections[idet], self.frame_id)
activated_tracks_dict[cls_id].append(track)
else:
# We have obtained a detection from a track which is not active,
# hence put the track in refind_stracks list
track.re_activate(det, self.frame_id, new_id=False)
refined_tracks_dict[cls_id].append(track)
# None of the steps below happen if there are no undetected tracks.
""" Step 3: Second association, with IOU"""
detections = [detections[i] for i in u_detection]
r_tracked_stracks = []
for i in u_track:
if track_pool_dict[cls_id][i].state == TrackState.Tracked:
r_tracked_stracks.append(track_pool_dict[cls_id][i])
dists = matching.iou_distance(r_tracked_stracks, detections)
matches, u_track, u_detection = matching.linear_assignment(
dists, thresh=self.r_tracked_thresh)
for i_tracked, idet in matches:
track = r_tracked_stracks[i_tracked]
det = detections[idet]
if track.state == TrackState.Tracked:
track.update(det, self.frame_id)
activated_tracks_dict[cls_id].append(track)
else:
track.re_activate(det, self.frame_id, new_id=False)
refined_tracks_dict[cls_id].append(track)
for it in u_track:
track = r_tracked_stracks[it]
if not track.state == TrackState.Lost:
track.mark_lost()
lost_tracks_dict[cls_id].append(track)
'''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
detections = [detections[i] for i in u_detection]
dists = matching.iou_distance(unconfirmed_dict[cls_id], detections)
matches, u_unconfirmed, u_detection = matching.linear_assignment(
dists, thresh=self.unconfirmed_thresh)
for i_tracked, idet in matches:
unconfirmed_dict[cls_id][i_tracked].update(detections[idet],
self.frame_id)
activated_tracks_dict[cls_id].append(unconfirmed_dict[cls_id][
i_tracked])
for it in u_unconfirmed:
track = unconfirmed_dict[cls_id][it]
track.mark_removed()
removed_tracks_dict[cls_id].append(track)
""" Step 4: Init new stracks"""
for inew in u_detection:
track = detections[inew]
if track.score < self.det_thresh:
continue
track.activate(self.motion, self.frame_id)
activated_tracks_dict[cls_id].append(track)
""" Step 5: Update state"""
for track in self.lost_tracks_dict[cls_id]:
if self.frame_id - track.end_frame > self.max_time_lost:
track.mark_removed()
removed_tracks_dict[cls_id].append(track)
self.tracked_tracks_dict[cls_id] = [
t for t in self.tracked_tracks_dict[cls_id]
if t.state == TrackState.Tracked
]
self.tracked_tracks_dict[cls_id] = joint_stracks(
self.tracked_tracks_dict[cls_id], activated_tracks_dict[cls_id])
self.tracked_tracks_dict[cls_id] = joint_stracks(
self.tracked_tracks_dict[cls_id], refined_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id] = sub_stracks(
self.lost_tracks_dict[cls_id], self.tracked_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id])
self.lost_tracks_dict[cls_id] = sub_stracks(
self.lost_tracks_dict[cls_id], self.removed_tracks_dict[cls_id])
self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id])
self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[
cls_id] = remove_duplicate_stracks(
self.tracked_tracks_dict[cls_id],
self.lost_tracks_dict[cls_id])
            # collect the activated tracked stracks as output
output_tracks_dict[cls_id] = [
track for track in self.tracked_tracks_dict[cls_id]
if track.is_activated
]
return output_tracks_dict
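# A minimal single-class driver sketch with dummy detections (pred_dets
# columns are 'x0, y0, x1, y1, score, cls_id', matching update() above):
#
#   tracker = JDETracker(num_classes=1)
#   tracker.max_time_lost = 30  # normally int(frame_rate / 30. * track_buffer)
#   pred_dets = np.array([[10., 20., 50., 80., 0.9, 0.]], dtype=np.float32)
#   pred_embs = np.random.rand(1, 128).astype(np.float32)
#   online = tracker.update(pred_dets, pred_embs)  # dict: cls_id -> [STrack]
#   for t in online[0]:
#       print(t.track_id, t.tlwh, t.score)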
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import cv2
import time
import numpy as np
import collections
__all__ = [
'MOTTimer', 'Detection', 'write_mot_results', 'load_det_results',
'preprocess_reid', 'get_crops', 'clip_box', 'scale_coords', 'flow_statistic',
'plot_tracking'
]
class MOTTimer(object):
"""
    This class is used to compute and print the current FPS during evaluation.
"""
def __init__(self, window_size=20):
self.start_time = 0.
self.diff = 0.
self.duration = 0.
self.deque = collections.deque(maxlen=window_size)
def tic(self):
        # use time.time instead of time.clock because time.clock
        # does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
self.diff = time.time() - self.start_time
self.deque.append(self.diff)
if average:
self.duration = np.mean(self.deque)
else:
self.duration = np.sum(self.deque)
return self.duration
def clear(self):
self.start_time = 0.
self.diff = 0.
self.duration = 0.
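# Typical usage (a minimal sketch): wrap the per-frame work between tic()
# and toc(), then derive the smoothed FPS from the averaged duration.
#
#   timer = MOTTimer()
#   timer.tic()
#   # ... run tracking for one frame ...
#   timer.toc()
#   fps = 1. / max(timer.duration, 1e-6)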
class Detection(object):
"""
This class represents a bounding box detection in a single image.
Args:
        tlwh (ndarray): Bounding box in format `(top left x, top left y,
            width, height)`.
        score (float): Bounding box confidence score.
        feature (ndarray): A feature vector that describes the object
            contained in this image.
        cls_id (int): Bounding box category id.
"""
def __init__(self, tlwh, score, feature, cls_id):
self.tlwh = np.asarray(tlwh, dtype=np.float32)
self.score = float(score)
self.feature = np.asarray(feature, dtype=np.float32)
self.cls_id = int(cls_id)
def to_tlbr(self):
"""
Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
`(top left, bottom right)`.
"""
ret = self.tlwh.copy()
ret[2:] += ret[:2]
return ret
def to_xyah(self):
"""
Convert bounding box to format `(center x, center y, aspect ratio,
height)`, where the aspect ratio is `width / height`.
"""
ret = self.tlwh.copy()
ret[:2] += ret[2:] / 2
ret[2] /= ret[3]
return ret
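# Worked example of the box conversions (illustrative values):
#
#   det = Detection([10., 20., 40., 60.], 0.9, np.ones(128), 0)
#   det.to_tlbr()  # -> [10., 20., 50., 80.]   (x1 + w, y1 + h)
#   det.to_xyah()  # -> [30., 50., 0.667, 60.] (center x/y, w/h ratio, h)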
def write_mot_results(filename, results, data_type='mot', num_classes=1):
# support single and multi classes
if data_type in ['mot', 'mcmot']:
save_format = '{frame},{id},{x1},{y1},{w},{h},{score},{cls_id},-1,-1\n'
elif data_type == 'kitti':
save_format = '{frame} {id} car 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n'
else:
raise ValueError(data_type)
f = open(filename, 'w')
for cls_id in range(num_classes):
for frame_id, tlwhs, tscores, track_ids in results[cls_id]:
if data_type == 'kitti':
frame_id -= 1
for tlwh, score, track_id in zip(tlwhs, tscores, track_ids):
if track_id < 0: continue
if data_type == 'mot':
cls_id = -1
x1, y1, w, h = tlwh
x2, y2 = x1 + w, y1 + h
line = save_format.format(
frame=frame_id,
id=track_id,
x1=x1,
y1=y1,
x2=x2,
y2=y2,
w=w,
h=h,
score=score,
cls_id=cls_id)
f.write(line)
    f.close()
    print('MOT results saved in {}'.format(filename))
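# For data_type='mot' each saved line looks like (illustrative values):
#   1,3,100.0,200.0,40.0,60.0,0.95,-1,-1,-1
# i.e. frame, track id, x1, y1, w, h, score, cls_id (-1 in the single-class
# 'mot' format), followed by two -1 placeholders.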
def load_det_results(det_file, num_frames):
    assert os.path.exists(det_file) and os.path.isfile(det_file), \
        '{} does not exist or is not a file.'.format(det_file)
labels = np.loadtxt(det_file, dtype='float32', delimiter=',')
assert labels.shape[1] == 7, \
"Each line of {} should have 7 items: '[frame_id],[x0],[y0],[w],[h],[score],[class_id]'.".format(det_file)
results_list = []
for frame_i in range(num_frames):
results = {'bbox': [], 'score': [], 'cls_id': []}
        labels_with_frame = labels[labels[:, 0] == frame_i + 1]
        # each line of labels_with_frame:
        # [frame_id],[x0],[y0],[w],[h],[score],[class_id]
        for l in labels_with_frame:
results['bbox'].append(l[1:5])
results['score'].append(l[5:6])
results['cls_id'].append(l[6:7])
results_list.append(results)
return results_list
def scale_coords(coords, input_shape, im_shape, scale_factor):
# Note: ratio has only one value, scale_factor[0] == scale_factor[1]
#
    # This function is only used for JDE YOLOv3 or other detectors with
    # LetterBoxResize and JDEBBoxPostProcess, whose output coords have not
    # been scaled back to the original image.
ratio = scale_factor[0]
pad_w = (input_shape[1] - int(im_shape[1])) / 2
pad_h = (input_shape[0] - int(im_shape[0])) / 2
coords[:, 0::2] -= pad_w
coords[:, 1::2] -= pad_h
coords[:, 0:4] /= ratio
coords[:, :4] = np.clip(coords[:, :4], a_min=0, a_max=coords[:, :4].max())
return coords.round()
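# Worked example (synthetic numbers): a 1280x640 (w x h) frame letterboxed
# into a 1088x608 input with ratio 0.85 gives im_shape (544, 1088) and
# vertical padding (608 - 544) / 2 = 32, so a detected box
# [0., 32., 85., 117.] maps back to [0., 0., 100., 100.]:
#
#   coords = np.array([[0., 32., 85., 117.]])
#   scale_coords(coords, (608, 1088), (544, 1088), (0.85, 0.85))
#   # -> [[0., 0., 100., 100.]]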
def clip_box(xyxy, ori_image_shape):
H, W = ori_image_shape
xyxy[:, 0::2] = np.clip(xyxy[:, 0::2], a_min=0, a_max=W)
xyxy[:, 1::2] = np.clip(xyxy[:, 1::2], a_min=0, a_max=H)
w = xyxy[:, 2:3] - xyxy[:, 0:1]
h = xyxy[:, 3:4] - xyxy[:, 1:2]
mask = np.logical_and(h > 0, w > 0)
keep_idx = np.nonzero(mask)
return xyxy[keep_idx[0]], keep_idx
def get_crops(xyxy, ori_img, w, h):
crops = []
xyxy = xyxy.astype(np.int64)
ori_img = ori_img.numpy()
ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2) # [h,w,3]->[w,h,3]
for i, bbox in enumerate(xyxy):
crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :]
crops.append(crop)
crops = preprocess_reid(crops, w, h)
return crops
def preprocess_reid(imgs,
w=64,
h=192,
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]):
im_batch = []
for img in imgs:
img = cv2.resize(img, (w, h))
img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255
img_mean = np.array(mean).reshape((3, 1, 1))
img_std = np.array(std).reshape((3, 1, 1))
img -= img_mean
img /= img_std
img = np.expand_dims(img, axis=0)
im_batch.append(img)
im_batch = np.concatenate(im_batch, 0)
return im_batch
def flow_statistic(result,
secs_interval,
do_entrance_counting,
video_fps,
entrance,
id_set,
interval_id_set,
in_id_list,
out_id_list,
prev_center,
records,
data_type='mot',
num_classes=1):
    # Count in and out numbers:
    # Use the horizontal center line as the entrance just for simplification.
    # If a person was above the horizontal center line at the previous frame
    # and is below the line at the current frame, the in-count is increased
    # by one. If a person was below the horizontal center line at the previous
    # frame and is above the line at the current frame, the out-count is
    # increased by one.
    # TODO: if the entrance is not the horizontal center line,
    # the counting method should be optimized.
if do_entrance_counting:
entrance_y = entrance[1] # xmin, ymin, xmax, ymax
frame_id, tlwhs, tscores, track_ids = result
for tlwh, score, track_id in zip(tlwhs, tscores, track_ids):
if track_id < 0: continue
if data_type == 'kitti':
frame_id -= 1
x1, y1, w, h = tlwh
center_x = x1 + w / 2.
center_y = y1 + h / 2.
if track_id in prev_center:
if prev_center[track_id][1] <= entrance_y and \
center_y > entrance_y:
in_id_list.append(track_id)
if prev_center[track_id][1] >= entrance_y and \
center_y < entrance_y:
out_id_list.append(track_id)
prev_center[track_id][0] = center_x
prev_center[track_id][1] = center_y
else:
prev_center[track_id] = [center_x, center_y]
    # Count the total number and the number within a manually-set interval
frame_id, tlwhs, tscores, track_ids = result
for tlwh, score, track_id in zip(tlwhs, tscores, track_ids):
if track_id < 0: continue
id_set.add(track_id)
interval_id_set.add(track_id)
# Reset counting at the interval beginning
if frame_id % video_fps == 0 and frame_id / video_fps % secs_interval == 0:
curr_interval_count = len(interval_id_set)
interval_id_set.clear()
info = "Frame id: {}, Total count: {}".format(frame_id, len(id_set))
if do_entrance_counting:
info += ", In count: {}, Out count: {}".format(
len(in_id_list), len(out_id_list))
if frame_id % video_fps == 0 and frame_id / video_fps % secs_interval == 0:
info += ", Count during {} secs: {}".format(secs_interval,
curr_interval_count)
print(info)
info += "\n"
records.append(info)
return {
"id_set": id_set,
"interval_id_set": interval_id_set,
"in_id_list": in_id_list,
"out_id_list": out_id_list,
"prev_center": prev_center,
"records": records
}
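# A minimal sketch of driving flow_statistic frame by frame; the state
# containers are created once and threaded through every call (height,
# width, fps and frame_results are placeholders here):
#
#   id_set, interval_id_set = set(), set()
#   in_list, out_list, prev_center, records = [], [], dict(), []
#   entrance = [0, height / 2., width, height / 2.]
#   for frame_id, (tlwhs, scores, ids) in enumerate(frame_results):
#       stat = flow_statistic((frame_id + 1, tlwhs, scores, ids), 10, True,
#                             fps, entrance, id_set, interval_id_set,
#                             in_list, out_list, prev_center, records)
#       records = stat['records']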
def get_color(idx):
idx = idx * 3
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color
def plot_tracking(image,
tlwhs,
obj_ids,
scores=None,
frame_id=0,
fps=0.,
ids2names=[],
do_entrance_counting=False,
entrance=None):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 2
line_thickness = max(1, int(image.shape[1] / 500.))
if fps > 0:
_line = 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs))
else:
_line = 'frame: %d num: %d' % (frame_id, len(tlwhs))
cv2.putText(
im,
_line,
(0, int(15 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
obj_id = int(obj_ids[i])
id_text = '{}'.format(int(obj_id))
if ids2names != []:
        assert len(
            ids2names) == 1, "plot_tracking only supports a single class."
id_text = '{}_'.format(ids2names[0]) + id_text
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(
im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
cv2.putText(
im,
id_text, (intbox[0], intbox[1] - 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=text_thickness)
if scores is not None:
text = '{:.2f}'.format(float(scores[i]))
cv2.putText(
im,
text, (intbox[0], intbox[1] + 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255),
thickness=text_thickness)
if do_entrance_counting:
entrance_line = tuple(map(int, entrance))
cv2.rectangle(
im,
entrance_line[0:2],
entrance_line[2:4],
color=(0, 255, 255),
thickness=line_thickness)
return im
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import yaml
import cv2
import numpy as np
from collections import defaultdict
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from utils import argsparser, Timer, get_current_memory_mb
from det_infer import Detector, get_test_images, print_arguments, PredictConfig
from benchmark_utils import PaddleInferBenchmark
from visualize import plot_tracking_dict
from mot.tracker import JDETracker
from mot.utils import MOTTimer, write_mot_results, flow_statistic
# Supported MOT models
MOT_SUPPORT_MODELS = {
'JDE',
'FairMOT',
}
class JDE_Detector(Detector):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
        batch_size (int): batch size of model inference
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1088,
trt_opt_shape=608,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
super(JDE_Detector, self).__init__(
pred_config=pred_config,
model_dir=model_dir,
device=device,
run_mode=run_mode,
batch_size=batch_size,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
assert batch_size == 1, "The JDE Detector only supports batch size=1 now"
assert pred_config.tracker, "Tracking model should have tracker"
self.num_classes = len(pred_config.labels)
tp = pred_config.tracker
min_box_area = tp['min_box_area'] if 'min_box_area' in tp else 200
vertical_ratio = tp['vertical_ratio'] if 'vertical_ratio' in tp else 1.6
conf_thres = tp['conf_thres'] if 'conf_thres' in tp else 0.
tracked_thresh = tp['tracked_thresh'] if 'tracked_thresh' in tp else 0.7
metric_type = tp['metric_type'] if 'metric_type' in tp else 'euclidean'
self.tracker = JDETracker(
num_classes=self.num_classes,
min_box_area=min_box_area,
vertical_ratio=vertical_ratio,
conf_thres=conf_thres,
tracked_thresh=tracked_thresh,
metric_type=metric_type)
def postprocess(self, pred_dets, pred_embs, threshold):
online_targets_dict = self.tracker.update(pred_dets, pred_embs)
online_tlwhs = defaultdict(list)
online_scores = defaultdict(list)
online_ids = defaultdict(list)
for cls_id in range(self.num_classes):
online_targets = online_targets_dict[cls_id]
for t in online_targets:
tlwh = t.tlwh
tid = t.track_id
tscore = t.score
if tscore < threshold: continue
if tlwh[2] * tlwh[3] <= self.tracker.min_box_area: continue
if self.tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
3] > self.tracker.vertical_ratio:
continue
online_tlwhs[cls_id].append(tlwh)
online_ids[cls_id].append(tid)
online_scores[cls_id].append(tscore)
return online_tlwhs, online_scores, online_ids
def predict(self, image_list, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image_list (list): list of image
            threshold (float): score threshold of the predicted boxes
Returns:
online_tlwhs, online_scores, online_ids (dict[np.array])
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_list)
self.det_times.preprocess_time_s.end()
pred_dets, pred_embs = None, None
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
pred_dets = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
pred_dets = boxes_tensor.copy_to_cpu()
embs_tensor = self.predictor.get_output_handle(output_names[1])
pred_embs = embs_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start()
online_tlwhs, online_scores, online_ids = self.postprocess(
pred_dets, pred_embs, threshold)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1
return online_tlwhs, online_scores, online_ids
def predict_image(detector, image_list):
results = []
num_classes = detector.num_classes
data_type = 'mcmot' if num_classes > 1 else 'mot'
ids2names = detector.pred_config.labels
image_list.sort()
for frame_id, img_file in enumerate(image_list):
frame = cv2.imread(img_file)
if FLAGS.run_benchmark:
detector.predict([frame], FLAGS.threshold, warmup=10, repeats=10)
cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm
detector.gpu_mem += gm
detector.gpu_util += gu
print('Test iter {}, file name:{}'.format(frame_id, img_file))
else:
online_tlwhs, online_scores, online_ids = detector.predict(
[frame], FLAGS.threshold)
online_im = plot_tracking_dict(frame, num_classes, online_tlwhs,
online_ids, online_scores, frame_id,
ids2names)
if FLAGS.save_images:
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
img_name = os.path.split(img_file)[-1]
out_path = os.path.join(FLAGS.output_dir, img_name)
cv2.imwrite(out_path, online_im)
print("save result to: " + out_path)
def predict_video(detector, camera_id):
video_name = 'mot_output.mp4'
if camera_id != -1:
capture = cv2.VideoCapture(camera_id)
else:
capture = cv2.VideoCapture(FLAGS.video_file)
video_name = os.path.split(FLAGS.video_file)[-1]
    # Get video info: resolution, fps, frame count
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("fps: %d, frame_count: %d" % (fps, frame_count))
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name)
if not FLAGS.save_images:
video_format = 'mp4v'
fourcc = cv2.VideoWriter_fourcc(*video_format)
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
frame_id = 0
timer = MOTTimer()
results = defaultdict(list) # support single class and multi classes
num_classes = detector.num_classes
data_type = 'mcmot' if num_classes > 1 else 'mot'
ids2names = detector.pred_config.labels
center_traj = None
entrance = None
records = None
if FLAGS.draw_center_traj:
center_traj = [{} for i in range(num_classes)]
if num_classes == 1:
id_set = set()
interval_id_set = set()
in_id_list = list()
out_id_list = list()
prev_center = dict()
records = list()
entrance = [0, height / 2., width, height / 2.]
video_fps = fps
    while True:
ret, frame = capture.read()
if not ret:
break
timer.tic()
online_tlwhs, online_scores, online_ids = detector.predict(
[frame], FLAGS.threshold)
timer.toc()
for cls_id in range(num_classes):
results[cls_id].append((frame_id + 1, online_tlwhs[cls_id],
online_scores[cls_id], online_ids[cls_id]))
fps = 1. / timer.duration
# NOTE: just implement flow statistic for one class
if num_classes == 1:
result = (frame_id + 1, online_tlwhs[0], online_scores[0],
online_ids[0])
statistic = flow_statistic(
result, FLAGS.secs_interval, FLAGS.do_entrance_counting,
video_fps, entrance, id_set, interval_id_set, in_id_list,
out_id_list, prev_center, records, data_type, num_classes)
id_set = statistic['id_set']
interval_id_set = statistic['interval_id_set']
in_id_list = statistic['in_id_list']
out_id_list = statistic['out_id_list']
prev_center = statistic['prev_center']
records = statistic['records']
elif num_classes > 1 and FLAGS.do_entrance_counting:
raise NotImplementedError(
'Multi-class flow counting is not implemented now!')
im = plot_tracking_dict(
frame,
num_classes,
online_tlwhs,
online_ids,
online_scores,
frame_id=frame_id,
fps=fps,
ids2names=ids2names,
do_entrance_counting=FLAGS.do_entrance_counting,
entrance=entrance,
records=records,
center_traj=center_traj)
if FLAGS.save_images:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
if not os.path.exists(save_dir):
os.makedirs(save_dir)
cv2.imwrite(
os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im)
else:
writer.write(im)
frame_id += 1
print('detect frame: %d, fps: %f' % (frame_id, fps))
if camera_id != -1:
cv2.imshow('Tracking Detection', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if FLAGS.save_mot_txts:
result_filename = os.path.join(FLAGS.output_dir,
video_name.split('.')[-2] + '.txt')
write_mot_results(result_filename, results, data_type, num_classes)
if num_classes == 1:
result_filename = os.path.join(
FLAGS.output_dir,
video_name.split('.')[-2] + '_flow_statistic.txt')
f = open(result_filename, 'w')
for line in records:
f.write(line)
        print('Flow statistic saved in {}'.format(result_filename))
f.close()
if FLAGS.save_images:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(save_dir,
out_path)
os.system(cmd_str)
print('Save video in {}.'.format(out_path))
else:
writer.release()
def main():
pred_config = PredictConfig(FLAGS.model_dir)
detector = JDE_Detector(
pred_config,
FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
predict_video(detector, FLAGS.camera_id)
else:
# predict from image
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
predict_image(detector, img_list)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
else:
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
model_dir = FLAGS.model_dir
mode = FLAGS.run_mode
model_info = {
'model_name': model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
data_info = {
'batch_size': 1,
'shape': "dynamic_shape",
'data_num': perf_info['img_num']
}
det_log = PaddleInferBenchmark(detector.config, model_info,
data_info, perf_info, mems)
det_log('MOT')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
main()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
import yaml
import cv2
import re
import numpy as np
from collections import defaultdict
import paddle
from paddle.inference import Config
from paddle.inference import create_predictor
from picodet_postprocess import PicoDetPostProcess
from utils import argsparser, Timer, get_current_memory_mb, _is_valid_video, video2frames
from det_infer import Detector, DetectorPicoDet, get_test_images, print_arguments, PredictConfig
from det_infer import load_predictor
from benchmark_utils import PaddleInferBenchmark
from visualize import plot_tracking
from mot.tracker import DeepSORTTracker
from mot.utils import MOTTimer, write_mot_results, flow_statistic, scale_coords, clip_box, preprocess_reid
from mot.mtmct.utils import parse_bias
from mot.mtmct.postprocess import trajectory_fusion, sub_cluster, gen_res, print_mtmct_result
from mot.mtmct.postprocess import get_mtmct_matching_results, save_mtmct_crops, save_mtmct_vis_results
# Supported MOT models
MOT_SUPPORT_MODELS = {'DeepSORT'}
def bench_log(detector, img_list, model_info, batch_size=1, name=None):
mems = {
'cpu_rss_mb': detector.cpu_mem / len(img_list),
'gpu_rss_mb': detector.gpu_mem / len(img_list),
'gpu_util': detector.gpu_util * 100 / len(img_list)
}
perf_info = detector.det_times.report(average=True)
data_info = {
'batch_size': batch_size,
'shape': "dynamic_shape",
'data_num': perf_info['img_num']
}
log = PaddleInferBenchmark(detector.config, model_info, data_info,
perf_info, mems)
log(name)
class SDE_Detector(Detector):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1088,
trt_opt_shape=608,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
super(SDE_Detector, self).__init__(
pred_config=pred_config,
model_dir=model_dir,
device=device,
run_mode=run_mode,
batch_size=batch_size,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
        assert batch_size == 1, "The SDE Detector only supports batch size=1 now"
self.pred_config = pred_config
    def postprocess(self, boxes, inputs, ori_image_shape, threshold, scaled):
over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0]
if len(over_thres_idx) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
return pred_dets, pred_xyxys
else:
boxes = boxes[over_thres_idx]
        if not scaled:
            # scaled means whether the coords after detector outputs
            # have been scaled back to the original image, set True
            # in general detector, set False in JDE YOLOv3.
            input_shape = inputs['image'].shape[2:]
            im_shape = inputs['im_shape'][0]
            scale_factor = inputs['scale_factor'][0]
            pred_bboxes = scale_coords(boxes[:, 2:], input_shape, im_shape,
                                       scale_factor)
else:
pred_bboxes = boxes[:, 2:]
pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
if len(keep_idx[0]) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
return pred_dets, pred_xyxys
pred_scores = boxes[:, 1:2][keep_idx[0]]
pred_cls_ids = boxes[:, 0:1][keep_idx[0]]
pred_tlwhs = np.concatenate(
(pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
axis=1)
pred_dets = np.concatenate(
(pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
return pred_dets, pred_xyxys
def predict(self, image_path, ori_image_shape, scaled, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image_path (list[str]): path of images, only support one image path
(batch_size=1) in tracking model
            ori_image_shape (list[int]): original image shape
            threshold (float): score threshold of the predicted boxes
scaled (bool): whether the coords after detector outputs are scaled,
default False in jde yolov3, set True in general detector.
Returns:
pred_dets (np.ndarray, [N, 6])
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image_path)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start()
if len(boxes) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
else:
            pred_dets, pred_xyxys = self.postprocess(
                boxes, inputs, ori_image_shape, threshold, scaled)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1
return pred_dets, pred_xyxys
class SDE_DetectorPicoDet(DetectorPicoDet):
"""
Args:
pred_config (object): config of model, defined by `Config(model_dir)`
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
trt_opt_shape (int): opt shape for dynamic shape in trt
        trt_calib_mode (bool): If the model is produced by TRT offline quantitative
            calibration, trt_calib_mode needs to be set to True
cpu_threads (int): cpu threads
enable_mkldnn (bool): whether to open MKLDNN
"""
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=1,
trt_min_shape=1,
trt_max_shape=1088,
trt_opt_shape=608,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
super(SDE_DetectorPicoDet, self).__init__(
pred_config=pred_config,
model_dir=model_dir,
device=device,
run_mode=run_mode,
batch_size=batch_size,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
        assert batch_size == 1, "The SDE_DetectorPicoDet only supports batch size=1 now"
self.pred_config = pred_config
def postprocess_bboxes(self, boxes, input_shape, im_shape, scale_factor,
threshold):
over_thres_idx = np.nonzero(boxes[:, 1:2] >= threshold)[0]
if len(over_thres_idx) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
return pred_dets, pred_xyxys
else:
boxes = boxes[over_thres_idx]
pred_bboxes = boxes[:, 2:]
            # clip_box here expects the original image shape; recover it
            # from the resized shape and the scale factor
            ori_image_shape = np.round(im_shape[0] / scale_factor[0]).astype(int)
            pred_xyxys, keep_idx = clip_box(pred_bboxes, ori_image_shape)
if len(keep_idx[0]) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
return pred_dets, pred_xyxys
pred_scores = boxes[:, 1:2][keep_idx[0]]
pred_cls_ids = boxes[:, 0:1][keep_idx[0]]
pred_tlwhs = np.concatenate(
(pred_xyxys[:, 0:2], pred_xyxys[:, 2:4] - pred_xyxys[:, 0:2] + 1),
axis=1)
pred_dets = np.concatenate(
(pred_tlwhs, pred_scores, pred_cls_ids), axis=1)
return pred_dets, pred_xyxys
def predict(self, image, scaled, threshold=0.5, warmup=0, repeats=1):
'''
Args:
image (np.ndarray): image numpy data
            threshold (float): score threshold of the predicted boxes
scaled (bool): whether the coords after detector outputs are scaled,
default False in jde yolov3, set True in general detector.
Returns:
pred_dets (np.ndarray, [N, 6])
'''
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(image)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
np_score_list, np_boxes_list = [], []
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
boxes = boxes_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
np_score_list.clear()
np_boxes_list.clear()
output_names = self.predictor.get_output_names()
num_outs = int(len(output_names) / 2)
for out_idx in range(num_outs):
np_score_list.append(
self.predictor.get_output_handle(output_names[out_idx])
.copy_to_cpu())
np_boxes_list.append(
self.predictor.get_output_handle(output_names[
out_idx + num_outs]).copy_to_cpu())
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.img_num += 1
self.det_times.postprocess_time_s.start()
self.postprocess = PicoDetPostProcess(
inputs['image'].shape[2:],
inputs['im_shape'],
inputs['scale_factor'],
strides=self.pred_config.fpn_stride,
nms_threshold=self.pred_config.nms['nms_threshold'])
boxes, boxes_num = self.postprocess(np_score_list, np_boxes_list)
if len(boxes) == 0:
pred_dets = np.zeros((1, 6), dtype=np.float32)
pred_xyxys = np.zeros((1, 4), dtype=np.float32)
else:
input_shape = inputs['image'].shape[2:]
im_shape = inputs['im_shape']
scale_factor = inputs['scale_factor']
pred_dets, pred_xyxys = self.postprocess_bboxes(
boxes, input_shape, im_shape, scale_factor, threshold)
return pred_dets, pred_xyxys
class SDE_ReID(object):
def __init__(self,
pred_config,
model_dir,
device='CPU',
run_mode='fluid',
batch_size=50,
trt_min_shape=1,
trt_max_shape=1088,
trt_opt_shape=608,
trt_calib_mode=False,
cpu_threads=1,
enable_mkldnn=False):
self.pred_config = pred_config
self.predictor, self.config = load_predictor(
model_dir,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
device=device,
use_dynamic_shape=self.pred_config.use_dynamic_shape,
trt_min_shape=trt_min_shape,
trt_max_shape=trt_max_shape,
trt_opt_shape=trt_opt_shape,
trt_calib_mode=trt_calib_mode,
cpu_threads=cpu_threads,
enable_mkldnn=enable_mkldnn)
self.det_times = Timer()
self.cpu_mem, self.gpu_mem, self.gpu_util = 0, 0, 0
self.batch_size = batch_size
assert pred_config.tracker, "Tracking model should have tracker"
pt = pred_config.tracker
max_age = pt['max_age'] if 'max_age' in pt else 30
max_iou_distance = pt[
'max_iou_distance'] if 'max_iou_distance' in pt else 0.7
self.tracker = DeepSORTTracker(
max_age=max_age, max_iou_distance=max_iou_distance)
def get_crops(self, xyxy, ori_img):
w, h = self.tracker.input_size
self.det_times.preprocess_time_s.start()
crops = []
xyxy = xyxy.astype(np.int64)
ori_img = ori_img.transpose(1, 0, 2) # [h,w,3]->[w,h,3]
for i, bbox in enumerate(xyxy):
crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :]
crops.append(crop)
crops = preprocess_reid(crops, w, h)
self.det_times.preprocess_time_s.end()
return crops
def preprocess(self, crops):
        # to keep inference fast, only use the top-k crops
crops = crops[:self.batch_size]
inputs = {}
inputs['crops'] = np.array(crops).astype('float32')
return inputs
def postprocess(self, pred_dets, pred_embs):
tracker = self.tracker
tracker.predict()
online_targets = tracker.update(pred_dets, pred_embs)
online_tlwhs, online_scores, online_ids = [], [], []
for t in online_targets:
if not t.is_confirmed() or t.time_since_update > 1:
continue
tlwh = t.to_tlwh()
tscore = t.score
tid = t.track_id
if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
3] > tracker.vertical_ratio:
continue
online_tlwhs.append(tlwh)
online_scores.append(tscore)
online_ids.append(tid)
tracking_outs = {
'online_tlwhs': online_tlwhs,
'online_scores': online_scores,
'online_ids': online_ids,
}
return tracking_outs
def postprocess_mtmct(self, pred_dets, pred_embs, frame_id, seq_name):
tracker = self.tracker
tracker.predict()
online_targets = tracker.update(pred_dets, pred_embs)
online_tlwhs, online_scores, online_ids = [], [], []
online_tlbrs, online_feats = [], []
for t in online_targets:
if not t.is_confirmed() or t.time_since_update > 1:
continue
tlwh = t.to_tlwh()
tscore = t.score
tid = t.track_id
if tlwh[2] * tlwh[3] <= tracker.min_box_area: continue
if tracker.vertical_ratio > 0 and tlwh[2] / tlwh[
3] > tracker.vertical_ratio:
continue
online_tlwhs.append(tlwh)
online_scores.append(tscore)
online_ids.append(tid)
online_tlbrs.append(t.to_tlbr())
online_feats.append(t.feat)
tracking_outs = {
'online_tlwhs': online_tlwhs,
'online_scores': online_scores,
'online_ids': online_ids,
'feat_data': {},
}
for _tlbr, _id, _feat in zip(online_tlbrs, online_ids, online_feats):
feat_data = {}
feat_data['bbox'] = _tlbr
feat_data['frame'] = f"{frame_id:06d}"
feat_data['id'] = _id
_imgname = f'{seq_name}_{_id}_{frame_id}.jpg'
feat_data['imgname'] = _imgname
feat_data['feat'] = _feat
tracking_outs['feat_data'].update({_imgname: feat_data})
return tracking_outs
def predict(self,
crops,
pred_dets,
warmup=0,
repeats=1,
MTMCT=False,
frame_id=0,
seq_name=''):
self.det_times.preprocess_time_s.start()
inputs = self.preprocess(crops)
self.det_times.preprocess_time_s.end()
input_names = self.predictor.get_input_names()
for i in range(len(input_names)):
input_tensor = self.predictor.get_input_handle(input_names[i])
input_tensor.copy_from_cpu(inputs[input_names[i]])
for i in range(warmup):
self.predictor.run()
output_names = self.predictor.get_output_names()
feature_tensor = self.predictor.get_output_handle(output_names[0])
pred_embs = feature_tensor.copy_to_cpu()
self.det_times.inference_time_s.start()
for i in range(repeats):
self.predictor.run()
output_names = self.predictor.get_output_names()
feature_tensor = self.predictor.get_output_handle(output_names[0])
pred_embs = feature_tensor.copy_to_cpu()
self.det_times.inference_time_s.end(repeats=repeats)
self.det_times.postprocess_time_s.start()
        if not MTMCT:
tracking_outs = self.postprocess(pred_dets, pred_embs)
else:
tracking_outs = self.postprocess_mtmct(pred_dets, pred_embs,
frame_id, seq_name)
self.det_times.postprocess_time_s.end()
self.det_times.img_num += 1
return tracking_outs
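# A minimal end-to-end sketch (the frame and model handles are assumptions;
# this mirrors how predict_image/predict_video below wire things together):
#   pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
#                                            FLAGS.threshold)
#   crops = reid_model.get_crops(pred_xyxys, frame)
#   tracking_outs = reid_model.predict(crops, pred_dets)
#   tlwhs, ids = tracking_outs['online_tlwhs'], tracking_outs['online_ids']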
def predict_image(detector, reid_model, image_list):
image_list.sort()
for i, img_file in enumerate(image_list):
frame = cv2.imread(img_file)
if FLAGS.run_benchmark:
pred_dets, pred_xyxys = detector.predict(
[frame], FLAGS.scaled, FLAGS.threshold, warmup=10, repeats=10)
cm, gm, gu = get_current_memory_mb()
detector.cpu_mem += cm
detector.gpu_mem += gm
detector.gpu_util += gu
print('Test iter {}, file name:{}'.format(i, img_file))
else:
pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
FLAGS.threshold)
if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
print('Frame {} has no object, try to modify score threshold.'.
format(i))
online_im = frame
else:
# reid process
crops = reid_model.get_crops(pred_xyxys, frame)
if FLAGS.run_benchmark:
tracking_outs = reid_model.predict(
crops, pred_dets, warmup=10, repeats=10)
else:
tracking_outs = reid_model.predict(crops, pred_dets)
online_tlwhs = tracking_outs['online_tlwhs']
online_scores = tracking_outs['online_scores']
online_ids = tracking_outs['online_ids']
online_im = plot_tracking(
frame, online_tlwhs, online_ids, online_scores, frame_id=i)
if FLAGS.save_images:
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
img_name = os.path.split(img_file)[-1]
out_path = os.path.join(FLAGS.output_dir, img_name)
cv2.imwrite(out_path, online_im)
print("save result to: " + out_path)
def predict_video(detector, reid_model, camera_id):
if camera_id != -1:
capture = cv2.VideoCapture(camera_id)
video_name = 'mot_output.mp4'
else:
capture = cv2.VideoCapture(FLAGS.video_file)
video_name = os.path.split(FLAGS.video_file)[-1]
# Get Video info : resolution, fps, frame count
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = int(capture.get(cv2.CAP_PROP_FPS))
frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
print("fps: %d, frame_count: %d" % (fps, frame_count))
if not os.path.exists(FLAGS.output_dir):
os.makedirs(FLAGS.output_dir)
out_path = os.path.join(FLAGS.output_dir, video_name)
if not FLAGS.save_images:
video_format = 'mp4v'
fourcc = cv2.VideoWriter_fourcc(*video_format)
writer = cv2.VideoWriter(out_path, fourcc, fps, (width, height))
frame_id = 0
timer = MOTTimer()
results = defaultdict(list)
id_set = set()
interval_id_set = set()
in_id_list = list()
out_id_list = list()
prev_center = dict()
records = list()
entrance = [0, height / 2., width, height / 2.]
video_fps = fps
while (1):
ret, frame = capture.read()
if not ret:
break
timer.tic()
pred_dets, pred_xyxys = detector.predict([frame], FLAGS.scaled,
FLAGS.threshold)
if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
print('Frame {} has no object, try to modify score threshold.'.
format(frame_id))
timer.toc()
im = frame
else:
# reid process
crops = reid_model.get_crops(pred_xyxys, frame)
tracking_outs = reid_model.predict(crops, pred_dets)
online_tlwhs = tracking_outs['online_tlwhs']
online_scores = tracking_outs['online_scores']
online_ids = tracking_outs['online_ids']
results[0].append(
(frame_id + 1, online_tlwhs, online_scores, online_ids))
            # NOTE: flow statistic is only implemented for one class
result = (frame_id + 1, online_tlwhs, online_scores, online_ids)
statistic = flow_statistic(
result, FLAGS.secs_interval, FLAGS.do_entrance_counting,
video_fps, entrance, id_set, interval_id_set, in_id_list,
out_id_list, prev_center, records)
id_set = statistic['id_set']
interval_id_set = statistic['interval_id_set']
in_id_list = statistic['in_id_list']
out_id_list = statistic['out_id_list']
prev_center = statistic['prev_center']
records = statistic['records']
timer.toc()
fps = 1. / timer.duration
im = plot_tracking(
frame,
online_tlwhs,
online_ids,
online_scores,
frame_id=frame_id,
fps=fps,
do_entrance_counting=FLAGS.do_entrance_counting,
entrance=entrance)
if FLAGS.save_images:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
if not os.path.exists(save_dir):
os.makedirs(save_dir)
cv2.imwrite(
os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), im)
else:
writer.write(im)
frame_id += 1
print('detect frame:%d, fps: %f' % (frame_id, fps))
if camera_id != -1:
cv2.imshow('Tracking Detection', im)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
if FLAGS.save_mot_txts:
result_filename = os.path.join(FLAGS.output_dir,
video_name.split('.')[-2] + '.txt')
write_mot_results(result_filename, results)
result_filename = os.path.join(
FLAGS.output_dir, video_name.split('.')[-2] + '_flow_statistic.txt')
f = open(result_filename, 'w')
for line in records:
f.write(line)
print('Flow statistic save in {}'.format(result_filename))
f.close()
if FLAGS.save_images:
save_dir = os.path.join(FLAGS.output_dir, video_name.split('.')[-2])
cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg {}'.format(save_dir,
out_path)
os.system(cmd_str)
print('Save video in {}.'.format(out_path))
else:
writer.release()
def predict_mtmct_seq(detector, reid_model, seq_name, output_dir):
fpath = os.path.join(FLAGS.mtmct_dir, seq_name)
if os.path.exists(os.path.join(fpath, 'img1')):
fpath = os.path.join(fpath, 'img1')
assert os.path.isdir(fpath), '{} should be a directory'.format(fpath)
image_list = os.listdir(fpath)
image_list.sort()
assert len(image_list) > 0, '{} has no images.'.format(fpath)
results = defaultdict(list)
mot_features_dict = {} # cid_tid_fid feats
    print('Found {} frames in total in seq {}.'.format(len(image_list), seq_name))
for frame_id, img_file in enumerate(image_list):
if frame_id % 40 == 0:
print('Processing frame {} of seq {}.'.format(frame_id, seq_name))
frame = cv2.imread(os.path.join(fpath, img_file))
        frame_path = os.path.join(fpath, img_file)
        pred_dets, pred_xyxys = detector.predict([frame_path], FLAGS.scaled,
                                                 FLAGS.threshold)
if len(pred_dets) == 1 and np.sum(pred_dets) == 0:
print('Frame {} has no object, try to modify score threshold.'.
format(frame_id))
online_im = frame
else:
# reid process
crops = reid_model.get_crops(pred_xyxys, frame)
tracking_outs = reid_model.predict(
crops,
pred_dets,
MTMCT=True,
frame_id=frame_id,
seq_name=seq_name)
feat_data_dict = tracking_outs['feat_data']
mot_features_dict = dict(mot_features_dict, **feat_data_dict)
online_tlwhs = tracking_outs['online_tlwhs']
online_scores = tracking_outs['online_scores']
online_ids = tracking_outs['online_ids']
online_im = plot_tracking(frame, online_tlwhs, online_ids,
online_scores, frame_id)
results[0].append(
(frame_id + 1, online_tlwhs, online_scores, online_ids))
if FLAGS.save_images:
save_dir = os.path.join(output_dir, seq_name)
if not os.path.exists(save_dir): os.makedirs(save_dir)
img_name = os.path.split(img_file)[-1]
out_path = os.path.join(save_dir, img_name)
cv2.imwrite(out_path, online_im)
if FLAGS.save_mot_txts:
result_filename = os.path.join(output_dir, seq_name + '.txt')
write_mot_results(result_filename, results)
return mot_features_dict
def predict_mtmct(detector, reid_model, mtmct_dir, mtmct_cfg):
MTMCT = mtmct_cfg['MTMCT']
    assert MTMCT, 'predict_mtmct should be used for MTMCT.'
cameras_bias = mtmct_cfg['cameras_bias']
cid_bias = parse_bias(cameras_bias)
scene_cluster = list(cid_bias.keys())
    # 1. zone related parameters
use_zone = mtmct_cfg['use_zone']
zone_path = mtmct_cfg['zone_path']
    # 2. trick parameters, can be used for other MTMCT datasets
use_ff = mtmct_cfg['use_ff']
use_rerank = mtmct_cfg['use_rerank']
    # 3. camera related parameters
use_camera = mtmct_cfg['use_camera']
use_st_filter = mtmct_cfg['use_st_filter']
    # 4. roi related parameters
use_roi = mtmct_cfg['use_roi']
roi_dir = mtmct_cfg['roi_dir']
mot_list_breaks = []
cid_tid_dict = dict()
output_dir = FLAGS.output_dir
if not os.path.exists(output_dir): os.makedirs(output_dir)
seqs = os.listdir(mtmct_dir)
seqs.sort()
for seq in seqs:
fpath = os.path.join(mtmct_dir, seq)
if os.path.isfile(fpath) and _is_valid_video(fpath):
ext = seq.split('.')[-1]
seq = seq.split('.')[-2]
print('ffmpeg processing of video {}'.format(fpath))
frames_path = video2frames(video_path=fpath, outpath=mtmct_dir, frame_rate=25)
fpath = os.path.join(mtmct_dir, seq)
        if not os.path.isdir(fpath):
            print('{} is not an image folder.'.format(fpath))
continue
mot_features_dict = predict_mtmct_seq(detector, reid_model,
seq, output_dir)
        cid = int(re.sub('[a-zA-Z]', '', seq))
tid_data, mot_list_break = trajectory_fusion(
mot_features_dict,
cid,
cid_bias,
use_zone=use_zone,
zone_path=zone_path)
mot_list_breaks.append(mot_list_break)
# single seq process
for line in tid_data:
tracklet = tid_data[line]
tid = tracklet['tid']
if (cid, tid) not in cid_tid_dict:
cid_tid_dict[(cid, tid)] = tracklet
map_tid = sub_cluster(
cid_tid_dict,
scene_cluster,
use_ff=use_ff,
use_rerank=use_rerank,
use_camera=use_camera,
use_st_filter=use_st_filter)
pred_mtmct_file = os.path.join(output_dir, 'mtmct_result.txt')
if use_camera:
gen_res(pred_mtmct_file, scene_cluster, map_tid, mot_list_breaks)
else:
gen_res(
pred_mtmct_file,
scene_cluster,
map_tid,
mot_list_breaks,
use_roi=use_roi,
roi_dir=roi_dir)
if FLAGS.save_images:
        camera_results, cid_tid_fid_res = get_mtmct_matching_results(
            pred_mtmct_file)
crops_dir = os.path.join(output_dir, 'mtmct_crops')
save_mtmct_crops(
cid_tid_fid_res, images_dir=mtmct_dir, crops_dir=crops_dir)
save_dir = os.path.join(output_dir, 'mtmct_vis')
save_mtmct_vis_results(
            camera_results,
images_dir=mtmct_dir,
save_dir=save_dir,
save_videos=FLAGS.save_images)
    # evaluation metrics
data_root_gt = os.path.join(mtmct_dir, '..', 'gt', 'gt.txt')
if os.path.exists(data_root_gt):
print_mtmct_result(data_root_gt, pred_mtmct_file)
def main():
pred_config = PredictConfig(FLAGS.model_dir)
detector_func = 'SDE_Detector'
if pred_config.arch == 'PicoDet':
detector_func = 'SDE_DetectorPicoDet'
detector = eval(detector_func)(pred_config,
FLAGS.model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn)
pred_config = PredictConfig(FLAGS.reid_model_dir)
reid_model = SDE_ReID(
pred_config,
FLAGS.reid_model_dir,
device=FLAGS.device,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.reid_batch_size,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
trt_opt_shape=FLAGS.trt_opt_shape,
trt_calib_mode=FLAGS.trt_calib_mode,
cpu_threads=FLAGS.cpu_threads,
enable_mkldnn=FLAGS.enable_mkldnn)
# predict from video file or camera video stream
if FLAGS.video_file is not None or FLAGS.camera_id != -1:
predict_video(detector, reid_model, FLAGS.camera_id)
elif FLAGS.mtmct_dir is not None:
mtmct_cfg_file = FLAGS.mtmct_cfg
with open(mtmct_cfg_file) as f:
mtmct_cfg = yaml.safe_load(f)
predict_mtmct(detector, reid_model, FLAGS.mtmct_dir, mtmct_cfg)
else:
# predict from image
img_list = get_test_images(FLAGS.image_dir, FLAGS.image_file)
predict_image(detector, reid_model, img_list)
if not FLAGS.run_benchmark:
detector.det_times.info(average=True)
reid_model.det_times.info(average=True)
else:
mode = FLAGS.run_mode
det_model_dir = FLAGS.model_dir
det_model_info = {
'model_name': det_model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
bench_log(detector, img_list, det_model_info, name='Det')
reid_model_dir = FLAGS.reid_model_dir
reid_model_info = {
'model_name': reid_model_dir.strip('/').split('/')[-1],
'precision': mode.split('_')[-1]
}
bench_log(reid_model, img_list, reid_model_info, name='ReID')
if __name__ == '__main__':
paddle.enable_static()
parser = argsparser()
FLAGS = parser.parse_args()
print_arguments(FLAGS)
FLAGS.device = FLAGS.device.upper()
assert FLAGS.device in ['CPU', 'GPU', 'XPU'
], "device should be CPU, GPU or XPU"
main()
# config for MTMCT
MTMCT: True
cameras_bias:
c041: 0
c042: 0
# 1. zone related parameters
use_zone: True
zone_path: dataset/mot/aic21mtmct_vehicle/S06/zone
# 2. trick parameters, can be used for other MTMCT datasets
use_ff: True
use_rerank: True
# 3. camera related parameters
use_camera: True
use_st_filter: False
# 4. roi related parameters
use_roi: True
roi_dir: dataset/mot/aic21mtmct_vehicle/S06
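# Usage sketch (the script name below is an assumption; all flags exist in
# the deployment argument parser):
#   python mot_sde_infer.py --model_dir=<det_model> \
#       --reid_model_dir=<reid_model> \
#       --mtmct_dir=dataset/mot/aic21mtmct_vehicle/S06 \
#       --mtmct_cfg=mtmct_cfg.yml --device=GPU --scaled=True --save_images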
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from scipy.special import softmax
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
        top_k: keep top_k results. If top_k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
picked: a list of indexes of the kept boxes
"""
scores = box_scores[:, -1]
boxes = box_scores[:, :-1]
picked = []
indexes = np.argsort(scores)
indexes = indexes[-candidate_size:]
while len(indexes) > 0:
current = indexes[-1]
picked.append(current)
if 0 < top_k == len(picked) or len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[:-1]
rest_boxes = boxes[indexes, :]
iou = iou_of(
rest_boxes,
np.expand_dims(
current_box, axis=0), )
indexes = indexes[iou <= iou_threshold]
return box_scores[picked, :]
def iou_of(boxes0, boxes1, eps=1e-5):
"""Return intersection-over-union (Jaccard index) of boxes.
Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
overlap_area = area_of(overlap_left_top, overlap_right_bottom)
area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
return overlap_area / (area0 + area1 - overlap_area + eps)
def area_of(left_top, right_bottom):
"""Compute the areas of rectangles given two corners.
Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.
Returns:
area (N): return the area.
"""
hw = np.clip(right_bottom - left_top, 0.0, None)
return hw[..., 0] * hw[..., 1]
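# A toy self-test sketch for hard_nms (the demo function and its values are
# illustrative, not part of the original module): two heavily overlapping
# boxes and one separate box; with iou_threshold=0.5 only the higher-scoring
# overlapping box survives.
def _hard_nms_demo():
    box_scores = np.array(
        [
            [10., 10., 50., 50., 0.9],  # kept: highest score
            [12., 12., 52., 52., 0.8],  # suppressed: IoU ~0.82 with box 0
            [100., 100., 140., 140., 0.7],  # kept: no overlap
        ],
        dtype=np.float32)
    kept = hard_nms(box_scores, iou_threshold=0.5)
    print(kept[:, -1])  # -> [0.9 0.7]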
class PicoDetPostProcess(object):
"""
Args:
input_shape (int): network input image size
ori_shape (int): ori image shape of before padding
scale_factor (float): scale factor of ori image
        strides (list): downsample strides of the multi-level feature maps
"""
def __init__(self,
input_shape,
ori_shape,
scale_factor,
strides=[8, 16, 32, 64],
score_threshold=0.4,
nms_threshold=0.5,
nms_top_k=1000,
keep_top_k=100):
self.ori_shape = ori_shape
self.input_shape = input_shape
self.scale_factor = scale_factor
self.strides = strides
self.score_threshold = score_threshold
self.nms_threshold = nms_threshold
self.nms_top_k = nms_top_k
self.keep_top_k = keep_top_k
def warp_boxes(self, boxes, ori_shape):
"""Apply transform to boxes
"""
width, height = ori_shape[1], ori_shape[0]
n = len(boxes)
if n:
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
# xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate(
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip boxes
xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
return xy.astype(np.float32)
else:
return boxes
def __call__(self, scores, raw_boxes):
batch_size = raw_boxes[0].shape[0]
reg_max = int(raw_boxes[0].shape[-1] / 4 - 1)
out_boxes_num = []
out_boxes_list = []
for batch_id in range(batch_size):
# generate centers
decode_boxes = []
select_scores = []
for stride, box_distribute, score in zip(self.strides, raw_boxes,
scores):
box_distribute = box_distribute[batch_id]
score = score[batch_id]
# centers
fm_h = self.input_shape[0] / stride
fm_w = self.input_shape[1] / stride
h_range = np.arange(fm_h)
w_range = np.arange(fm_w)
ww, hh = np.meshgrid(w_range, h_range)
ct_row = (hh.flatten() + 0.5) * stride
ct_col = (ww.flatten() + 0.5) * stride
center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
# box distribution to distance
reg_range = np.arange(reg_max + 1)
box_distance = box_distribute.reshape((-1, reg_max + 1))
box_distance = softmax(box_distance, axis=1)
box_distance = box_distance * np.expand_dims(reg_range, axis=0)
box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
box_distance = box_distance * stride
# top K candidate
topk_idx = np.argsort(score.max(axis=1))[::-1]
topk_idx = topk_idx[:self.nms_top_k]
center = center[topk_idx]
score = score[topk_idx]
box_distance = box_distance[topk_idx]
# decode box
decode_box = center + [-1, -1, 1, 1] * box_distance
select_scores.append(score)
decode_boxes.append(decode_box)
# nms
bboxes = np.concatenate(decode_boxes, axis=0)
confidences = np.concatenate(select_scores, axis=0)
picked_box_probs = []
picked_labels = []
for class_index in range(0, confidences.shape[1]):
probs = confidences[:, class_index]
mask = probs > self.score_threshold
probs = probs[mask]
if probs.shape[0] == 0:
continue
subset_boxes = bboxes[mask, :]
box_probs = np.concatenate(
[subset_boxes, probs.reshape(-1, 1)], axis=1)
box_probs = hard_nms(
box_probs,
iou_threshold=self.nms_threshold,
top_k=self.keep_top_k, )
picked_box_probs.append(box_probs)
picked_labels.extend([class_index] * box_probs.shape[0])
if len(picked_box_probs) == 0:
                out_boxes_list.append(np.empty((0, 6)))
out_boxes_num.append(0)
else:
picked_box_probs = np.concatenate(picked_box_probs)
# resize output boxes
picked_box_probs[:, :4] = self.warp_boxes(
picked_box_probs[:, :4], self.ori_shape[batch_id])
im_scale = np.concatenate([
self.scale_factor[batch_id][::-1],
self.scale_factor[batch_id][::-1]
])
picked_box_probs[:, :4] /= im_scale
# clas score box
out_boxes_list.append(
np.concatenate(
[
np.expand_dims(
np.array(picked_labels),
axis=-1), np.expand_dims(
picked_box_probs[:, 4], axis=-1),
picked_box_probs[:, :4]
],
axis=1))
out_boxes_num.append(len(picked_labels))
out_boxes_list = np.concatenate(out_boxes_list, axis=0)
out_boxes_num = np.asarray(out_boxes_num).astype(np.int32)
return out_boxes_list, out_boxes_num
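# Usage sketch (shapes are assumptions read off the code above): for a
# 320x320 input with strides [8, 16, 32, 64] and reg_max = 7, raw_boxes
# holds one (batch, H*W, 4*(reg_max+1)) array per stride and scores holds
# one (batch, H*W, num_classes) array per stride:
#   post = PicoDetPostProcess((320, 320), np.array([[320., 320.]]),
#                             np.array([[1., 1.]]))
#   boxes, boxes_num = post(np_score_list, np_boxes_list)
# Each row of boxes is [class_id, score, x1, y1, x2, y2].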
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
def decode_image(im_file, im_info):
"""read rgb image
Args:
im_file (str|np.ndarray): input can be image path or np.ndarray
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
if isinstance(im_file, str):
with open(im_file, 'rb') as f:
im_read = f.read()
data = np.frombuffer(im_read, dtype='uint8')
im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
else:
im = im_file
im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
return im, im_info
class Resize(object):
"""resize image by target_size and max_size
Args:
target_size (int): the target size of image
keep_ratio (bool): whether keep_ratio or not, default true
interp (int): method of resize
"""
def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
self.keep_ratio = keep_ratio
self.interp = interp
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
im_channel = im.shape[2]
im_scale_y, im_scale_x = self.generate_scale(im)
im = cv2.resize(
im,
None,
None,
fx=im_scale_x,
fy=im_scale_y,
interpolation=self.interp)
im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
im_info['scale_factor'] = np.array(
[im_scale_y, im_scale_x]).astype('float32')
return im, im_info
def generate_scale(self, im):
"""
Args:
im (np.ndarray): image (np.ndarray)
Returns:
im_scale_x: the resize ratio of X
im_scale_y: the resize ratio of Y
"""
origin_shape = im.shape[:2]
im_c = im.shape[2]
if self.keep_ratio:
im_size_min = np.min(origin_shape)
im_size_max = np.max(origin_shape)
target_size_min = np.min(self.target_size)
target_size_max = np.max(self.target_size)
im_scale = float(target_size_min) / float(im_size_min)
if np.round(im_scale * im_size_max) > target_size_max:
im_scale = float(target_size_max) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
else:
resize_h, resize_w = self.target_size
im_scale_y = resize_h / float(origin_shape[0])
im_scale_x = resize_w / float(origin_shape[1])
return im_scale_y, im_scale_x
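# Worked resize example (numbers are illustrative): with keep_ratio=True,
# target_size [576, 320] and a 720x1280 frame, im_scale = 320/720 ≈ 0.444;
# since round(0.444 * 1280) = 569 <= 576, both scale factors stay at ~0.444.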
class NormalizeImage(object):
"""normalize image
Args:
mean (list): im - mean
std (list): im / std
        is_scale (bool): whether to scale the image by 1/255 first
"""
def __init__(self, mean, std, is_scale=True):
self.mean = mean
self.std = std
self.is_scale = is_scale
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.astype(np.float32, copy=False)
mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
std = np.array(self.std)[np.newaxis, np.newaxis, :]
if self.is_scale:
im = im / 255.0
im -= mean
im /= std
return im, im_info
class Permute(object):
"""permute image
Args:
to_bgr (bool): whether convert RGB to BGR
channel_first (bool): whether convert HWC to CHW
"""
    def __init__(self):
super(Permute, self).__init__()
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
im = im.transpose((2, 0, 1)).copy()
return im, im_info
class PadStride(object):
""" padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
Args:
stride (bool): model with FPN need image shape % stride == 0
"""
def __init__(self, stride=0):
self.coarsest_stride = stride
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
coarsest_stride = self.coarsest_stride
if coarsest_stride <= 0:
return im, im_info
im_c, im_h, im_w = im.shape
pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
padding_im[:, :im_h, :im_w] = im
return padding_im, im_info
class LetterBoxResize(object):
def __init__(self, target_size):
"""
        Resize the image to the target size with unchanged aspect ratio,
        padding the borders to form a letterbox.
Args:
target_size (int|list): image target size.
"""
super(LetterBoxResize, self).__init__()
if isinstance(target_size, int):
target_size = [target_size, target_size]
self.target_size = target_size
def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
# letterbox: resize a rectangular image to a padded rectangular
shape = img.shape[:2] # [height, width]
ratio_h = float(height) / shape[0]
ratio_w = float(width) / shape[1]
ratio = min(ratio_h, ratio_w)
new_shape = (round(shape[1] * ratio),
round(shape[0] * ratio)) # [width, height]
padw = (width - new_shape[0]) / 2
padh = (height - new_shape[1]) / 2
top, bottom = round(padh - 0.1), round(padh + 0.1)
left, right = round(padw - 0.1), round(padw + 0.1)
img = cv2.resize(
img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
img = cv2.copyMakeBorder(
img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=color) # padded rectangular
return img, ratio, padw, padh
def __call__(self, im, im_info):
"""
Args:
im (np.ndarray): image (np.ndarray)
im_info (dict): info of image
Returns:
im (np.ndarray): processed image (np.ndarray)
im_info (dict): info of processed image
"""
assert len(self.target_size) == 2
assert self.target_size[0] > 0 and self.target_size[1] > 0
height, width = self.target_size
h, w = im.shape[:2]
im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
new_shape = [round(h * ratio), round(w * ratio)]
im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
return im, im_info
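# Worked letterbox example (numbers are illustrative): fitting a 720x1280
# frame into height=608, width=1088 gives ratio = min(608/720, 1088/1280)
# ≈ 0.8444, a 608x1081 resized frame, and left/right padding of 3 and 4
# pixels to reach the full 1088-pixel width.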
def preprocess(im, preprocess_ops):
# process image by preprocess_ops
im_info = {
'scale_factor': np.array(
[1., 1.], dtype=np.float32),
'im_shape': None,
}
im, im_info = decode_image(im, im_info)
for operator in preprocess_ops:
im, im_info = operator(im, im_info)
return im, im_info
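# A minimal pipeline sketch (the operator choice and the ImageNet mean/std
# values are illustrative assumptions; the real chain is built from the
# exported model's infer_cfg.yml):
def _preprocess_demo(img_path):
    ops = [
        Resize(target_size=[320, 576], keep_ratio=False),
        NormalizeImage(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        Permute(),
    ]
    im, im_info = preprocess(img_path, ops)  # im is CHW float32
    return im, im_info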
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import os
import sys
import ast
import argparse
def argsparser():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."),
required=True)
parser.add_argument(
"--image_file", type=str, default=None, help="Path of image file.")
parser.add_argument(
"--image_dir",
type=str,
default=None,
help="Dir of image file, `image_file` has a higher priority.")
parser.add_argument(
"--batch_size", type=int, default=1, help="batch_size for inference.")
parser.add_argument(
"--video_file",
type=str,
default=None,
help="Path of video file, `video_file` or `camera_id` has a highest priority."
)
parser.add_argument(
"--camera_id",
type=int,
default=-1,
help="device id of camera to predict.")
parser.add_argument(
"--threshold", type=float, default=0.5, help="Threshold of score.")
parser.add_argument(
"--output_dir",
type=str,
default="output",
help="Directory of output visualization files.")
parser.add_argument(
"--run_mode",
type=str,
default='fluid',
help="mode of running(fluid/trt_fp32/trt_fp16/trt_int8)")
parser.add_argument(
"--device",
type=str,
default='cpu',
help="Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU."
)
parser.add_argument(
"--run_benchmark",
type=ast.literal_eval,
default=False,
help="Whether to predict a image_file repeatedly for benchmark")
parser.add_argument(
"--enable_mkldnn",
type=ast.literal_eval,
default=False,
help="Whether use mkldnn with CPU.")
parser.add_argument(
"--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
parser.add_argument(
"--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
parser.add_argument(
"--trt_max_shape",
type=int,
default=1280,
help="max_shape for TensorRT.")
parser.add_argument(
"--trt_opt_shape",
type=int,
default=640,
help="opt_shape for TensorRT.")
parser.add_argument(
"--trt_calib_mode",
type=bool,
default=False,
help="If the model is produced by TRT offline quantitative "
"calibration, trt_calib_mode need to set True.")
parser.add_argument(
'--save_images',
action='store_true',
help='Save visualization image results.')
parser.add_argument(
'--save_mot_txts',
action='store_true',
help='Save tracking results (txt).')
parser.add_argument(
'--scaled',
type=bool,
default=False,
help="Whether coords after detector outputs are scaled, False in JDE YOLOv3 "
"True in general detector.")
parser.add_argument(
"--reid_model_dir",
type=str,
default=None,
help=("Directory include:'model.pdiparams', 'model.pdmodel', "
"'infer_cfg.yml', created by tools/export_model.py."))
parser.add_argument(
"--reid_batch_size",
type=int,
default=50,
help="max batch_size for reid model inference.")
parser.add_argument(
"--do_entrance_counting",
action='store_true',
help="Whether counting the numbers of identifiers entering "
"or getting out from the entrance. Note that only support one-class"
"counting, multi-class counting is coming soon.")
parser.add_argument(
"--secs_interval",
type=int,
default=2,
help="The seconds interval to count after tracking")
parser.add_argument(
"--draw_center_traj",
action='store_true',
help="Whether drawing the trajectory of center")
parser.add_argument(
"--mtmct_dir",
type=str,
default=None,
help="The MTMCT scene video folder.")
parser.add_argument(
"--mtmct_cfg", type=str, default=None, help="The MTMCT config.")
return parser
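# A typical single-video invocation sketch (script name and model paths are
# assumptions; all flags are defined above):
#   python mot_sde_infer.py --model_dir=./picodet_det_model \
#       --reid_model_dir=./pplcnet_reid_model --video_file=test.mp4 \
#       --device=GPU --scaled=True --save_mot_txts --do_entrance_counting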
class Times(object):
def __init__(self):
self.time = 0.
# start time
self.st = 0.
# end time
self.et = 0.
def start(self):
self.st = time.time()
def end(self, repeats=1, accumulative=True):
self.et = time.time()
if accumulative:
self.time += (self.et - self.st) / repeats
else:
self.time = (self.et - self.st) / repeats
def reset(self):
self.time = 0.
self.st = 0.
self.et = 0.
def value(self):
return round(self.time, 4)
class Timer(Times):
def __init__(self):
super(Timer, self).__init__()
self.preprocess_time_s = Times()
self.inference_time_s = Times()
self.postprocess_time_s = Times()
self.img_num = 0
def info(self, average=False):
total_time = self.preprocess_time_s.value(
) + self.inference_time_s.value() + self.postprocess_time_s.value()
total_time = round(total_time, 4)
print("------------------ Inference Time Info ----------------------")
print("total_time(ms): {}, img_num: {}".format(total_time * 1000,
self.img_num))
preprocess_time = round(
self.preprocess_time_s.value() / max(1, self.img_num),
4) if average else self.preprocess_time_s.value()
postprocess_time = round(
self.postprocess_time_s.value() / max(1, self.img_num),
4) if average else self.postprocess_time_s.value()
inference_time = round(self.inference_time_s.value() /
max(1, self.img_num),
4) if average else self.inference_time_s.value()
average_latency = total_time / max(1, self.img_num)
qps = 0
if total_time > 0:
qps = 1 / average_latency
print("average latency time(ms): {:.2f}, QPS: {:2f}".format(
average_latency * 1000, qps))
print(
"preprocess_time(ms): {:.2f}, inference_time(ms): {:.2f}, postprocess_time(ms): {:.2f}".
format(preprocess_time * 1000, inference_time * 1000,
postprocess_time * 1000))
def report(self, average=False):
dic = {}
dic['preprocess_time_s'] = round(
self.preprocess_time_s.value() / max(1, self.img_num),
4) if average else self.preprocess_time_s.value()
dic['postprocess_time_s'] = round(
self.postprocess_time_s.value() / max(1, self.img_num),
4) if average else self.postprocess_time_s.value()
dic['inference_time_s'] = round(
self.inference_time_s.value() / max(1, self.img_num),
4) if average else self.inference_time_s.value()
dic['img_num'] = self.img_num
total_time = self.preprocess_time_s.value(
) + self.inference_time_s.value() + self.postprocess_time_s.value()
dic['total_time_s'] = round(total_time, 4)
return dic
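# A minimal usage sketch of the Timer helper above (the demo function is
# hypothetical, not part of the original module):
def _timer_demo():
    timer = Timer()
    timer.preprocess_time_s.start()
    time.sleep(0.01)  # stand-in for real preprocessing work
    timer.preprocess_time_s.end()
    timer.img_num += 1
    timer.info(average=True)  # prints per-image latency and QPS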
def get_current_memory_mb():
"""
    It is used to obtain the CPU and GPU memory usage while the program runs.
    Note that this function itself is time-consuming.
"""
import pynvml
import psutil
import GPUtil
gpu_id = int(os.environ.get('CUDA_VISIBLE_DEVICES', 0))
pid = os.getpid()
p = psutil.Process(pid)
info = p.memory_full_info()
cpu_mem = info.uss / 1024. / 1024.
gpu_mem = 0
gpu_percent = 0
gpus = GPUtil.getGPUs()
if gpu_id is not None and len(gpus) > 0:
gpu_percent = gpus[gpu_id].load
pynvml.nvmlInit()
handle = pynvml.nvmlDeviceGetHandleByIndex(0)
meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
gpu_mem = meminfo.used / 1024. / 1024.
return round(cpu_mem, 4), round(gpu_mem, 4), round(gpu_percent, 4)
def video2frames(video_path, outpath, frame_rate=25, **kargs):
def _dict2str(kargs):
cmd_str = ''
for k, v in kargs.items():
cmd_str += (' ' + str(k) + ' ' + str(v))
return cmd_str
ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
vid_name = os.path.basename(video_path).split('.')[0]
out_full_path = os.path.join(outpath, vid_name)
if not os.path.exists(out_full_path):
os.makedirs(out_full_path)
# video file name
outformat = os.path.join(out_full_path, '%05d.jpg')
    cmd = ffmpeg + [
        ' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
    ]
cmd = ''.join(cmd) + _dict2str(kargs)
    if os.system(cmd) != 0:
        raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
sys.stdout.flush()
return out_full_path
def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', '.flv')):
return f.lower().endswith(extensions)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
import os
import cv2
import numpy as np
from PIL import Image, ImageDraw
from collections import deque
def visualize_box_mask(im, results, labels, threshold=0.5):
"""
Args:
im (str/np.ndarray): path of image/np.ndarray read by cv2
        results (dict): include 'boxes': np.ndarray: shape: [N, 6], N: number of boxes,
            matrix element: [class, score, x_min, y_min, x_max, y_max]
labels (list): labels:['class1', ..., 'classn']
threshold (float): Threshold of score.
Returns:
im (PIL.Image.Image): visualized image
"""
if isinstance(im, str):
im = Image.open(im).convert('RGB')
else:
im = Image.fromarray(im)
if 'boxes' in results and len(results['boxes']) > 0:
im = draw_box(im, results['boxes'], labels, threshold=threshold)
return im
def get_color_map_list(num_classes):
"""
Args:
num_classes (int): number of class
Returns:
color_map (list): RGB color list
"""
color_map = num_classes * [0, 0, 0]
for i in range(0, num_classes):
j = 0
lab = i
while lab:
color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
j += 1
lab >>= 3
color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
return color_map
def draw_box(im, np_boxes, labels, threshold=0.5):
"""
Args:
im (PIL.Image.Image): PIL image
        np_boxes (np.ndarray): shape: [N, 6], N: number of boxes,
            matrix element: [class, score, x_min, y_min, x_max, y_max]
labels (list): labels:['class1', ..., 'classn']
threshold (float): threshold of box
Returns:
im (PIL.Image.Image): visualized image
"""
draw_thickness = min(im.size) // 320
draw = ImageDraw.Draw(im)
clsid2color = {}
color_list = get_color_map_list(len(labels))
expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1)
np_boxes = np_boxes[expect_boxes, :]
for dt in np_boxes:
clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
if clsid not in clsid2color:
clsid2color[clsid] = color_list[clsid]
color = tuple(clsid2color[clsid])
if len(bbox) == 4:
xmin, ymin, xmax, ymax = bbox
print('class_id:{:d}, confidence:{:.4f}, left_top:[{:.2f},{:.2f}],'
'right_bottom:[{:.2f},{:.2f}]'.format(
int(clsid), score, xmin, ymin, xmax, ymax))
# draw bbox
draw.line(
[(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
(xmin, ymin)],
width=draw_thickness,
fill=color)
elif len(bbox) == 8:
x1, y1, x2, y2, x3, y3, x4, y4 = bbox
draw.line(
[(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
width=2,
fill=color)
xmin = min(x1, x2, x3, x4)
ymin = min(y1, y2, y3, y4)
# draw label
text = "{} {:.4f}".format(labels[clsid], score)
tw, th = draw.textsize(text)
draw.rectangle(
[(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
return im
def get_color(idx):
idx = idx * 3
color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
return color
def plot_tracking(image,
tlwhs,
obj_ids,
scores=None,
frame_id=0,
fps=0.,
ids2names=[],
do_entrance_counting=False,
entrance=None):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 2
line_thickness = max(1, int(image.shape[1] / 500.))
cv2.putText(
im,
'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
(0, int(15 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
obj_id = int(obj_ids[i])
id_text = '{}'.format(int(obj_id))
if ids2names != []:
            assert len(
                ids2names) == 1, "plot_tracking only supports a single class."
id_text = '{}_'.format(ids2names[0]) + id_text
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(
im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness)
cv2.putText(
im,
id_text, (intbox[0], intbox[1] - 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=text_thickness)
if scores is not None:
text = '{:.2f}'.format(float(scores[i]))
cv2.putText(
im,
text, (intbox[0], intbox[1] + 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255),
thickness=text_thickness)
if do_entrance_counting:
entrance_line = tuple(map(int, entrance))
cv2.rectangle(
im,
entrance_line[0:2],
entrance_line[2:4],
color=(0, 255, 255),
thickness=line_thickness)
return im
def plot_tracking_dict(image,
num_classes,
tlwhs_dict,
obj_ids_dict,
scores_dict,
frame_id=0,
fps=0.,
ids2names=[],
do_entrance_counting=False,
entrance=None,
records=None,
center_traj=None):
im = np.ascontiguousarray(np.copy(image))
im_h, im_w = im.shape[:2]
text_scale = max(1, image.shape[1] / 1600.)
text_thickness = 2
line_thickness = max(1, int(image.shape[1] / 500.))
if num_classes == 1:
start = records[-1].find('Total')
end = records[-1].find('In')
cv2.putText(
im,
records[-1][start:end], (0, int(40 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
if num_classes == 1 and do_entrance_counting:
entrance_line = tuple(map(int, entrance))
cv2.rectangle(
im,
entrance_line[0:2],
entrance_line[2:4],
color=(0, 255, 255),
thickness=line_thickness)
# find start location for entrance counting data
start = records[-1].find('In')
cv2.putText(
im,
records[-1][start:-1], (0, int(60 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
for cls_id in range(num_classes):
tlwhs = tlwhs_dict[cls_id]
obj_ids = obj_ids_dict[cls_id]
scores = scores_dict[cls_id]
cv2.putText(
im,
'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)),
(0, int(15 * text_scale)),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=2)
record_id = set()
for i, tlwh in enumerate(tlwhs):
x1, y1, w, h = tlwh
intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
center = tuple(map(int, (x1 + w / 2., y1 + h / 2.)))
obj_id = int(obj_ids[i])
if center_traj is not None:
record_id.add(obj_id)
if obj_id not in center_traj[cls_id]:
center_traj[cls_id][obj_id] = deque(maxlen=30)
center_traj[cls_id][obj_id].append(center)
id_text = '{}'.format(int(obj_id))
if ids2names != []:
id_text = '{}_{}'.format(ids2names[cls_id], id_text)
else:
id_text = 'class{}_{}'.format(cls_id, id_text)
_line_thickness = 1 if obj_id <= 0 else line_thickness
color = get_color(abs(obj_id))
cv2.rectangle(
im,
intbox[0:2],
intbox[2:4],
color=color,
thickness=line_thickness)
cv2.putText(
im,
id_text, (intbox[0], intbox[1] - 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 0, 255),
thickness=text_thickness)
if scores is not None:
text = '{:.2f}'.format(float(scores[i]))
cv2.putText(
im,
text, (intbox[0], intbox[1] + 10),
cv2.FONT_HERSHEY_PLAIN,
text_scale, (0, 255, 255),
thickness=text_thickness)
if center_traj is not None:
for traj in center_traj:
for i in traj.keys():
if i not in record_id:
continue
for point in traj[i]:
cv2.circle(im, point, 3, (0, 0, 255), -1)
return im