diff --git a/deploy/fastdeploy/README.md b/deploy/fastdeploy/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..10979ffc6b548425f1767de8a8bd294267823c08
--- /dev/null
+++ b/deploy/fastdeploy/README.md
@@ -0,0 +1,93 @@
+# High-Performance All-Scenario Deployment of PaddleDetection Models with FastDeploy
+
+## Contents
+- [Introduction to FastDeploy](#1-introduction-to-fastdeploy)
+- [PaddleDetection Model Deployment](#2-paddledetection-model-deployment)
+- [FAQ](#3-faq)
+
+## 1. Introduction to FastDeploy
+
+
+**[⚡️FastDeploy](https://github.com/PaddlePaddle/FastDeploy)** is an **all-scenario**, **easy-to-use and flexible**, and **highly efficient** AI inference deployment tool that supports **cloud, edge, and device** deployment. With FastDeploy, PaddleDetection models can be quickly deployed on 10+ kinds of hardware, including X86 CPU, NVIDIA GPU, Phytium CPU, ARM CPU, Intel GPU, Kunlunxin, Ascend, Rockchip, Amlogic, and Sophgo, and with multiple inference backends such as Paddle Inference, Paddle Lite, TensorRT, OpenVINO, ONNXRuntime, RKNPU2, and SOPHGO.
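+
+As a quick start, the FastDeploy Python package can be installed from the PaddlePaddle wheel index; a minimal sketch following the FastDeploy installation docs (the GPU build is shown; `fastdeploy-python` is the CPU-only variant):
+
+```bash
+# Install the pre-built FastDeploy Python wheel (GPU build)
+pip install fastdeploy-gpu-python -f https://www.paddlepaddle.org.cn/whl/fastdeploy.html
+```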
+
+
+
+

+
+
+
+## 2. PaddleDetection Model Deployment
+
+
+### 2.1 Supported Hardware
+
+|Hardware|Supported|Guide|Python|C++|
+|:---:|:---:|:---:|:---:|:---:|
+|X86 CPU|✅|[Link](./cpu-gpu)|✅|✅|
+|NVIDIA GPU|✅|[Link](./cpu-gpu)|✅|✅|
+|Phytium CPU|✅|[Link](./cpu-gpu)|✅|✅|
+|ARM CPU|✅|[Link](./cpu-gpu)|✅|✅|
+|Intel GPU (integrated)|✅|[Link](./cpu-gpu)|✅|✅|
+|Intel GPU (discrete)|✅|[Link](./cpu-gpu)|✅|✅|
+|Kunlunxin|✅|[Link](./kunlunxin)|✅|✅|
+|Ascend|✅|[Link](./ascend)|✅|✅|
+|Rockchip|✅|[Link](./rockchip)|✅|✅|
+|Amlogic|✅|[Link](./amlogic)|-|✅|
+|Sophgo|✅|[Link](./sophgo)|✅|✅|
+
+### 2.2 Detailed Documentation
+- X86 CPU
+ - [Deployment model preparation](./cpu-gpu)
+ - [Python deployment example](./cpu-gpu/python/)
+ - [C++ deployment example](./cpu-gpu/cpp/)
+- NVIDIA GPU
+ - [Deployment model preparation](./cpu-gpu)
+ - [Python deployment example](./cpu-gpu/python/)
+ - [C++ deployment example](./cpu-gpu/cpp/)
+- Phytium CPU
+ - [Deployment model preparation](./cpu-gpu)
+ - [Python deployment example](./cpu-gpu/python/)
+ - [C++ deployment example](./cpu-gpu/cpp/)
+- ARM CPU
+ - [Deployment model preparation](./cpu-gpu)
+ - [Python deployment example](./cpu-gpu/python/)
+ - [C++ deployment example](./cpu-gpu/cpp/)
+- Intel GPU
+ - [Deployment model preparation](./cpu-gpu)
+ - [Python deployment example](./cpu-gpu/python/)
+ - [C++ deployment example](./cpu-gpu/cpp/)
+- Kunlunxin XPU
+ - [Deployment model preparation](./kunlunxin)
+ - [Python deployment example](./kunlunxin/python/)
+ - [C++ deployment example](./kunlunxin/cpp/)
+- Ascend
+ - [Deployment model preparation](./ascend)
+ - [Python deployment example](./ascend/python/)
+ - [C++ deployment example](./ascend/cpp/)
+- Rockchip
+ - [Deployment model preparation](./rockchip/)
+ - [Python deployment example](./rockchip/rknpu2/)
+ - [C++ deployment example](./rockchip/rknpu2/)
+- Amlogic
+ - [Deployment model preparation](./amlogic/a311d/)
+ - [C++ deployment example](./amlogic/a311d/cpp/)
+- Sophgo
+ - [Deployment model preparation](./sophgo/)
+ - [Python deployment example](./sophgo/python/)
+ - [C++ deployment example](./sophgo/cpp/)
+
+### 2.3 More Deployment Options
+
+- [Android ARM CPU deployment](https://github.com/PaddlePaddle/FastDeploy/tree/develop/java/android#Detection)
+- [Serving deployment](./serving)
+- [Web deployment](./web)
+- [Automated model compression tool](./quantize)
+
+
+## 3. FAQ
+
+
+If you run into problems, check the FAQ collection, search the FastDeploy issues, or file a new [issue](https://github.com/PaddlePaddle/FastDeploy/issues) with FastDeploy:
+
+[FAQ collection](https://github.com/PaddlePaddle/FastDeploy/tree/develop/docs/cn/faq)
+[FastDeploy issues](https://github.com/PaddlePaddle/FastDeploy/issues)
diff --git a/deploy/fastdeploy/amlogic/a311d/README.md b/deploy/fastdeploy/amlogic/a311d/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4af0750220a9ef039d240f7549b70e483bcb3158
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/README.md
@@ -0,0 +1,20 @@
+[English](README.md) | 简体中文
+
+# Deploying PaddleDetection Detection Models on Amlogic NPUs with FastDeploy
+
+FastDeploy currently supports deploying quantized PP-YOLOE models to the A311D via Paddle Lite.
+
+## 1. Description
+
+The Amlogic A311D is an advanced AI application processor. PaddleDetection supports deploying detection models on the A311D through FastDeploy, using Paddle Lite as the inference engine. **Note**: VeriSilicon is an IP design vendor; it does not ship SoC products itself but licenses its IP to chip vendors such as Amlogic and Rockchip. This document therefore applies to chips that license VeriSilicon's NPU IP: as long as a chip does not heavily modify VeriSilicon's underlying libraries, it can use this document as a reference and tutorial for Paddle Lite inference deployment. In this document, the NPUs in Amlogic SoCs and Rockchip SoCs are collectively referred to as VeriSilicon NPUs. Deployment is currently supported on the following chips:
+- Amlogic A311D
+- Amlogic C308X
+- Amlogic S905D3
+
+For model quantization and downloads of quantized models, see: [Model Quantization](../quantize/README.md)
+
+## 2. Detailed Deployment Examples
+
+Only C++ deployment is supported on the A311D.
+
+- [C++ deployment](cpp)
\ No newline at end of file
diff --git a/deploy/fastdeploy/amlogic/a311d/cpp/CMakeLists.txt b/deploy/fastdeploy/amlogic/a311d/cpp/CMakeLists.txt
new file mode 100755
index 0000000000000000000000000000000000000000..af493f6b67d2135a94c06590248bd6f28d364a54
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/cpp/CMakeLists.txt
@@ -0,0 +1,27 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+include_directories(${FastDeploy_INCLUDE_DIRS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+
+set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
+
+install(TARGETS infer_demo DESTINATION ./)
+
+install(DIRECTORY models DESTINATION ./)
+install(DIRECTORY images DESTINATION ./)
+
+file(GLOB_RECURSE FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/lib*.so*)
+file(GLOB_RECURSE ALL_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/lib*.so*)
+list(APPEND ALL_LIBS ${FASTDEPLOY_LIBS})
+install(PROGRAMS ${ALL_LIBS} DESTINATION lib)
+
+file(GLOB ADB_TOOLS run_with_adb.sh)
+install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)
diff --git a/deploy/fastdeploy/amlogic/a311d/cpp/README.md b/deploy/fastdeploy/amlogic/a311d/cpp/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..830c47e6d6d04455da17941c62f656bf562124aa
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/cpp/README.md
@@ -0,0 +1,77 @@
+[English](README.md) | 简体中文
+# PaddleDetection A311D Quantized Model C++ Deployment Example
+
+The `infer.cc` in this directory helps you quickly deploy a PP-YOLOE quantized model on the A311D with accelerated inference.
+
+## 1. Deployment Environment Preparation
+For the software/hardware requirements and cross-compilation environment setup, see: [FastDeploy Amlogic A311D build documentation](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. Deployment Model Preparation
+1. You can directly deploy the quantized models provided by FastDeploy.
+2. You can export a Float32 model yourself with PaddleDetection; note that you must set the parameter use_shared_conv=False when exporting the model. For details, see: [PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe)
+3. You can quantize the model yourself with FastDeploy's [one-click automated model compression tool](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tools/common_tools/auto_compression/) and deploy the resulting quantized model. (Note: inference with a quantized detection model still needs the infer_cfg.yml file from the FP32 model folder. A self-quantized model folder does not contain this yaml file, so copy it from the FP32 model folder into the quantized model folder; see the sketch after this list.)
+4. The model requires heterogeneous computing; for the heterogeneous computing file, see: [Heterogeneous computing](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md). Since FastDeploy already provides a ready-made model, you can first test with the heterogeneous file we provide and verify whether the accuracy meets your requirements.
+
+For more information on quantization, see [Model Quantization](../../../quantize/README.md)
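+
+A minimal sketch of steps 2 and 3 above (the config, weights path, and quantized-model folder name are illustrative; adjust them to your setup):
+
+```bash
+# Export the FP32 model with use_shared_conv=False (passed through -o)
+python tools/export_model.py \
+    -c configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml \
+    -o weights=output/ppyoloe_crn_l_300e_coco/model_final use_shared_conv=False \
+    --output_dir=output_inference
+# After quantization, copy infer_cfg.yml from the FP32 folder into the quantized model folder
+cp output_inference/ppyoloe_crn_l_300e_coco/infer_cfg.yml ./my_quantized_model/
+```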
+
+## 3. Deploying the Quantized PP-YOLOE Detection Model on the A311D
+Follow these steps to deploy the quantized PP-YOLOE model on the A311D:
+
+1. Cross-compile the FastDeploy library; for details see: [Cross-compiling FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/a311d.md)
+
+2. Copy the cross-compiled library into this directory; for example, run the following from the directory that also contains your FastDeploy checkout:
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+# Note: if the fastdeploy test code below is missing on the current branch, switch to the develop branch
+# cd PaddleDetection && git checkout develop && cd ..
+# Copy the cross-compiled FastDeploy library next to the example code
+cp -r FastDeploy/build/fastdeploy-timvx/ PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp
+```
+
+3. Download the model and example image needed for deployment into the current directory:
+```bash
+cd PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp
+mkdir models && mkdir images
+wget https://bj.bcebos.com/fastdeploy/models/ppyoloe_noshare_qat.tar.gz
+tar -xvf ppyoloe_noshare_qat.tar.gz
+cp -r ppyoloe_noshare_qat models
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+cp -r 000000014439.jpg images
+```
+
+4. Build the deployment example with the following commands:
+```bash
+cd PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp
+mkdir build && cd build
+cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=arm64 ..
+make -j8
+make install
+# After a successful build, an install folder is generated, containing the demo binary and the libraries needed for deployment
+```
+
+5. Deploy the PP-YOLOE detection model to the Amlogic A311D using adb
+```bash
+# Enter the install directory
+cd PaddleDetection/deploy/fastdeploy/amlogic/a311d/cpp/build/install/
+# Usage: bash run_with_adb.sh <demo to run> <model path> <image path> <device DEVICE_ID>
+bash run_with_adb.sh infer_demo ppyoloe_noshare_qat 000000014439.jpg $DEVICE_ID
+```
+
+After a successful deployment, the run output looks as follows:
+
+
+
+Note in particular that models deployed on the A311D must be quantized; for model quantization, see: [Model Quantization](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md)
+
+## 4. More Guides
+- [PaddleDetection C++ API documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
+
+## 5. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Building the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Building the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Building the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/amlogic/a311d/cpp/infer.cc b/deploy/fastdeploy/amlogic/a311d/cpp/infer.cc
new file mode 100755
index 0000000000000000000000000000000000000000..c7b81f9f98e555ad493afb3d7291b3e85b9bc17a
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/cpp/infer.cc
@@ -0,0 +1,65 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto subgraph_file = model_dir + sep + "subgraph.txt";
+  // Use FlyCV to accelerate image pre/post-processing on ARM
+  fastdeploy::vision::EnableFlyCV();
+  fastdeploy::RuntimeOption option;
+  // Run on the VeriSilicon TIM-VX NPU backend via Paddle Lite
+  option.UseTimVX();
+  // Heterogeneous partition file: ops listed here stay on CPU instead of the NPU
+  option.SetLiteSubgraphPartitionPath(subgraph_file);
+
+  auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+                                                      config_file, option);
+  if (!model.Initialized()) {
+    std::cerr << "Failed to initialize." << std::endl;
+    return;
+  }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout << "Usage: infer_demo path/to/quant_model "
+ "path/to/image "
+ "e.g ./infer_demo ./PPYOLOE_L_quant ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+
+ std::string model_dir = argv[1];
+ std::string test_image = argv[2];
+ InitAndInfer(model_dir, test_image);
+ return 0;
+}
diff --git a/deploy/fastdeploy/amlogic/a311d/cpp/run_with_adb.sh b/deploy/fastdeploy/amlogic/a311d/cpp/run_with_adb.sh
new file mode 100755
index 0000000000000000000000000000000000000000..dd7d7b47d2c12cfc4cf462a674531546cdeac173
--- /dev/null
+++ b/deploy/fastdeploy/amlogic/a311d/cpp/run_with_adb.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+HOST_SPACE=${PWD}
+echo ${HOST_SPACE}
+WORK_SPACE=/data/local/tmp/test
+
+# The first parameter represents the demo name
+DEMO_NAME=infer_demo
+if [ -n "$1" ]; then
+ DEMO_NAME=$1
+fi
+
+# The second parameter represents the model name
+MODEL_NAME=ppyoloe_noshare_qat
+if [ -n "$2" ]; then
+ MODEL_NAME=$2
+fi
+
+# The third parameter indicates the name of the image to be tested
+IMAGE_NAME=000000014439.jpg
+if [ -n "$3" ]; then
+ IMAGE_NAME=$3
+fi
+
+# The fourth parameter represents the ID of the device
+ADB_DEVICE_NAME=
+if [ -n "$4" ]; then
+ ADB_DEVICE_NAME="-s $4"
+fi
+
+# Set the environment variables required during the running process
+EXPORT_ENVIRONMENT_VARIABLES="export GLOG_v=5; export SUBGRAPH_ONLINE_MODE=true; export RKNPU_LOGLEVEL=5; export RKNN_LOG_LEVEL=5; ulimit -c unlimited; export VIV_VX_ENABLE_GRAPH_TRANSFORM=-pcq:1; export VIV_VX_SET_PER_CHANNEL_ENTROPY=100; export TIMVX_BATCHNORM_FUSION_MAX_ALLOWED_QUANT_SCALE_DEVIATION=300000; export VSI_NN_LOG_LEVEL=5;"
+
+EXPORT_ENVIRONMENT_VARIABLES="${EXPORT_ENVIRONMENT_VARIABLES}export LD_LIBRARY_PATH=${WORK_SPACE}/lib:\$LD_LIBRARY_PATH;"
+
+# Please install adb, and DON'T run this in the docker.
+set -e
+adb $ADB_DEVICE_NAME shell "rm -rf $WORK_SPACE"
+adb $ADB_DEVICE_NAME shell "mkdir -p $WORK_SPACE"
+
+# Upload the demo, libraries, model and test images to the device
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/lib $WORK_SPACE
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/${DEMO_NAME} $WORK_SPACE
+adb $ADB_DEVICE_NAME push models $WORK_SPACE
+adb $ADB_DEVICE_NAME push images $WORK_SPACE
+
+# Execute the deployment demo
+adb $ADB_DEVICE_NAME shell "cd $WORK_SPACE; ${EXPORT_ENVIRONMENT_VARIABLES} chmod +x ./${DEMO_NAME}; ./${DEMO_NAME} ./models/${MODEL_NAME} ./images/$IMAGE_NAME"
diff --git a/deploy/fastdeploy/ascend/README.md b/deploy/fastdeploy/ascend/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b3874224d809b14829ded08b41d02881f0607623
--- /dev/null
+++ b/deploy/fastdeploy/ascend/README.md
@@ -0,0 +1,87 @@
+[English](README.md) | 简体中文
+
+# Deploying PaddleDetection Detection Models on Huawei Ascend with FastDeploy
+
+## 1. Description
+PaddleDetection supports fast deployment of detection models on Huawei Ascend with FastDeploy.
+
+## 2. Pre-exported Model List
+For developers' convenience, the models exported from PaddleDetection are listed below and can be downloaded and used directly. The accuracy figures come from the model descriptions in PaddleDetection; see the PaddleDetection documentation for details.
+
+| Model | Size | Accuracy | Notes |
+|:---------------------------------------------------------------- |:----- |:----- | :------ |
+| [picodet_l_320_coco_lcnet](https://bj.bcebos.com/paddlehub/fastdeploy/picodet_l_320_coco_lcnet.tgz) | 23MB | Box AP 42.6% | |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz) | 200MB | Box AP 51.4% | |
+| [ppyoloe_plus_crn_m_80e_coco](https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz) | 83.3MB | Box AP 49.8% | |
+| [ppyolo_r50vd_dcn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolo_r50vd_dcn_1x_coco.tgz) | 180MB | Box AP 44.8% | TensorRT not supported yet |
+| [ppyolov2_r101vd_dcn_365e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolov2_r101vd_dcn_365e_coco.tgz) | 282MB | Box AP 49.7% | TensorRT not supported yet |
+| [yolov3_darknet53_270e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov3_darknet53_270e_coco.tgz) | 237MB | Box AP 39.1% | |
+| [yolox_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolox_s_300e_coco.tgz) | 35MB | Box AP 40.4% | |
+| [faster_rcnn_r50_vd_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz) | 160MB | Box AP 40.8% | TensorRT not supported yet |
+| [mask_rcnn_r50_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/mask_rcnn_r50_1x_coco.tgz) | 128M | Box AP 37.4%, Mask AP 32.8% | TensorRT and ORT not supported yet |
+| [ssd_mobilenet_v1_300_120e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_mobilenet_v1_300_120e_voc.tgz) | 24.9M | Box AP 73.8% | TensorRT and ORT not supported yet |
+| [ssd_vgg16_300_240e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_vgg16_300_240e_voc.tgz) | 106.5M | Box AP 77.8% | TensorRT and ORT not supported yet |
+| [ssdlite_mobilenet_v1_300_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ssdlite_mobilenet_v1_300_coco.tgz) | 29.1M | | TensorRT and ORT not supported yet |
+| [rtmdet_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_l_300e_coco.tgz) | 224M | Box AP 51.2% | |
+| [rtmdet_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_s_300e_coco.tgz) | 42M | Box AP 44.5% | |
+| [yolov5_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_l_300e_coco.tgz) | 183M | Box AP 48.9% | |
+| [yolov5_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_s_300e_coco.tgz) | 31M | Box AP 37.6% | |
+| [yolov6_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_l_300e_coco.tgz) | 229M | Box AP 51.0% | |
+| [yolov6_s_400e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_s_400e_coco.tgz) | 68M | Box AP 43.4% | |
+| [yolov7_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_l_300e_coco.tgz) | 145M | Box AP 51.0% | |
+| [yolov7_x_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_x_300e_coco.tgz) | 277M | Box AP 53.0% | |
+| [cascade_rcnn_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_fpn_1x_coco.tgz) | 271M | Box AP 41.1% | TensorRT and ORT not supported yet |
+| [cascade_rcnn_r50_vd_fpn_ssld_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.tgz) | 271M | Box AP 45.0% | TensorRT and ORT not supported yet |
+| [faster_rcnn_enhance_3x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_enhance_3x_coco.tgz) | 119M | Box AP 41.5% | TensorRT and ORT not supported yet |
+| [fcos_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/fcos_r50_fpn_1x_coco.tgz) | 129M | Box AP 39.6% | TensorRT not supported yet |
+| [gfl_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/gfl_r50_fpn_1x_coco.tgz) | 128M | Box AP 41.0% | TensorRT not supported yet |
+| [ppyoloe_crn_l_80e_sliced_visdrone_640_025](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_80e_sliced_visdrone_640_025.tgz) | 200M | Box AP 31.9% | |
+| [retinanet_r101_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r101_fpn_2x_coco.tgz) | 210M | Box AP 40.6% | TensorRT and ORT not supported yet |
+| [retinanet_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r50_fpn_1x_coco.tgz) | 136M | Box AP 37.5% | TensorRT and ORT not supported yet |
+| [tood_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/tood_r50_fpn_1x_coco.tgz) | 130M | Box AP 42.5% | TensorRT and ORT not supported yet |
+| [ttfnet_darknet53_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ttfnet_darknet53_1x_coco.tgz) | 178M | Box AP 33.5% | TensorRT and ORT not supported yet |
+| [yolov8_x_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_x_500e_coco.tgz) | 265M | Box AP 53.8% | |
+| [yolov8_l_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_l_500e_coco.tgz) | 173M | Box AP 52.8% | |
+| [yolov8_m_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_m_500e_coco.tgz) | 99M | Box AP 50.2% | |
+| [yolov8_s_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_s_500e_coco.tgz) | 43M | Box AP 44.9% | |
+| [yolov8_n_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_n_500e_coco.tgz) | 13M | Box AP 37.3% | |
+
+
+## 3. Exporting PaddleDetection Deployment Models Yourself
+### 3.1 Model Versions
+Deployment of models from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection) version 2.4 and above is supported. Models that FastDeploy has successfully tested for deployment:
+
+- [PP-YOLOE (incl. PP-YOLOE+) series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
+- [PicoDet series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/picodet)
+- [PP-YOLO series (incl. v2)](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyolo)
+- [YOLOv3 series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolov3)
+- [YOLOX series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolox)
+- [FasterRCNN series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/faster_rcnn)
+- [MaskRCNN series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/mask_rcnn)
+- [SSD series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ssd)
+- [YOLOv5 series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov5)
+- [YOLOv6 series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov6)
+- [YOLOv7 series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov7)
+- [YOLOv8 series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov8)
+- [RTMDet series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/rtmdet)
+- [CascadeRCNN series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/cascade_rcnn)
+- [PSSDet series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rcnn_enhance)
+- [RetinaNet series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/retinanet)
+- [PPYOLOESOD series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/smalldet)
+- [FCOS series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/fcos)
+- [TTFNet series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ttfnet)
+- [TOOD series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/tood)
+- [GFL series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/gfl)
+
+### 3.2 Model Export
+For exporting PaddleDetection models, see the documentation [Model Export](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/deploy/EXPORT_MODEL.md). **Note**: an exported PaddleDetection model consists of three files, `model.pdmodel`, `model.pdiparams`, and `infer_cfg.yml`; FastDeploy reads the pre-processing information needed at inference time from the yaml file.
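+
+A minimal export sketch (the config and weights URL are illustrative; substitute your own model and weights):
+
+```bash
+# Export ppyoloe_crn_l_300e_coco as a deployable inference model
+python tools/export_model.py \
+    -c configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml \
+    -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams \
+    --output_dir=output_inference
+```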
+
+### 3.3 Export Notes
+If you export the PaddleDetection inference model yourself, note the following:
+- Do not strip NMS when exporting the model; export it as-is
+- If you will run the native TensorRT backend (rather than the Paddle Inference backend), do not add the --trt flag
+- Do not add the `fuse_normalize=True` parameter when exporting the model
+
+## 4. Detailed Deployment Examples
+- [Python deployment](python)
+- [C++ deployment](cpp)
\ No newline at end of file
diff --git a/deploy/fastdeploy/ascend/cpp/CMakeLists.txt b/deploy/fastdeploy/ascend/cpp/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..776d832f91529895af3de67f489510e51793b0f3
--- /dev/null
+++ b/deploy/fastdeploy/ascend/cpp/CMakeLists.txt
@@ -0,0 +1,11 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/ascend/cpp/README.md b/deploy/fastdeploy/ascend/cpp/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..62029d903c3073da2a379aeed35e16d15d64d77c
--- /dev/null
+++ b/deploy/fastdeploy/ascend/cpp/README.md
@@ -0,0 +1,52 @@
+[English](README.md) | 简体中文
+# PaddleDetection Ascend C++ Deployment Example
+
+The `infer.cc` in this directory provides an example of quickly deploying PPYOLOE on Huawei Ascend.
+
+## 1. Deployment Environment Preparation
+Before deploying, build the prediction library for Huawei Ascend NPUs yourself; see [Building the Huawei Ascend NPU deployment environment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. Deployment Model Preparation
+Before deploying, prepare the inference model you want to run; you can use a [pre-exported inference model](../README.md) or [export a PaddleDetection deployment model yourself](../README.md).
+
+## 3. Running the Deployment Example
+Taking inference on Linux as an example, run the following commands in this directory to build and test.
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/ascend/cpp
+# Note: if the fastdeploy test code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+mkdir build
+cd build
+# Build infer_demo with the FastDeploy library you built (copy it here as fastdeploy-ascend first)
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-ascend
+make -j
+
+# Download the model files and test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# Huawei Ascend inference
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg
+```
+
+The visualized result after the run is shown below
+
+

+
+
+
+## 4. More Guides
+- [PaddleDetection C++ API documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
+- [Python deployment](../python)
+
+## 5. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Building the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Building the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Building the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/ascend/cpp/infer.cc b/deploy/fastdeploy/ascend/cpp/infer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c7394921b3ea49f204d676c9e674a8af77a91810
--- /dev/null
+++ b/deploy/fastdeploy/ascend/cpp/infer.cc
@@ -0,0 +1,62 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void AscendInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+  // PaddleDetection exports its deployment config as infer_cfg.yml
+  auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseAscend();
+ auto model = fastdeploy::vision::detection::PPYOLOE(
+ model_file, params_file, config_file, option);
+
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image run_option, "
+ "e.g ./infer_model ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+
+ AscendInfer(argv[1], argv[2]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/ascend/python/README.md b/deploy/fastdeploy/ascend/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2b88148a5918e35ac40019488b2ecad2bc160cc3
--- /dev/null
+++ b/deploy/fastdeploy/ascend/python/README.md
@@ -0,0 +1,44 @@
+[English](README.md) | 简体中文
+# PaddleDetection Ascend Python Deployment Example
+
+The `infer.py` in this directory provides an example of quickly deploying PPYOLOE on Huawei Ascend.
+
+## 1. Deployment Environment Preparation
+Before deploying, build and install the FastDeploy Python wheel for Huawei Ascend NPUs yourself; see [Building the Huawei Ascend NPU deployment environment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. Deployment Model Preparation
+Before deploying, prepare the inference model you want to run; you can use a [pre-exported inference model](../README.md) or [export a PaddleDetection deployment model yourself](../README.md).
+
+## 3. Running the Deployment Example
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/ascend/python
+# Note: if the fastdeploy test code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the model files and test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# Huawei Ascend inference
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg
+```
+
+The visualized result after the run is shown below
+
+

+
+
+## 4. More Guides
+- [PaddleDetection Python API documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
+- [C++ deployment](../cpp)
+
+## 5. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Building the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Building the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Building the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/ascend/python/infer.py b/deploy/fastdeploy/ascend/python/infer.py
new file mode 100755
index 0000000000000000000000000000000000000000..46cb50129dd06fd15473b8bfbd37295b68bdcf6f
--- /dev/null
+++ b/deploy/fastdeploy/ascend/python/infer.py
@@ -0,0 +1,46 @@
+import cv2
+import os
+
+import fastdeploy as fd
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+    # Both arguments are optional: the script falls back to downloading a
+    # default model and test image when they are omitted (see below).
+    parser.add_argument(
+        "--model_dir",
+        default=None,
+        help="Path of PaddleDetection model; a default model is downloaded if omitted.")
+    parser.add_argument(
+        "--image_file",
+        type=str,
+        default=None,
+        help="Path of test image file; a default image is downloaded if omitted.")
+ return parser.parse_args()
+
+args = parse_arguments()
+
+runtime_option = fd.RuntimeOption()
+runtime_option.use_ascend()
+
+if args.model_dir is None:
+ model_dir = fd.download_model(name='ppyoloe_crn_l_300e_coco')
+else:
+ model_dir = args.model_dir
+
+model_file = os.path.join(model_dir, "model.pdmodel")
+params_file = os.path.join(model_dir, "model.pdiparams")
+config_file = os.path.join(model_dir, "infer_cfg.yml")
+
+# setting for runtime
+model = fd.vision.detection.PPYOLOE(
+ model_file, params_file, config_file, runtime_option=runtime_option)
+
+# predict
+if args.image_file is None:
+ image_file = fd.utils.get_detection_test_image()
+else:
+ image_file = args.image_file
+im = cv2.imread(image_file)
+result = model.predict(im)
+print(result)
+
+# visualize
+vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("Visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/cpu-gpu/README.md b/deploy/fastdeploy/cpu-gpu/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8485fb37afc30d803773a05fdff169c50192c949
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/README.md
@@ -0,0 +1,97 @@
+[English](README.md) | 简体中文
+
+# Deploying PaddleDetection Detection Models on CPU-GPU with FastDeploy
+
+## 1. Description
+PaddleDetection supports fast deployment of detection models on NVIDIA GPU, X86 CPU, Phytium CPU, ARM CPU, and Intel GPU (discrete/integrated) hardware with FastDeploy.
+
+## 2. Pre-exported Model List
+For developers' convenience, the models exported from PaddleDetection are listed below and can be downloaded and used directly. The accuracy figures come from the model descriptions in PaddleDetection; see the PaddleDetection documentation for details.
+
+### 2.1 Object Detection and Instance Segmentation Models
+| Model | Size | Accuracy | Notes |
+|:---------------------------------------------------------------- |:----- |:----- | :------ |
+| [picodet_l_320_coco_lcnet](https://bj.bcebos.com/paddlehub/fastdeploy/picodet_l_320_coco_lcnet.tgz) | 23MB | Box AP 42.6% | |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz) | 200MB | Box AP 51.4% | |
+| [ppyoloe_plus_crn_m_80e_coco](https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz) | 83.3MB | Box AP 49.8% | |
+| [ppyolo_r50vd_dcn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolo_r50vd_dcn_1x_coco.tgz) | 180MB | Box AP 44.8% | TensorRT not supported yet |
+| [ppyolov2_r101vd_dcn_365e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolov2_r101vd_dcn_365e_coco.tgz) | 282MB | Box AP 49.7% | TensorRT not supported yet |
+| [yolov3_darknet53_270e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov3_darknet53_270e_coco.tgz) | 237MB | Box AP 39.1% | |
+| [yolox_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolox_s_300e_coco.tgz) | 35MB | Box AP 40.4% | |
+| [faster_rcnn_r50_vd_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz) | 160MB | Box AP 40.8% | TensorRT not supported yet |
+| [mask_rcnn_r50_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/mask_rcnn_r50_1x_coco.tgz) | 128M | Box AP 37.4%, Mask AP 32.8% | TensorRT and ORT not supported yet |
+| [ssd_mobilenet_v1_300_120e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_mobilenet_v1_300_120e_voc.tgz) | 24.9M | Box AP 73.8% | TensorRT and ORT not supported yet |
+| [ssd_vgg16_300_240e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_vgg16_300_240e_voc.tgz) | 106.5M | Box AP 77.8% | TensorRT and ORT not supported yet |
+| [ssdlite_mobilenet_v1_300_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ssdlite_mobilenet_v1_300_coco.tgz) | 29.1M | | TensorRT and ORT not supported yet |
+| [rtmdet_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_l_300e_coco.tgz) | 224M | Box AP 51.2% | |
+| [rtmdet_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_s_300e_coco.tgz) | 42M | Box AP 44.5% | |
+| [yolov5_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_l_300e_coco.tgz) | 183M | Box AP 48.9% | |
+| [yolov5_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_s_300e_coco.tgz) | 31M | Box AP 37.6% | |
+| [yolov6_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_l_300e_coco.tgz) | 229M | Box AP 51.0% | |
+| [yolov6_s_400e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_s_400e_coco.tgz) | 68M | Box AP 43.4% | |
+| [yolov7_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_l_300e_coco.tgz) | 145M | Box AP 51.0% | |
+| [yolov7_x_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_x_300e_coco.tgz) | 277M | Box AP 53.0% | |
+| [cascade_rcnn_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_fpn_1x_coco.tgz) | 271M | Box AP 41.1% | TensorRT and ORT not supported yet |
+| [cascade_rcnn_r50_vd_fpn_ssld_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.tgz) | 271M | Box AP 45.0% | TensorRT and ORT not supported yet |
+| [faster_rcnn_enhance_3x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_enhance_3x_coco.tgz) | 119M | Box AP 41.5% | TensorRT and ORT not supported yet |
+| [fcos_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/fcos_r50_fpn_1x_coco.tgz) | 129M | Box AP 39.6% | TensorRT not supported yet |
+| [gfl_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/gfl_r50_fpn_1x_coco.tgz) | 128M | Box AP 41.0% | TensorRT not supported yet |
+| [ppyoloe_crn_l_80e_sliced_visdrone_640_025](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_80e_sliced_visdrone_640_025.tgz) | 200M | Box AP 31.9% | |
+| [retinanet_r101_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r101_fpn_2x_coco.tgz) | 210M | Box AP 40.6% | TensorRT and ORT not supported yet |
+| [retinanet_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r50_fpn_1x_coco.tgz) | 136M | Box AP 37.5% | TensorRT and ORT not supported yet |
+| [tood_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/tood_r50_fpn_1x_coco.tgz) | 130M | Box AP 42.5% | TensorRT and ORT not supported yet |
+| [ttfnet_darknet53_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ttfnet_darknet53_1x_coco.tgz) | 178M | Box AP 33.5% | TensorRT and ORT not supported yet |
+| [yolov8_x_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_x_500e_coco.tgz) | 265M | Box AP 53.8% | |
+| [yolov8_l_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_l_500e_coco.tgz) | 173M | Box AP 52.8% | |
+| [yolov8_m_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_m_500e_coco.tgz) | 99M | Box AP 50.2% | |
+| [yolov8_s_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_s_500e_coco.tgz) | 43M | Box AP 44.9% | |
+| [yolov8_n_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_n_500e_coco.tgz) | 13M | Box AP 37.3% | |
+
+### 2.2 Keypoint Detection Models
+| Model | Description | Format | Version |
+| :--- | :--- | :------- | :--- |
+| [PP-TinyPose-128x96](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_128x96_infer.tgz) | Single-person keypoint detection model | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PP-TinyPose-256x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz) | Single-person keypoint detection model | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PicoDet-S-Lcnet-Pedestrian-192x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_192x192_infer.tgz) + [PP-TinyPose-128x96](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_128x96_infer.tgz) | Single-person keypoint detection pipeline | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PicoDet-S-Lcnet-Pedestrian-320x320](https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz) + [PP-TinyPose-256x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz) | Multi-person keypoint detection pipeline | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+
+
+## 3. Exporting PaddleDetection Deployment Models Yourself
+### 3.1 Model Versions
+Deployment of models from [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection) version 2.4 and above is supported. Models that FastDeploy has successfully tested for deployment:
+
+- [PP-YOLOE (incl. PP-YOLOE+) series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
+- [PicoDet series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/picodet)
+- [PP-YOLO series (incl. v2)](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyolo)
+- [YOLOv3 series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolov3)
+- [YOLOX series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolox)
+- [FasterRCNN series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/faster_rcnn)
+- [MaskRCNN series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/mask_rcnn)
+- [SSD series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ssd)
+- [YOLOv5 series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov5)
+- [YOLOv6 series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov6)
+- [YOLOv7 series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov7)
+- [YOLOv8 series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov8)
+- [RTMDet series](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/rtmdet)
+- [CascadeRCNN series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/cascade_rcnn)
+- [PSSDet series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rcnn_enhance)
+- [RetinaNet series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/retinanet)
+- [PPYOLOESOD series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/smalldet)
+- [FCOS series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/fcos)
+- [TTFNet series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ttfnet)
+- [TOOD series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/tood)
+- [GFL series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/gfl)
+- [PP-PicoDet + PP-TinyPose series](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose/README.md)
+
+### 3.2 Model Export
+For exporting PaddleDetection models, see the documentation [Model Export](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/deploy/EXPORT_MODEL.md). **Note**: an exported PaddleDetection model consists of three files, `model.pdmodel`, `model.pdiparams`, and `infer_cfg.yml`; FastDeploy reads the pre-processing information needed at inference time from the yaml file.
+
+### 3.3 Export Notes
+If you export the PaddleDetection inference model yourself, note the following (see the sketch after this list):
+- Do not strip NMS when exporting the model; export it as-is
+- If you will run the native TensorRT backend (rather than the Paddle Inference backend), do not add the --trt flag
+- Do not add the `fuse_normalize=True` parameter when exporting the model
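+
+A minimal sketch of an export that follows these notes, plus a check of the exported files (the config and weights URL are illustrative):
+
+```bash
+# Plain export: no NMS stripping, no --trt flag, no fuse_normalize=True
+python tools/export_model.py \
+    -c configs/yolov3/yolov3_darknet53_270e_coco.yml \
+    -o weights=https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams \
+    --output_dir=output_inference
+# FastDeploy expects exactly these three files:
+ls output_inference/yolov3_darknet53_270e_coco
+# model.pdmodel  model.pdiparams  infer_cfg.yml
+```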
+
+## 4. Detailed Deployment Examples
+- [Python deployment](python)
+- [C++ deployment](cpp)
\ No newline at end of file
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/CMakeLists.txt b/deploy/fastdeploy/cpu-gpu/cpp/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d2f10da4f9599b4f91ac53b32dcef8841f163b9b
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/CMakeLists.txt
@@ -0,0 +1,13 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+add_executable(infer_tinypose_demo ${PROJECT_SOURCE_DIR}/pptinypose_infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+target_link_libraries(infer_tinypose_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/README.md b/deploy/fastdeploy/cpu-gpu/cpp/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..01d68dd14aaa4d80eda5c4bbd7fd235dfac0e2b4
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/README.md
@@ -0,0 +1,142 @@
+[English](README.md) | 简体中文
+# PaddleDetection CPU-GPU C++ Deployment Example
+
+The `infer.cc` in this directory provides an example of quickly deploying PPYOLOE on CPU/GPU, and on GPU with Paddle-TensorRT acceleration.
+
+## 1. Description
+PaddleDetection supports fast deployment of its models on NVIDIA GPU, X86 CPU, Phytium CPU, ARM CPU, and Intel GPU (discrete/integrated) hardware with FastDeploy. The model series FastDeploy currently supports include, but are not limited to, `PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`, `SSD`, `PaddleYOLOv5`, `PaddleYOLOv6`, `PaddleYOLOv7`, `RTMDet`, `CascadeRCNN`, `PSSDet`, `RetinaNet`, `PPYOLOESOD`, `FCOS`, `TTFNet`, `TOOD`, and `GFL`. The constructors and prediction functions of all these classes take identical parameters, so any of these models can be called by simply following the PPYOLOE example.
+
+## 2. Deployment Environment Preparation
+Before deploying, confirm the software and hardware environment and download the pre-built deployment library; see the [FastDeploy installation documentation](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) to install the FastDeploy pre-built library.
+
+## 3. Deployment Model Preparation
+Before deploying, prepare the inference model you want to run; you can use a [pre-exported inference model](../README.md) or [export a PaddleDetection deployment model yourself](../README.md).
+
+## 4. Running the Deployment Example
+Taking inference on Linux as an example, run the following commands in this directory to build and test. This model requires FastDeploy version 1.0.4 or above (x.x.x >= 1.0.4).
+
+### 4.1 Object Detection Example
+```bash
+# Download the pre-built FastDeploy library; pick a suitable version from the `FastDeploy pre-built libraries` mentioned above
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-gpu-x.x.x.tgz
+
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/cpp
+# Note: if the fastdeploy test code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Build the deployment example
+mkdir build && cd build
+mv ../fastdeploy-linux-x64-gpu-x.x.x .
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-gpu-x.x.x
+make -j
+
+# Download the PPYOLOE model files and test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# Run the deployment example
+# CPU inference
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 0
+# GPU inference
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
+# Paddle-TensorRT inference on GPU (note: the first TensorRT run serializes the model, which takes some time; please be patient)
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 2
+```
+
+The visualized result after the run is shown below
+
+

+
+
+### 4.2 Keypoint Detection Example
+```bash
+# Download the pre-built FastDeploy library; pick a suitable version from the `FastDeploy pre-built libraries` mentioned above
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-gpu-x.x.x.tgz
+
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/cpp
+# Note: if the fastdeploy test code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Build the deployment example
+mkdir build && cd build
+mv ../fastdeploy-linux-x64-gpu-x.x.x .
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-gpu-x.x.x
+make -j
+
+# Download the PP-TinyPose model files and test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg
+
+# Run the deployment example
+# CPU inference
+./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg 0
+# GPU inference
+./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg 1
+# Paddle-TensorRT inference on GPU (note: the first TensorRT run serializes the model, which takes some time; please be patient)
+./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg 2
+```
+
+The visualized result after the run is shown below
+
+

+
+
+For multi-person keypoint detection, see the [PPTinyPose pipeline example](./det_keypoint_unite/)
+
+- Note: the commands above only apply to Linux or macOS. For using the SDK on Windows, see: [Using the FastDeploy C++ SDK on Windows](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md)
+- For how to use more inference backends and hardware with FastDeploy, see: [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 5. PaddleDetection C++ Interfaces
+The model series FastDeploy currently supports include, but are not limited to, `PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`, `SSD`, `PaddleYOLOv5`, `PaddleYOLOv6`, `PaddleYOLOv7`, `RTMDet`, `CascadeRCNN`, `PSSDet`, `RetinaNet`, `PPYOLOESOD`, `FCOS`, `TTFNet`, `TOOD`, and `GFL`. The constructors and prediction functions of all these classes take identical parameters; to call any of these models, simply follow the PPYOLOE example.
+
+### 5.1 Object Detection and Instance Segmentation Models
+```c++
+fastdeploy::vision::detection::PicoDet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::SOLOv2(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLOE(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLO(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::YOLOv3(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOX(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::FasterRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::MaskRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::SSD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv5(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv6(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv7(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv8(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::CascadeRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PSSDet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::RetinaNet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLOESOD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::FCOS(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::TOOD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::GFL(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+```
+
+### 5.2 Keypoint Detection Models
+```C++
+fastdeploy::vision::keypointdetection::PPTinyPose(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+```
+
+These load and initialize a PaddleDetection model, where model_file and params_file are the exported Paddle deployment model files and config_file is the deployment config yaml file exported alongside them by PaddleDetection.
+
+## 6. More Guides
+- [PaddleDetection C++ API documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
+- [Python deployment](../python)
+
+## 7. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Building the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Building the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Building the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
+
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/CMakeLists.txt b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..71cbaa0fde199f00de23d4d090721595285f2a50
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/CMakeLists.txt
@@ -0,0 +1,11 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.12)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/det_keypoint_unite_infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/README.md b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..66520a4e5251a65689b4ba2be576617a87451eec
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/README.md
@@ -0,0 +1,74 @@
+[English](README.md) | 简体中文
+# PP-PicoDet + PP-TinyPose (Pipeline) CPU-GPU C++ Deployment Example
+
+The `det_keypoint_unite_infer.cc` in this directory provides a `single-image multi-person keypoint detection` example that deploys the multi-person PP-PicoDet + PP-TinyPose pipeline on CPU/GPU, and on GPU with TensorRT acceleration; run the script below to complete it. **Note**: for standalone deployment of the single PP-TinyPose model, see [PP-TinyPose single model](../README.md)
+
+## 1. Deployment Environment Preparation
+Before deploying, confirm the software and hardware environment and download the pre-built deployment library; see the [FastDeploy installation documentation](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) to install the FastDeploy pre-built library.
+
+## 2. Deployment Model Preparation
+Before deploying, prepare the inference model you want to run; you can use a [pre-exported inference model](../../README.md) or [export a PaddleDetection deployment model yourself](../../README.md).
+
+## 3. Running the Deployment Example
+Taking inference on Linux as an example, run the following commands in this directory to build and test. This model requires FastDeploy version 1.0.4 or above (x.x.x >= 1.0.4).
+
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite
+# Note: if the fastdeploy test code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the pre-built FastDeploy library; pick a suitable version from the `FastDeploy pre-built libraries` mentioned above
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+
+# Build the deployment example
+mkdir build && cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-linux-x64-x.x.x
+make -j
+
+# Download the PP-TinyPose and PP-PicoDet model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+tar -xvf PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/000000018491.jpg
+
+# CPU inference
+./infer_demo PP_PicoDet_V2_S_Pedestrian_320x320_infer PP_TinyPose_256x192_infer 000000018491.jpg 0
+# GPU inference
+./infer_demo PP_PicoDet_V2_S_Pedestrian_320x320_infer PP_TinyPose_256x192_infer 000000018491.jpg 1
+# Paddle-TensorRT inference on GPU (note: the first TensorRT run serializes the model, which takes some time; please be patient)
+./infer_demo PP_PicoDet_V2_S_Pedestrian_320x320_infer PP_TinyPose_256x192_infer 000000018491.jpg 2
+```
+
+The visualized result after the run is shown below
+
+

+
+
+- Note: the commands above only apply to Linux or macOS. For using the SDK on Windows, see: [Using the FastDeploy C++ SDK on Windows](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md)
+- For how to use more inference backends and hardware with FastDeploy, see: [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 4. PP-TinyPose Pipeline C++ Interface
+
+```c++
+fastdeploy::pipeline::PPTinyPose(
+ fastdeploy::vision::detection::PicoDet* det_model,
+ fastdeploy::vision::keypointdetection::PPTinyPose* pptinypose_model)
+```
+
+Loads and initializes the PPTinyPose pipeline model, where det_model is the initialized detection model and pptinypose_model is the initialized keypoint detection model.
+
+
+## 5. More Guides
+- [PaddleDetection C++ API documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../../)
+- [Python deployment](../../python/det_keypoint_unite/)
+
+## 6. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Building the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Building the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Building the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
new file mode 100755
index 0000000000000000000000000000000000000000..1b8b13120c645309908e6583630d6e5caaf6987a
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
@@ -0,0 +1,205 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#include "fastdeploy/pipeline.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const std::string& det_model_dir,
+ const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto det_model_file = det_model_dir + sep + "model.pdmodel";
+ auto det_params_file = det_model_dir + sep + "model.pdiparams";
+ auto det_config_file = det_model_dir + sep + "infer_cfg.yml";
+ auto det_model = fastdeploy::vision::detection::PicoDet(
+ det_model_file, det_params_file, det_config_file);
+ if (!det_model.Initialized()) {
+ std::cerr << "Detection Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+
+  auto pipeline = fastdeploy::pipeline::PPTinyPose(&det_model, &tinypose_model);
+ pipeline.detection_model_score_threshold = 0.5;
+ if (!pipeline.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.2);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "TinyPose visualized result saved in ./vis_result.jpg"
+ << std::endl;
+}
+
+void GpuInfer(const std::string& det_model_dir,
+ const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto option = fastdeploy::RuntimeOption();
+ option.UseGpu();
+ auto det_model_file = det_model_dir + sep + "model.pdmodel";
+ auto det_params_file = det_model_dir + sep + "model.pdiparams";
+ auto det_config_file = det_model_dir + sep + "infer_cfg.yml";
+ auto det_model = fastdeploy::vision::detection::PicoDet(
+ det_model_file, det_params_file, det_config_file, option);
+ if (!det_model.Initialized()) {
+ std::cerr << "Detection Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+
+ auto pipeline =
+ fastdeploy::pipeline::PPTinyPose(
+ &det_model, &tinypose_model);
+ pipeline.detection_model_score_threshold = 0.5;
+ if (!pipeline.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.2);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "TinyPose visualized result saved in ./vis_result.jpg"
+ << std::endl;
+}
+
+void TrtInfer(const std::string& det_model_dir,
+ const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto det_model_file = det_model_dir + sep + "model.pdmodel";
+ auto det_params_file = det_model_dir + sep + "model.pdiparams";
+ auto det_config_file = det_model_dir + sep + "infer_cfg.yml";
+
+ auto det_option = fastdeploy::RuntimeOption();
+ det_option.UseGpu();
+ det_option.UsePaddleInferBackend();
+  // To use the original TensorRT backend instead of Paddle-TensorRT,
+  // try `det_option.UseTrtBackend()` instead.
+ det_option.paddle_infer_option.enable_trt = true;
+ det_option.paddle_infer_option.collect_trt_shape = true;
+ det_option.trt_option.SetShape("image", {1, 3, 320, 320}, {1, 3, 320, 320},
+ {1, 3, 320, 320});
+ det_option.trt_option.SetShape("scale_factor", {1, 2}, {1, 2}, {1, 2});
+ auto det_model = fastdeploy::vision::detection::PicoDet(
+ det_model_file, det_params_file, det_config_file, det_option);
+ if (!det_model.Initialized()) {
+ std::cerr << "Detection Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_option = fastdeploy::RuntimeOption();
+
+ tinypose_option.UseGpu();
+ tinypose_option.UsePaddleInferBackend();
+  // To use the original TensorRT backend instead of Paddle-TensorRT,
+  // try `tinypose_option.UseTrtBackend()` instead.
+ tinypose_option.paddle_infer_option.enable_trt = true;
+ tinypose_option.paddle_infer_option.collect_trt_shape = true;
+ tinypose_option.trt_option.SetShape("image", {1, 3, 256, 192}, {1, 3, 256, 192},
+ {1, 3, 256, 192});
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file,
+ tinypose_option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+
+ auto pipeline =
+ fastdeploy::pipeline::PPTinyPose(
+ &det_model, &tinypose_model);
+ pipeline.detection_model_score_threshold = 0.5;
+ if (!pipeline.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.2);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "TinyPose visualized result saved in ./vis_result.jpg"
+ << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 5) {
+ std::cout << "Usage: infer_demo path/to/detection_model_dir "
+ "path/to/pptinypose_model_dir path/to/image run_option, "
+ "e.g ./infer_model ./picodet_model_dir ./pptinypose_model_dir "
+ "./test.jpeg 0"
+ << std::endl;
+ std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+ "with gpu; 2: run with gpu and use tensorrt backend;"
+ << std::endl;
+ return -1;
+ }
+
+ if (std::atoi(argv[4]) == 0) {
+ CpuInfer(argv[1], argv[2], argv[3]);
+ } else if (std::atoi(argv[4]) == 1) {
+ GpuInfer(argv[1], argv[2], argv[3]);
+ } else if (std::atoi(argv[4]) == 2) {
+ TrtInfer(argv[1], argv[2], argv[3]);
+ }
+ return 0;
+}
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/infer.cc b/deploy/fastdeploy/cpu-gpu/cpp/infer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..88042f5e083b5f34c0cb4e6016b153fa79fd115b
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/infer.cc
@@ -0,0 +1,134 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseCpu();
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+void GpuInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+
+ auto option = fastdeploy::RuntimeOption();
+ option.UseGpu();
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+void TrtInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+
+ auto option = fastdeploy::RuntimeOption();
+ option.UseGpu();
+ option.UsePaddleInferBackend();
+  // To use the original TensorRT backend instead of Paddle-TensorRT,
+  // try `option.UseTrtBackend()` instead.
+ option.paddle_infer_option.enable_trt = true;
+ option.paddle_infer_option.collect_trt_shape = true;
+ option.trt_option.SetShape("image", {1, 3, 640, 640}, {1, 3, 640, 640},
+ {1, 3, 640, 640});
+ option.trt_option.SetShape("scale_factor", {1, 2}, {1, 2}, {1, 2});
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 4) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image run_option, "
+ "e.g ./infer_demo ./ppyoloe_model_dir ./test.jpeg 0"
+ << std::endl;
+ std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+ "with gpu; 2: run with gpu and use tensorrt backend"
+ << std::endl;
+ return -1;
+ }
+
+ if (std::atoi(argv[3]) == 0) {
+ CpuInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 1) {
+ GpuInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 2) {
+ TrtInfer(argv[1], argv[2]);
+ }
+ return 0;
+}
diff --git a/deploy/fastdeploy/cpu-gpu/cpp/pptinypose_infer.cc b/deploy/fastdeploy/cpu-gpu/cpp/pptinypose_infer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0a56334f7f4d56c2955cee344e5bb1bccaed4cd9
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/cpp/pptinypose_infer.cc
@@ -0,0 +1,149 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void CpuInfer(const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseCpu();
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+ if (!tinypose_model.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto tinypose_vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.5);
+ cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im);
+ std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg"
+ << std::endl;
+}
+
+void GpuInfer(const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto option = fastdeploy::RuntimeOption();
+ option.UseGpu();
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+ if (!tinypose_model.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto tinypose_vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.5);
+ cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im);
+ std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg"
+ << std::endl;
+}
+
+void TrtInfer(const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_option = fastdeploy::RuntimeOption();
+ tinypose_option.UseGpu();
+ tinypose_option.UsePaddleInferBackend();
+  // To use the original TensorRT backend instead of Paddle-TensorRT,
+  // try `tinypose_option.UseTrtBackend()` instead.
+ tinypose_option.paddle_infer_option.enable_trt = true;
+ tinypose_option.paddle_infer_option.collect_trt_shape = true;
+ tinypose_option.trt_option.SetShape("image", {1, 3, 256, 192}, {1, 3, 256, 192},
+ {1, 3, 256, 192});
+
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file,
+ tinypose_option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+ if (!tinypose_model.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto tinypose_vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.5);
+ cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im);
+ std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg"
+ << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 4) {
+ std::cout << "Usage: infer_demo path/to/pptinypose_model_dir path/to/image "
+ "run_option, "
+ "e.g ./infer_demo ./pptinypose_model_dir ./test.jpeg 0"
+ << std::endl;
+ std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+ "with gpu; 2: run with gpu and use tensorrt backend;"
+ << std::endl;
+ return -1;
+ }
+
+ if (std::atoi(argv[3]) == 0) {
+ CpuInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 1) {
+ GpuInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 2) {
+ TrtInfer(argv[1], argv[2]);
+ }
+ return 0;
+}
diff --git a/deploy/fastdeploy/cpu-gpu/python/README.md b/deploy/fastdeploy/cpu-gpu/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..acadce22a8f56f00446a5d0d250e5dc9bcdf1485
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/README.md
@@ -0,0 +1,126 @@
+[English](README.md) | 简体中文
+# PaddleDetection CPU-GPU Python Deployment Example
+
+This directory provides `infer.py`, an example that quickly deploys PPYOLOE models on CPU/GPU, as well as on GPU with Paddle-TensorRT acceleration.
+
+## 1. Overview
+PaddleDetection models can be quickly deployed with FastDeploy on NVIDIA GPU, X86 CPU, Phytium CPU, ARM CPU, and Intel GPU (discrete/integrated) hardware. For the model families FastDeploy currently supports, including but not limited to `PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`, `SSD`, `PaddleYOLOv5`, `PaddleYOLOv6`, `PaddleYOLOv7`, `RTMDet`, `CascadeRCNN`, `PSSDet`, `RetinaNet`, `PPYOLOESOD`, `FCOS`, `TTFNet`, `TOOD`, and `GFL`, the constructors and prediction functions of all model classes take exactly the same parameters. To call any of these models, simply follow the PPYOLOE example.
+
+## 2. Environment Preparation
+Before deployment, confirm your hardware and software environment and download the prebuilt deployment library; see the [FastDeploy installation documentation](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) to install the prebuilt FastDeploy library.
+
+## 3. Model Preparation
+Before deployment, prepare the inference model you want to run. You can either use a [pre-exported inference model](../README.md) or [export a PaddleDetection deployment model yourself](../README.md).
+
+## 4. Run the Deployment Example
+Taking inference on Linux as an example, run the following commands in this directory to complete the deployment test. FastDeploy 1.0.4 or later (x.x.x>=1.0.4) is required for this model.
+
+### 4.1 Object Detection Example
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/python
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the PPYOLOE model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# Run the deployment example
+# CPU inference
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg --device cpu
+# GPU inference
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg --device gpu
+# Paddle-TensorRT inference on GPU (note: the first TensorRT run serializes the model, which takes some time; please be patient)
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg --device gpu --use_trt True
+```
+
+After the run completes, the visualized result is shown below:
+
+

+
+
+### 4.2 Keypoint Detection Example
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/python
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the PP-TinyPose model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg
+
+# Run the deployment example
+# CPU inference
+python pptinypose_infer.py --model_dir PP_TinyPose_256x192_infer --image_file hrnet_demo.jpg --device cpu
+# GPU inference
+python pptinypose_infer.py --model_dir PP_TinyPose_256x192_infer --image_file hrnet_demo.jpg --device gpu
+# Paddle-TensorRT inference on GPU (note: the first TensorRT run serializes the model, which takes some time; please be patient)
+python pptinypose_infer.py --model_dir PP_TinyPose_256x192_infer --image_file hrnet_demo.jpg --device gpu --use_trt True
+```
+
+After the run completes, the visualized result is shown below:
+
+

+
+
+For multi-person keypoint detection, see the [PPTinyPose pipeline example](./det_keypoint_unite/).
+
+## 5. Deployment Example Options
+
+|Argument|Description|Default|
+|---|---|---|
+|--model_dir|Path to the model directory|None|
+|--image_file|Path to the test image|None|
+|--device|Hardware to run on; supported values are `[cpu, gpu]`. With cpu it runs on x86 CPUs, ARM CPUs, etc.|cpu|
+|--use_trt|Whether to use TensorRT; only effective when device is gpu|False|
+
+## 6. PaddleDetection Python Interface
+For the model families FastDeploy currently supports, including but not limited to `PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`, `SSD`, `PaddleYOLOv5`, `PaddleYOLOv6`, `PaddleYOLOv7`, `RTMDet`, `CascadeRCNN`, `PSSDet`, `RetinaNet`, `PPYOLOESOD`, `FCOS`, `TTFNet`, `TOOD`, and `GFL`, the constructors and prediction functions of all model classes take exactly the same parameters. To call any of these models, simply follow the PPYOLOE example.
+
+### 6.1 Object Detection and Instance Segmentation Models
+```python
+fastdeploy.vision.detection.PPYOLOE(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PicoDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOX(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.YOLOv3(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PPYOLO(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.FasterRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.MaskRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.SSD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv5(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv6(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv7(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.RTMDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.CascadeRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PSSDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.RetinaNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PPYOLOESOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.FCOS(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.TTFNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.TOOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.GFL(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+### 6.2 Keypoint Detection Models
+```python
+fastdeploy.vision.keypointdetection.PPTinyPose(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+
+Loads and initializes a PaddleDetection model, where `model_file` and `params_file` are the exported Paddle deployment model files and `config_file` is the deployment configuration yaml file exported alongside them.
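+
+As a minimal sketch, constructing one of these models and running prediction looks like this (paths assume the PPYOLOE model downloaded in section 4.1):
+
+```python
+import cv2
+import fastdeploy as fd
+
+# Minimal sketch: load the exported PPYOLOE model from section 4.1.
+model = fd.vision.detection.PPYOLOE(
+    "ppyoloe_crn_l_300e_coco/model.pdmodel",
+    "ppyoloe_crn_l_300e_coco/model.pdiparams",
+    "ppyoloe_crn_l_300e_coco/infer_cfg.yml")
+result = model.predict(cv2.imread("000000014439.jpg"))
+print(result)
+```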
+
+## 7. More Guides
+- [PaddleDetection Python API documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
+- [C++ deployment](../cpp)
+
+## 8. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Build the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Build the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Build the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/README.md b/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..257188b6ecebddf5182545bc0617cfacbef1af51
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/README.md
@@ -0,0 +1,70 @@
+[English](README.md) | 简体中文
+# PP-PicoDet + PP-TinyPose (Pipeline) CPU-GPU Python Deployment Example
+
+This directory provides `det_keypoint_unite_infer.py`, an example of `multi-person keypoint detection on a single image` that deploys the multi-person PP-PicoDet + PP-TinyPose configuration on CPU/GPU, as well as on GPU with TensorRT acceleration. Simply run the script below. **Note**: for standalone deployment of a single PP-TinyPose model, see [PP-TinyPose single model](../README.md).
+
+## 1. Environment Preparation
+Before deployment, confirm your hardware and software environment and download the prebuilt deployment library; see the [FastDeploy installation documentation](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) to install the prebuilt FastDeploy library.
+
+## 2. Model Preparation
+Before deployment, prepare the inference model you want to run. You can either use a [pre-exported inference model](../../README.md) or [export a PaddleDetection deployment model yourself](../../README.md).
+
+## 3. Run the Deployment Example
+
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the PP-TinyPose and PP-PicoDet model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+tar -xvf PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/000000018491.jpg
+# CPU inference
+python det_keypoint_unite_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --det_model_dir PP_PicoDet_V2_S_Pedestrian_320x320_infer --image_file 000000018491.jpg --device cpu
+# GPU inference
+python det_keypoint_unite_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --det_model_dir PP_PicoDet_V2_S_Pedestrian_320x320_infer --image_file 000000018491.jpg --device gpu
+# Paddle-TensorRT inference on GPU (note: the first TensorRT run serializes the model, which takes some time; please be patient)
+python det_keypoint_unite_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --det_model_dir PP_PicoDet_V2_S_Pedestrian_320x320_infer --image_file 000000018491.jpg --device gpu --use_trt True
+```
+
+After the run completes, the visualized result is shown below:
+
+

+
+
+- For how to use other inference backends and other hardware with FastDeploy, see: [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 4. Deployment Example Options
+
+|Argument|Description|Default|
+|---|---|---|
+|--tinypose_model_dir|Path to the keypoint model directory|None|
+|--det_model_dir|Path to the detection model directory|None|
+|--image_file|Path to the test image|None|
+|--device|Hardware to run on; supported values are `[cpu, gpu]`. With cpu it runs on x86 CPUs, ARM CPUs, etc.|cpu|
+|--use_trt|Whether to use TensorRT; only effective when device is gpu|False|
+
+## 5. PPTinyPose Pipeline Python Interface
+
+```python
+fd.pipeline.PPTinyPose(det_model=None, pptinypose_model=None)
+```
+
+Loads and initializes the PPTinyPose pipeline, where `det_model` is a detection model initialized with `fd.vision.detection.PicoDet` and `pptinypose_model` is a keypoint detection model initialized with `fd.vision.keypointdetection.PPTinyPose`.
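+
+A minimal sketch of calling the pipeline, assuming `det_model` and `tinypose_model` were initialized as in `det_keypoint_unite_infer.py` in this directory:
+
+```python
+# Minimal sketch: chain an initialized detector and keypoint model.
+pipeline = fd.pipeline.PPTinyPose(det_model, tinypose_model)
+pipeline.detection_model_score_threshold = 0.5  # drop low-score person boxes
+result = pipeline.predict(cv2.imread("000000018491.jpg"))
+print(result)
+```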
+
+## 6. More Guides
+- [PaddleDetection Python API documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../../)
+- [C++ deployment](../../cpp/)
+
+## 7. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Build the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Build the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Build the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/det_keypoint_unite_infer.py b/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/det_keypoint_unite_infer.py
new file mode 100755
index 0000000000000000000000000000000000000000..6873ed867b9f17fb642ad99250ac8595449c6fd7
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/det_keypoint_unite/det_keypoint_unite_infer.py
@@ -0,0 +1,101 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--tinypose_model_dir",
+        required=True,
+        help="path of PP-TinyPose model directory")
+    parser.add_argument(
+        "--det_model_dir",
+        required=True,
+        help="path of PP-PicoDet detection model directory")
+ parser.add_argument(
+ "--image_file", required=True, help="path of test image file.")
+ parser.add_argument(
+ "--device",
+ type=str,
+ default='cpu',
+ help="type of inference device, support 'cpu' or 'gpu'.")
+ parser.add_argument(
+ "--use_trt",
+ type=ast.literal_eval,
+ default=False,
+ help="wether to use tensorrt.")
+ return parser.parse_args()
+
+
+def build_picodet_option(args):
+ option = fd.RuntimeOption()
+
+ if args.device.lower() == "gpu":
+ option.use_gpu()
+
+ if args.use_trt:
+ option.use_paddle_infer_backend()
+        # To use the original TensorRT backend instead of Paddle-TensorRT,
+        # try `option.use_trt_backend()` instead
+ option.paddle_infer_option.enable_trt = True
+ option.paddle_infer_option.collect_trt_shape = True
+ option.trt_option.set_shape("image", [1, 3, 320, 320], [1, 3, 320, 320],
+ [1, 3, 320, 320])
+ option.trt_option.set_shape("scale_factor", [1, 2], [1, 2], [1, 2])
+ return option
+
+
+def build_tinypose_option(args):
+ option = fd.RuntimeOption()
+
+ if args.device.lower() == "gpu":
+ option.use_gpu()
+
+ if args.use_trt:
+ option.use_paddle_infer_backend()
+        # To use the original TensorRT backend instead of Paddle-TensorRT,
+        # try `option.use_trt_backend()` instead
+ option.paddle_infer_option.enable_trt = True
+ option.paddle_infer_option.collect_trt_shape = True
+ option.trt_option.set_shape("image", [1, 3, 256, 192], [1, 3, 256, 192],
+ [1, 3, 256, 192])
+ return option
+
+
+args = parse_arguments()
+picodet_model_file = os.path.join(args.det_model_dir, "model.pdmodel")
+picodet_params_file = os.path.join(args.det_model_dir, "model.pdiparams")
+picodet_config_file = os.path.join(args.det_model_dir, "infer_cfg.yml")
+
+# setup runtime
+runtime_option = build_picodet_option(args)
+det_model = fd.vision.detection.PicoDet(
+ picodet_model_file,
+ picodet_params_file,
+ picodet_config_file,
+ runtime_option=runtime_option)
+
+tinypose_model_file = os.path.join(args.tinypose_model_dir, "model.pdmodel")
+tinypose_params_file = os.path.join(args.tinypose_model_dir, "model.pdiparams")
+tinypose_config_file = os.path.join(args.tinypose_model_dir, "infer_cfg.yml")
+# setup runtime
+runtime_option = build_tinypose_option(args)
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+ tinypose_model_file,
+ tinypose_params_file,
+ tinypose_config_file,
+ runtime_option=runtime_option)
+
+# predict
+im = cv2.imread(args.image_file)
+pipeline = fd.pipeline.PPTinyPose(det_model, tinypose_model)
+pipeline.detection_model_score_threshold = 0.5
+pipeline_result = pipeline.predict(im)
+print("Paddle TinyPose Result:\n", pipeline_result)
+
+# visualize
+vis_im = fd.vision.vis_keypoint_detection(
+ im, pipeline_result, conf_threshold=0.2)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("TinyPose visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/cpu-gpu/python/infer.py b/deploy/fastdeploy/cpu-gpu/python/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf1a96dda6449330e47996c54ef0566f0dc08944
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/infer.py
@@ -0,0 +1,74 @@
+import cv2
+import os
+
+import fastdeploy as fd
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--model_dir",
+        default=None,
+        help="Path of PaddleDetection model directory. If not set, a demo model is downloaded.")
+    parser.add_argument(
+        "--image_file",
+        type=str,
+        default=None,
+        help="Path of test image file. If not set, a demo image is downloaded.")
+ parser.add_argument(
+ "--device",
+ type=str,
+ default='cpu',
+ help="Type of inference device, support, 'cpu' or 'gpu'.")
+ parser.add_argument(
+ "--use_trt",
+ type=ast.literal_eval,
+ default=False,
+ help="Wether to use tensorrt.")
+ return parser.parse_args()
+
+
+def build_option(args):
+ option = fd.RuntimeOption()
+
+ if args.device.lower() == "gpu":
+ option.use_gpu()
+
+ if args.use_trt:
+ option.use_paddle_infer_backend()
+        # To use the original TensorRT backend instead of Paddle-TensorRT,
+        # try `option.use_trt_backend()` instead
+ option.paddle_infer_option.enable_trt = True
+ option.paddle_infer_option.collect_trt_shape = True
+ option.trt_option.set_shape("image", [1, 3, 640, 640], [1, 3, 640, 640],
+ [1, 3, 640, 640])
+ option.trt_option.set_shape("scale_factor", [1, 2], [1, 2], [1, 2])
+ return option
+
+
+args = parse_arguments()
+
+if args.model_dir is None:
+ model_dir = fd.download_model(name='ppyoloe_crn_l_300e_coco')
+else:
+ model_dir = args.model_dir
+
+model_file = os.path.join(model_dir, "model.pdmodel")
+params_file = os.path.join(model_dir, "model.pdiparams")
+config_file = os.path.join(model_dir, "infer_cfg.yml")
+
+# set up the runtime option
+runtime_option = build_option(args)
+model = fd.vision.detection.PPYOLOE(
+ model_file, params_file, config_file, runtime_option=runtime_option)
+
+# predict
+if args.image_file is None:
+ image_file = fd.utils.get_detection_test_image()
+else:
+ image_file = args.image_file
+im = cv2.imread(image_file)
+result = model.predict(im)
+print(result)
+
+# visualize
+vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("Visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/cpu-gpu/python/pptinypose_infer.py b/deploy/fastdeploy/cpu-gpu/python/pptinypose_infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3115f82be28a4ebd90ccc4ce3b8b1b706e67d5d
--- /dev/null
+++ b/deploy/fastdeploy/cpu-gpu/python/pptinypose_infer.py
@@ -0,0 +1,67 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir",
+ required=True,
+ help="path of PP-TinyPose model directory")
+ parser.add_argument(
+ "--image_file", required=True, help="path of test image file.")
+ parser.add_argument(
+ "--device",
+ type=str,
+ default='cpu',
+ help="type of inference device, support 'cpu', or 'gpu'.")
+ parser.add_argument(
+ "--use_trt",
+ type=ast.literal_eval,
+ default=False,
+ help="wether to use tensorrt.")
+ return parser.parse_args()
+
+
+def build_option(args):
+ option = fd.RuntimeOption()
+
+ if args.device.lower() == "gpu":
+ option.use_gpu()
+
+ if args.use_trt:
+ option.use_paddle_infer_backend()
+        # To use the original TensorRT backend instead of Paddle-TensorRT,
+        # try `option.use_trt_backend()` instead
+ option.paddle_infer_option.enable_trt = True
+ option.paddle_infer_option.collect_trt_shape = True
+ option.trt_option.set_shape("image", [1, 3, 256, 192], [1, 3, 256, 192],
+ [1, 3, 256, 192])
+ return option
+
+
+args = parse_arguments()
+
+tinypose_model_file = os.path.join(args.model_dir, "model.pdmodel")
+tinypose_params_file = os.path.join(args.model_dir, "model.pdiparams")
+tinypose_config_file = os.path.join(args.model_dir, "infer_cfg.yml")
+# setup runtime
+runtime_option = build_option(args)
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+ tinypose_model_file,
+ tinypose_params_file,
+ tinypose_config_file,
+ runtime_option=runtime_option)
+# predict
+im = cv2.imread(args.image_file)
+tinypose_result = tinypose_model.predict(im)
+print("Paddle TinyPose Result:\n", tinypose_result)
+
+# visualize
+vis_im = fd.vision.vis_keypoint_detection(
+ im, tinypose_result, conf_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("TinyPose visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/kunlunxin/README.md b/deploy/fastdeploy/kunlunxin/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..c264df0d614803ced7d6eb7f5ee4ed5c0f86f575
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/README.md
@@ -0,0 +1,105 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection Deployment on KunlunXin-FastDeploy
+
+## 1. Overview
+PaddleDetection detection models can be deployed on KunlunXin chips via FastDeploy.
+
+Deployment is supported on the following chips:
+- KunlunXin 818-100 (inference chip)
+- KunlunXin 818-300 (training chip)
+
+Devices built on these chips are supported as well:
+- K100/K200 KunlunXin AI accelerator cards
+- R200 KunlunXin AI accelerator card
+
+## 2. Pre-exported Model List
+
+For developers' convenience, the model families exported from PaddleDetection are listed below and can be downloaded and used directly. The accuracy metrics come from the model descriptions in PaddleDetection; see the PaddleDetection documentation for details.
+
+### 2.1 Object Detection and Instance Segmentation Models
+| Model | Size | Accuracy | Notes |
+|:---------------------------------------------------------------- |:----- |:----- | :------ |
+| [picodet_l_320_coco_lcnet](https://bj.bcebos.com/paddlehub/fastdeploy/picodet_l_320_coco_lcnet.tgz) | 23MB | Box AP 42.6% | |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz) | 200MB | Box AP 51.4% | |
+| [ppyoloe_plus_crn_m_80e_coco](https://bj.bcebos.com/fastdeploy/models/ppyoloe_plus_crn_m_80e_coco.tgz) | 83.3MB | Box AP 49.8% | |
+| [ppyolo_r50vd_dcn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolo_r50vd_dcn_1x_coco.tgz) | 180MB | Box AP 44.8% | TensorRT not supported yet |
+| [ppyolov2_r101vd_dcn_365e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyolov2_r101vd_dcn_365e_coco.tgz) | 282MB | Box AP 49.7% | TensorRT not supported yet |
+| [yolov3_darknet53_270e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov3_darknet53_270e_coco.tgz) | 237MB | Box AP 39.1% | |
+| [yolox_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolox_s_300e_coco.tgz) | 35MB | Box AP 40.4% | |
+| [faster_rcnn_r50_vd_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_r50_vd_fpn_2x_coco.tgz) | 160MB | Box AP 40.8% | TensorRT not supported yet |
+| [mask_rcnn_r50_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/mask_rcnn_r50_1x_coco.tgz) | 128MB | Box AP 37.4%, Mask AP 32.8% | TensorRT and ORT not supported yet |
+| [ssd_mobilenet_v1_300_120e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_mobilenet_v1_300_120e_voc.tgz) | 24.9MB | Box AP 73.8% | TensorRT and ORT not supported yet |
+| [ssd_vgg16_300_240e_voc](https://bj.bcebos.com/paddlehub/fastdeploy/ssd_vgg16_300_240e_voc.tgz) | 106.5MB | Box AP 77.8% | TensorRT and ORT not supported yet |
+| [ssdlite_mobilenet_v1_300_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ssdlite_mobilenet_v1_300_coco.tgz) | 29.1MB | | TensorRT and ORT not supported yet |
+| [rtmdet_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_l_300e_coco.tgz) | 224MB | Box AP 51.2% | |
+| [rtmdet_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/rtmdet_s_300e_coco.tgz) | 42MB | Box AP 44.5% | |
+| [yolov5_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_l_300e_coco.tgz) | 183MB | Box AP 48.9% | |
+| [yolov5_s_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov5_s_300e_coco.tgz) | 31MB | Box AP 37.6% | |
+| [yolov6_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_l_300e_coco.tgz) | 229MB | Box AP 51.0% | |
+| [yolov6_s_400e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov6_s_400e_coco.tgz) | 68MB | Box AP 43.4% | |
+| [yolov7_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_l_300e_coco.tgz) | 145MB | Box AP 51.0% | |
+| [yolov7_x_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov7_x_300e_coco.tgz) | 277MB | Box AP 53.0% | |
+| [cascade_rcnn_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_fpn_1x_coco.tgz) | 271MB | Box AP 41.1% | TensorRT and ORT not supported yet |
+| [cascade_rcnn_r50_vd_fpn_ssld_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.tgz) | 271MB | Box AP 45.0% | TensorRT and ORT not supported yet |
+| [faster_rcnn_enhance_3x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/faster_rcnn_enhance_3x_coco.tgz) | 119MB | Box AP 41.5% | TensorRT and ORT not supported yet |
+| [fcos_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/fcos_r50_fpn_1x_coco.tgz) | 129MB | Box AP 39.6% | TensorRT not supported yet |
+| [gfl_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/gfl_r50_fpn_1x_coco.tgz) | 128MB | Box AP 41.0% | TensorRT not supported yet |
+| [ppyoloe_crn_l_80e_sliced_visdrone_640_025](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_80e_sliced_visdrone_640_025.tgz) | 200MB | Box AP 31.9% | |
+| [retinanet_r101_fpn_2x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r101_fpn_2x_coco.tgz) | 210MB | Box AP 40.6% | TensorRT and ORT not supported yet |
+| [retinanet_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/retinanet_r50_fpn_1x_coco.tgz) | 136MB | Box AP 37.5% | TensorRT and ORT not supported yet |
+| [tood_r50_fpn_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/tood_r50_fpn_1x_coco.tgz) | 130MB | Box AP 42.5% | TensorRT and ORT not supported yet |
+| [ttfnet_darknet53_1x_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ttfnet_darknet53_1x_coco.tgz) | 178MB | Box AP 33.5% | TensorRT and ORT not supported yet |
+| [yolov8_x_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_x_500e_coco.tgz) | 265MB | Box AP 53.8% | |
+| [yolov8_l_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_l_500e_coco.tgz) | 173MB | Box AP 52.8% | |
+| [yolov8_m_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_m_500e_coco.tgz) | 99MB | Box AP 50.2% | |
+| [yolov8_s_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_s_500e_coco.tgz) | 43MB | Box AP 44.9% | |
+| [yolov8_n_500e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/yolov8_n_500e_coco.tgz) | 13MB | Box AP 37.3% | |
+
+### 2.2 Keypoint Detection Models
+| Model | Description | Format | Version |
+| :--- | :--- | :------- | :--- |
+| [PP-TinyPose-128x96](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_128x96_infer.tgz) | Single-person keypoint detection model | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PP-TinyPose-256x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz) | Single-person keypoint detection model | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PicoDet-S-Lcnet-Pedestrian-192x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_192x192_infer.tgz) + [PP-TinyPose-128x96](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_128x96_infer.tgz) | Single-person keypoint detection pipeline configuration | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+| [PicoDet-S-Lcnet-Pedestrian-320x320](https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz) + [PP-TinyPose-256x192](https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz) | Multi-person keypoint detection pipeline configuration | Paddle | [Release/2.5](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose) |
+
+## 3. Export Your Own PaddleDetection Deployment Model
+### 3.1 Model Versions
+Deployment of [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection) models from version 2.4 or later is supported. Models that FastDeploy has successfully tested for deployment:
+
+- [PP-YOLOE (incl. PP-YOLOE+) models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyoloe)
+- [PicoDet models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/picodet)
+- [PP-YOLO (incl. v2) models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ppyolo)
+- [YOLOv3 models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolov3)
+- [YOLOX models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/yolox)
+- [FasterRCNN models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/faster_rcnn)
+- [MaskRCNN models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/mask_rcnn)
+- [SSD models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ssd)
+- [YOLOv5 models](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov5)
+- [YOLOv6 models](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov6)
+- [YOLOv7 models](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov7)
+- [YOLOv8 models](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/yolov8)
+- [RTMDet models](https://github.com/PaddlePaddle/PaddleYOLO/tree/release/2.6/configs/rtmdet)
+- [CascadeRCNN models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/cascade_rcnn)
+- [PSSDet models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/rcnn_enhance)
+- [RetinaNet models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/retinanet)
+- [PPYOLOESOD models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/smalldet)
+- [FCOS models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/fcos)
+- [TTFNet models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/ttfnet)
+- [TOOD models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/tood)
+- [GFL models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.6/configs/gfl)
+- [PP-PicoDet + PP-TinyPose models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.5/configs/keypoint/tiny_pose/README.md)
+
+### 3.2 Model Export
+To export a PaddleDetection model, see the PaddleDetection documentation on [model export](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.6/deploy/EXPORT_MODEL.md). **Note**: an exported PaddleDetection model consists of three files, `model.pdmodel`, `model.pdiparams`, and `infer_cfg.yml`; FastDeploy reads the preprocessing information the model needs at inference time from the yaml file.
+
+### 3.3 Export Notes
+If you export the PaddleDetection inference model yourself, keep the following in mind (a sketch of a typical export command follows this list):
+- Do not strip NMS when exporting; export the model as-is
+- If the model will run on the native TensorRT backend (rather than the Paddle Inference backend), do not add the --trt flag
+- Do not add the `fuse_normalize=True` flag when exporting
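+
+The command below is an illustrative sketch of such an export, using a PP-YOLOE config as an example; the config path and weights URL are assumptions, so adapt them to the model you actually export:
+
+```bash
+cd PaddleDetection
+# Illustrative: export PP-YOLOE for deployment; swap in your own config/weights
+python tools/export_model.py \
+    -c configs/ppyoloe/ppyoloe_crn_l_300e_coco.yml \
+    -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_l_300e_coco.pdparams \
+    --output_dir=inference_model
+```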
+
+## 4. Detailed Deployment Examples
+- [Python deployment](python)
+- [C++ deployment](cpp)
\ No newline at end of file
diff --git a/deploy/fastdeploy/kunlunxin/cpp/CMakeLists.txt b/deploy/fastdeploy/kunlunxin/cpp/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..41ac6602fed65b027d05cb4f97a1ae6ac2fca077
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/CMakeLists.txt
@@ -0,0 +1,14 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+add_executable(infer_tinypose_demo ${PROJECT_SOURCE_DIR}/pptinypose_infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+target_link_libraries(infer_tinypose_demo ${FASTDEPLOY_LIBS})
+
diff --git a/deploy/fastdeploy/kunlunxin/cpp/README.md b/deploy/fastdeploy/kunlunxin/cpp/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..af6453f41a3e8ea3f57817c017e09c125908e2fd
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/README.md
@@ -0,0 +1,127 @@
+[English](README.md) | 简体中文
+# PaddleDetection KunlunXin XPU C++ Deployment Example
+
+This directory provides `infer.cc`, an example that quickly deploys PPYOLOE models on KunlunXin XPU with acceleration.
+
+## 1. Overview
+PaddleDetection models can be quickly deployed with FastDeploy on KunlunXin XPU hardware. For the model families FastDeploy currently supports, including but not limited to `PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`, `SSD`, `PaddleYOLOv5`, `PaddleYOLOv6`, `PaddleYOLOv7`, `RTMDet`, `CascadeRCNN`, `PSSDet`, `RetinaNet`, `PPYOLOESOD`, `FCOS`, `TTFNet`, `TOOD`, and `GFL`, the constructors and prediction functions of all model classes take exactly the same parameters. To call any of these models, simply follow the PPYOLOE example.
+
+## 2. Environment Preparation
+Before deployment, build the KunlunXin XPU inference library yourself; see [Build and install the KunlunXin XPU deployment environment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装).
+
+## 3. Model Preparation
+Before deployment, prepare the inference model you want to run. You can either use a [pre-exported inference model](../README.md) or [export a PaddleDetection deployment model yourself](../README.md).
+
+## 4. Run the Deployment Example
+Taking inference on Linux as an example, run the following commands in this directory to complete the build and test. FastDeploy 1.0.4 or later (x.x.x>=1.0.4) is required for this model.
+
+### 4.1 Object Detection Example
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/cpp
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Build the deployment example
+mkdir build
+cd build
+# Build infer_demo against the FastDeploy library you compiled for KunlunXin XPU
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-kunlunxin
+make -j
+
+# Download the PPYOLOE model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# Run the deployment example
+./infer_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg
+```
+
+After the run completes, the visualized result is shown below:
+
+

+
+
+### 4.2 Keypoint Detection Example
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/cpp
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the prebuilt FastDeploy library; pick a suitable version from the prebuilt libraries mentioned above
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-gpu-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-gpu-x.x.x.tgz
+
+# Build the deployment example
+mkdir build && cd build
+mv ../fastdeploy-linux-x64-gpu-x.x.x .
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-gpu-x.x.x
+make -j
+
+# Download the PP-TinyPose model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg
+
+# Run the deployment example
+./infer_tinypose_demo PP_TinyPose_256x192_infer hrnet_demo.jpg
+```
+
+After the run completes, the visualized result is shown below:
+
+

+
+
+For multi-person keypoint detection, see the [PPTinyPose pipeline example](./det_keypoint_unite/).
+
+- For how to use other inference backends and other hardware with FastDeploy, see: [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 5. PaddleDetection C++ Interface
+For the model families FastDeploy currently supports, including but not limited to `PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`, `SSD`, `PaddleYOLOv5`, `PaddleYOLOv6`, `PaddleYOLOv7`, `RTMDet`, `CascadeRCNN`, `PSSDet`, `RetinaNet`, `PPYOLOESOD`, `FCOS`, `TTFNet`, `TOOD`, and `GFL`, the constructors and prediction functions of all model classes take exactly the same parameters. To call any of these models, simply follow the PPYOLOE example.
+
+### 5.1 Object Detection and Instance Segmentation Models
+```c++
+fastdeploy::vision::detection::PicoDet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::SOLOv2(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLOE(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLO(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::YOLOv3(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOX(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::FasterRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::MaskRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::SSD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv5(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv6(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv7(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PaddleYOLOv8(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::CascadeRCNN(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PSSDet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::RetinaNet(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::PPYOLOESOD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::FCOS(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::TOOD(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+fastdeploy::vision::detection::GFL(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+```
+
+### 5.2 Keypoint Detection Models
+```c++
+fastdeploy::vision::keypointdetection::PPTinyPose(const string& model_file, const string& params_file, const string& config_file, const RuntimeOption& runtime_option = RuntimeOption(), const ModelFormat& model_format = ModelFormat::PADDLE);
+```
+
+Loads and initializes a PaddleDetection model, where `model_file` and `params_file` are the exported Paddle deployment model files and `config_file` is the deployment configuration yaml file exported alongside them.
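+
+A minimal sketch of loading one of these models on KunlunXin XPU, assuming the `UseKunlunXin` runtime option available in KunlunXin-enabled FastDeploy builds (see `infer.cc` in this directory for the full flow):
+
+```c++
+#include "fastdeploy/vision.h"
+
+int main() {
+  // Minimal sketch: run PPYOLOE on KunlunXin XPU.
+  auto option = fastdeploy::RuntimeOption();
+  option.UseKunlunXin();  // assumption: KunlunXin-enabled FastDeploy build
+
+  auto model = fastdeploy::vision::detection::PPYOLOE(
+      "ppyoloe_crn_l_300e_coco/model.pdmodel",
+      "ppyoloe_crn_l_300e_coco/model.pdiparams",
+      "ppyoloe_crn_l_300e_coco/infer_cfg.yml", option);
+
+  cv::Mat im = cv::imread("000000014439.jpg");
+  fastdeploy::vision::DetectionResult res;
+  if (model.Predict(im, &res)) {
+    std::cout << res.Str() << std::endl;
+  }
+  return 0;
+}
+```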
+
+## 6. More Guides
+- [PaddleDetection C++ API documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
+- [Python deployment](../python)
+
+## 7. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Build the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Build the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Build the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/CMakeLists.txt b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..71cbaa0fde199f00de23d4d090721595285f2a50
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/CMakeLists.txt
@@ -0,0 +1,11 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.12)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/det_keypoint_unite_infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/README.md b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..2dd0fd26a15e94f9287528ad3668a2563e5d77ef
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/README.md
@@ -0,0 +1,70 @@
+[English](README.md) | 简体中文
+# PP-PicoDet + PP-TinyPose (Pipeline) KunlunXin XPU C++ Deployment Example
+
+This directory provides `det_keypoint_unite_infer.cc`, an example of `multi-person keypoint detection on a single image` that deploys the multi-person PP-PicoDet + PP-TinyPose configuration on KunlunXin XPU. Simply run the commands below. **Note**: for standalone deployment of a single PP-TinyPose model, see [PP-TinyPose single model](../README.md).
+
+## 1. Environment Preparation
+Before deployment, confirm your hardware and software environment and download the prebuilt deployment library; see the [FastDeploy installation documentation](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) to install the prebuilt FastDeploy library.
+
+## 2. Model Preparation
+Before deployment, prepare the inference model you want to run. You can either use a [pre-exported inference model](../../README.md) or [export a PaddleDetection deployment model yourself](../../README.md).
+
+## 3. Run the Deployment Example
+Taking inference on Linux as an example, run the following commands in this directory to complete the build and test. FastDeploy 1.0.4 or later (x.x.x>=1.0.4) is required for this model.
+
+```bash
+mkdir build
+cd build
+# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the PP-TinyPose and PP-PicoDet model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+tar -xvf PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/000000018491.jpg
+
+# Run the deployment example
+./infer_demo PP_PicoDet_V2_S_Pedestrian_320x320_infer PP_TinyPose_256x192_infer 000000018491.jpg
+```
+
+After the example finishes, the visualized result is shown below:
+
+
+
+
+
+- Note: the commands above only work on Linux or macOS. For using the SDK on Windows, see: [Using the FastDeploy C++ SDK on Windows](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/use_sdk_on_windows.md)
+- For how to use other inference backends and other hardware with FastDeploy, see: [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 4. PP-TinyPose Pipeline C++ Interface
+
+```c++
+fastdeploy::pipeline::PPTinyPose(
+ fastdeploy::vision::detection::PicoDet* det_model,
+ fastdeploy::vision::keypointdetection::PPTinyPose* pptinypose_model)
+```
+
+PPTinyPose pipeline loading and initialization: det_model is the initialized detection model and pptinypose_model is the initialized keypoint detection model.
+
+
+## 5. Further Guides
+- [PaddleDetection C++ API Reference](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../../)
+- [Python deployment](../../python/det_keypoint_unite/)
+
+## 6. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Building the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Building the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Building the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
new file mode 100755
index 0000000000000000000000000000000000000000..089213ff942fd126729f59464fcb38c7e5deeddf
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/det_keypoint_unite/det_keypoint_unite_infer.cc
@@ -0,0 +1,84 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#include "fastdeploy/pipeline.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void KunlunXinInfer(const std::string& det_model_dir,
+ const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto option = fastdeploy::RuntimeOption();
+ option.UseKunlunXin();
+ auto det_model_file = det_model_dir + sep + "model.pdmodel";
+ auto det_params_file = det_model_dir + sep + "model.pdiparams";
+ auto det_config_file = det_model_dir + sep + "infer_cfg.yml";
+ auto det_model = fastdeploy::vision::detection::PicoDet(
+ det_model_file, det_params_file, det_config_file, option);
+ if (!det_model.Initialized()) {
+ std::cerr << "Detection Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+
+ auto pipeline =
+ fastdeploy::pipeline::PPTinyPose(
+ &det_model, &tinypose_model);
+ pipeline.detection_model_score_threshold = 0.5;
+ if (!pipeline.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.2);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "TinyPose visualized result saved in ./vis_result.jpg"
+ << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+  if (argc < 4) {
+    std::cout << "Usage: infer_demo path/to/detection_model_dir "
+                 "path/to/pptinypose_model_dir path/to/image, "
+                 "e.g. ./infer_demo ./picodet_model_dir ./pptinypose_model_dir "
+                 "./test.jpeg"
+              << std::endl;
+    return -1;
+  }
+
+ KunlunXinInfer(argv[1], argv[2], argv[3]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/kunlunxin/cpp/infer.cc b/deploy/fastdeploy/kunlunxin/cpp/infer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4f80bb4398646e176faae635b86887561af93644
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/infer.cc
@@ -0,0 +1,60 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void KunlunXinInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseKunlunXin();
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ if (!model.Initialized()) {
+ std::cerr << "Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image, "
+ "e.g ./infer_demo ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+ KunlunXinInfer(argv[1], argv[2]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/kunlunxin/cpp/pptinypose_infer.cc b/deploy/fastdeploy/kunlunxin/cpp/pptinypose_infer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..168d167d1c6ce1a1e21cdca8e52969bb8520dd1f
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/cpp/pptinypose_infer.cc
@@ -0,0 +1,65 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void KunlunXinInfer(const std::string& tinypose_model_dir,
+ const std::string& image_file) {
+ auto tinypose_model_file = tinypose_model_dir + sep + "model.pdmodel";
+ auto tinypose_params_file = tinypose_model_dir + sep + "model.pdiparams";
+ auto tinypose_config_file = tinypose_model_dir + sep + "infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseKunlunXin();
+ auto tinypose_model = fastdeploy::vision::keypointdetection::PPTinyPose(
+ tinypose_model_file, tinypose_params_file, tinypose_config_file, option);
+ if (!tinypose_model.Initialized()) {
+ std::cerr << "TinyPose Model Failed to initialize." << std::endl;
+ return;
+ }
+
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::KeyPointDetectionResult res;
+ if (!tinypose_model.Predict(&im, &res)) {
+ std::cerr << "TinyPose Prediction Failed." << std::endl;
+ return;
+ } else {
+ std::cout << "TinyPose Prediction Done!" << std::endl;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto tinypose_vis_im =
+ fastdeploy::vision::VisKeypointDetection(im, res, 0.5);
+ cv::imwrite("tinypose_vis_result.jpg", tinypose_vis_im);
+ std::cout << "TinyPose visualized result saved in ./tinypose_vis_result.jpg"
+ << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image, "
+ "e.g ./infer_demo ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+ KunlunXinInfer(argv[1], argv[2]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/kunlunxin/python/README.md b/deploy/fastdeploy/kunlunxin/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..45ce3168a6187f37bdbbb9d6acf24eb2b0da874c
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/README.md
@@ -0,0 +1,117 @@
+[English](README.md) | 简体中文
+# PaddleDetection Python Deployment Example on KunlunXin XPU
+
+This directory provides `infer.py` to quickly deploy the PPYOLOE model with acceleration on KunlunXin XPU.
+
+## 1. Overview
+PaddleDetection models can be rapidly deployed with FastDeploy on KunlunXin XPU as well as NVIDIA GPU, X86 CPU, Phytium CPU, ARM CPU and Intel GPU (discrete/integrated) hardware. The model families currently supported by FastDeploy include, but are not limited to, `PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`, `SSD`, `PaddleYOLOv5`, `PaddleYOLOv6`, `PaddleYOLOv7`, `RTMDet`, `CascadeRCNN`, `PSSDet`, `RetinaNet`, `PPYOLOESOD`, `FCOS`, `TTFNet`, `TOOD` and `GFL`; the constructors and predict functions of all these classes take exactly the same parameters, so any of them can be called by simply following the PPYOLOE example.
+
+## 2. Deployment Environment Setup
+Before deployment, build and install the FastDeploy Python wheel for KunlunXin XPU yourself; see [KunlunXin XPU deployment environment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 3. Deployment Model Preparation
+Before deployment, prepare the inference model you want to run; you can either use a [pre-exported inference model](../README.md) or [export a PaddleDetection deployment model yourself](../README.md).
+
+## 4. Run the Deployment Example
+Taking inference on Linux as an example, run the following commands in this directory. FastDeploy version 1.0.4 or above (x.x.x >= 1.0.4) is required for this model.
+
+### 4.1 Object Detection Example
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/python
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the PPYOLOE model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# Run the deployment example
+# Inference on KunlunXin XPU
+python infer.py --model_dir ppyoloe_crn_l_300e_coco --image_file 000000014439.jpg
+```
+
+After the example finishes, the visualized result is shown below:
+
+
+
+
+
+### 4.2 Keypoint Detection Example
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/python
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the PP-TinyPose model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/hrnet_demo.jpg
+
+# Run the deployment example
+python pptinypose_infer.py --model_dir PP_TinyPose_256x192_infer --image_file hrnet_demo.jpg
+```
+
+After the example finishes, the visualized result is shown below:
+
+
+
+
+
+For multi-person keypoint detection, see the [PPTinyPose pipeline example](./det_keypoint_unite/)
+
+
+## 5. Deployment Example Options
+
+|Parameter|Description|Default|
+|---|---|---|
+|--model_dir|Path to the model directory|None|
+|--image_file|Path to the test image|None|
+
+## 6. PaddleDetection Python Interface
+The model families currently supported by FastDeploy include, but are not limited to, `PPYOLOE`, `PicoDet`, `PaddleYOLOX`, `PPYOLO`, `FasterRCNN`, `SSD`, `PaddleYOLOv5`, `PaddleYOLOv6`, `PaddleYOLOv7`, `RTMDet`, `CascadeRCNN`, `PSSDet`, `RetinaNet`, `PPYOLOESOD`, `FCOS`, `TTFNet`, `TOOD` and `GFL`; the constructors and predict functions of all these classes take exactly the same parameters, so any of them can be called by simply following the PPYOLOE example.
+
+### 6.1 目标检测及实例分割模型
+```python
+fastdeploy.vision.detection.PPYOLOE(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PicoDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOX(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.YOLOv3(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PPYOLO(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.FasterRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.MaskRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.SSD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv5(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv6(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PaddleYOLOv7(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.RTMDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.CascadeRCNN(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PSSDet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.RetinaNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.PPYOLOESOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.FCOS(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.TTFNet(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.TOOD(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+fastdeploy.vision.detection.GFL(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+
+### 6.2 Keypoint Detection Model
+```python
+fd.vision.keypointdetection.PPTinyPose(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
+```
+
+PaddleDetection model loading and initialization: model_file and params_file are the exported Paddle deployment model files, and config_file is the deployment configuration YAML file exported alongside them by PaddleDetection.
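+
+A minimal usage sketch, condensed from the `infer.py` example above. Since the classes listed share the same signature, swapping `PPYOLOE` for any other class name in section 6.1 is the only change needed for a different model; the file paths assume the `ppyoloe_crn_l_300e_coco` directory downloaded in section 4.1:
+
+```python
+import cv2
+import fastdeploy as fd
+
+option = fd.RuntimeOption()
+option.use_kunlunxin()
+
+# model.pdmodel / model.pdiparams / infer_cfg.yml are the standard
+# files inside an exported PaddleDetection deployment model directory
+model = fd.vision.detection.PPYOLOE(
+    "ppyoloe_crn_l_300e_coco/model.pdmodel",
+    "ppyoloe_crn_l_300e_coco/model.pdiparams",
+    "ppyoloe_crn_l_300e_coco/infer_cfg.yml",
+    runtime_option=option)
+
+result = model.predict(cv2.imread("000000014439.jpg"))
+print(result)
+```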
+
+## 7. Further Guides
+- [PaddleDetection Python API Reference](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
+- [C++ deployment](../cpp)
+
+## 8. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Building the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Building the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Building the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
diff --git a/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/README.md b/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..3a7359f23f134e41b91fe38675c05658c29eae8d
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/README.md
@@ -0,0 +1,65 @@
+[English](README.md) | 简体中文
+# PP-PicoDet + PP-TinyPose (Pipeline) Python Deployment Example on KunlunXin XPU
+
+This directory provides `det_keypoint_unite_infer.py` to quickly run the multi-person pipeline PP-PicoDet + PP-TinyPose for `single-image multi-person keypoint detection` on KunlunXin XPU; just follow the steps below. **Note**: to deploy PP-TinyPose as a standalone model, see [PP-TinyPose standalone deployment](../README.md)
+
+## 1. Deployment Environment Setup
+Before deployment, confirm your software and hardware environment and download the prebuilt deployment library; see the [FastDeploy installation docs](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#FastDeploy预编译库安装) to install the prebuilt FastDeploy library.
+
+## 2. Deployment Model Preparation
+Before deployment, prepare the inference model you want to run; you can either use a [pre-exported inference model](../../README.md) or [export a PaddleDetection deployment model yourself](../../README.md).
+
+## 3. Run the Deployment Example
+
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the PP-TinyPose and PP-PicoDet model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_TinyPose_256x192_infer.tgz
+tar -xvf PP_TinyPose_256x192_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+tar -xvf PP_PicoDet_V2_S_Pedestrian_320x320_infer.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/000000018491.jpg
+
+# Run the deployment example
+python det_keypoint_unite_infer.py --tinypose_model_dir PP_TinyPose_256x192_infer --det_model_dir PP_PicoDet_V2_S_Pedestrian_320x320_infer --image_file 000000018491.jpg
+```
+
+After the example finishes, the visualized result is shown below:
+
+
+
+
+
+- For how to use other inference backends and other hardware with FastDeploy, see: [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+
+## 4. Deployment Example Options
+
+|Parameter|Description|Default|
+|---|---|---|
+|--tinypose_model_dir|Path to the keypoint model directory|None|
+|--det_model_dir|Path to the detection model directory|None|
+|--image_file|Path to the test image|None|
+
+## 5. PPTinyPose Pipeline Python Interface
+
+```python
+fd.pipeline.PPTinyPose(det_model=None, pptinypose_model=None)
+```
+
+PPTinyPose pipeline loading and initialization: det_model is a detection model initialized with `fd.vision.detection.PicoDet`, and pptinypose_model is a keypoint detection model initialized with `fd.vision.keypointdetection.PPTinyPose`.
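+
+A minimal sketch of wiring the two models together, condensed from the `det_keypoint_unite_infer.py` example in this directory; the model directories are the ones downloaded in section 3:
+
+```python
+import cv2
+import fastdeploy as fd
+
+option = fd.RuntimeOption()
+option.use_kunlunxin()
+
+# Each model directory holds the standard exported files:
+# model.pdmodel, model.pdiparams and infer_cfg.yml
+det_model = fd.vision.detection.PicoDet(
+    "PP_PicoDet_V2_S_Pedestrian_320x320_infer/model.pdmodel",
+    "PP_PicoDet_V2_S_Pedestrian_320x320_infer/model.pdiparams",
+    "PP_PicoDet_V2_S_Pedestrian_320x320_infer/infer_cfg.yml",
+    runtime_option=option)
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+    "PP_TinyPose_256x192_infer/model.pdmodel",
+    "PP_TinyPose_256x192_infer/model.pdiparams",
+    "PP_TinyPose_256x192_infer/infer_cfg.yml",
+    runtime_option=option)
+
+pipeline = fd.pipeline.PPTinyPose(det_model, tinypose_model)
+pipeline.detection_model_score_threshold = 0.5  # filter detector boxes
+result = pipeline.predict(cv2.imread("000000018491.jpg"))
+print(result)
+```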
+
+## 6. Further Guides
+- [PaddleDetection Python API Reference](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/object_detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../../)
+- [C++ deployment](../../cpp)
+
+## 7. FAQ
+- [How to switch the model inference backend](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/how_to_change_backend.md)
+- [Using Intel GPUs (discrete/integrated)](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tutorials/intel_gpu/README.md)
+- [Building the CPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/cpu.md)
+- [Building the GPU deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/gpu.md)
+- [Building the Jetson deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/jetson.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/det_keypoint_unite_infer.py b/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/det_keypoint_unite_infer.py
new file mode 100755
index 0000000000000000000000000000000000000000..48e99b26fd869aeb14812be2b2914153659d44e1
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/det_keypoint_unite/det_keypoint_unite_infer.py
@@ -0,0 +1,67 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--tinypose_model_dir",
+ required=True,
+ help="path of paddletinypose model directory")
+    parser.add_argument(
+        "--det_model_dir",
+        required=True,
+        help="path of paddledetection model directory")
+ parser.add_argument(
+ "--image_file", required=True, help="path of test image file.")
+ return parser.parse_args()
+
+
+def build_picodet_option(args):
+ option = fd.RuntimeOption()
+ option.use_kunlunxin()
+ return option
+
+
+def build_tinypose_option(args):
+ option = fd.RuntimeOption()
+ option.use_kunlunxin()
+ return option
+
+
+args = parse_arguments()
+picodet_model_file = os.path.join(args.det_model_dir, "model.pdmodel")
+picodet_params_file = os.path.join(args.det_model_dir, "model.pdiparams")
+picodet_config_file = os.path.join(args.det_model_dir, "infer_cfg.yml")
+
+# setup runtime
+runtime_option = build_picodet_option(args)
+det_model = fd.vision.detection.PicoDet(
+ picodet_model_file,
+ picodet_params_file,
+ picodet_config_file,
+ runtime_option=runtime_option)
+
+tinypose_model_file = os.path.join(args.tinypose_model_dir, "model.pdmodel")
+tinypose_params_file = os.path.join(args.tinypose_model_dir, "model.pdiparams")
+tinypose_config_file = os.path.join(args.tinypose_model_dir, "infer_cfg.yml")
+# setup runtime
+runtime_option = build_tinypose_option(args)
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+ tinypose_model_file,
+ tinypose_params_file,
+ tinypose_config_file,
+ runtime_option=runtime_option)
+
+# predict
+im = cv2.imread(args.image_file)
+pipeline = fd.pipeline.PPTinyPose(det_model, tinypose_model)
+pipeline.detection_model_score_threshold = 0.5
+pipeline_result = pipeline.predict(im)
+print("Paddle TinyPose Result:\n", pipeline_result)
+
+# visualize
+vis_im = fd.vision.vis_keypoint_detection(
+ im, pipeline_result, conf_threshold=0.2)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("TinyPose visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/kunlunxin/python/infer.py b/deploy/fastdeploy/kunlunxin/python/infer.py
new file mode 100755
index 0000000000000000000000000000000000000000..2916bd66836ba452108f3862429244b344d87e88
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/infer.py
@@ -0,0 +1,45 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir", required=True, help="Path of PaddleDetection model.")
+ parser.add_argument(
+ "--image_file", type=str, required=True, help="Path of test image file.")
+ return parser.parse_args()
+
+args = parse_arguments()
+
+runtime_option = fd.RuntimeOption()
+runtime_option.use_kunlunxin()
+
+model_dir = args.model_dir
+
+model_file = os.path.join(model_dir, "model.pdmodel")
+params_file = os.path.join(model_dir, "model.pdiparams")
+config_file = os.path.join(model_dir, "infer_cfg.yml")
+
+# set up the model with the runtime option
+model = fd.vision.detection.PPYOLOE(
+ model_file, params_file, config_file, runtime_option=runtime_option)
+
+# predict
+im = cv2.imread(args.image_file)
+result = model.predict(im)
+print(result)
+
+# visualize
+vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("Visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/kunlunxin/python/pptinypose_infer.py b/deploy/fastdeploy/kunlunxin/python/pptinypose_infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f30f594b7fffa5e53ab8fe6bb1025b7355e5242e
--- /dev/null
+++ b/deploy/fastdeploy/kunlunxin/python/pptinypose_infer.py
@@ -0,0 +1,42 @@
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_dir",
+ required=True,
+ help="path of PP-TinyPose model directory")
+ parser.add_argument(
+ "--image_file", required=True, help="path of test image file.")
+ return parser.parse_args()
+
+
+args = parse_arguments()
+
+runtime_option = fd.RuntimeOption()
+runtime_option.use_kunlunxin()
+
+tinypose_model_file = os.path.join(args.model_dir, "model.pdmodel")
+tinypose_params_file = os.path.join(args.model_dir, "model.pdiparams")
+tinypose_config_file = os.path.join(args.model_dir, "infer_cfg.yml")
+# setup runtime
+tinypose_model = fd.vision.keypointdetection.PPTinyPose(
+ tinypose_model_file,
+ tinypose_params_file,
+ tinypose_config_file,
+ runtime_option=runtime_option)
+
+# predict
+im = cv2.imread(args.image_file)
+tinypose_result = tinypose_model.predict(im)
+print("Paddle TinyPose Result:\n", tinypose_result)
+
+# visualize
+vis_im = fd.vision.vis_keypoint_detection(
+ im, tinypose_result, conf_threshold=0.5)
+cv2.imwrite("visualized_result.jpg", vis_im)
+print("TinyPose visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/quantize/README.md b/deploy/fastdeploy/quantize/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..aff080ef644745519fefd916b3211230a1e1a603
--- /dev/null
+++ b/deploy/fastdeploy/quantize/README.md
@@ -0,0 +1,64 @@
+[English](README.md) | 简体中文
+
+# Deploying Quantized PaddleDetection Models with FastDeploy
+
+FastDeploy supports deploying quantized models and provides a one-click model auto-compression tool.
+You can quantize a model with this tool yourself and deploy the result, or directly download the quantized models provided by FastDeploy.
+
+## 1. FastDeploy One-Click Auto-Compression Tool
+
+FastDeploy provides a one-click auto-compression tool that quantizes a model simply from an input config file.
+For a detailed tutorial, see: [One-click auto-compression tool](https://github.com/PaddlePaddle/FastDeploy/tree/develop/tools/common_tools/auto_compression). **Note**: a quantized detection model still needs the infer_cfg.yml file from the FP32 model folder; a self-quantized model folder does not contain this yaml file, so copy it over from the FP32 model folder.
+
+## 2. Quantized PaddleDetection Models
+
+You can also directly download and deploy the quantized models in the table below (click the model name to download).
+
+Notes on the benchmark tables:
+- Runtime latency is the model's inference latency on each runtime, including the CPU-to-GPU data copy, GPU inference, and GPU-to-CPU data copy; it excludes the model's own pre/post-processing.
+- End-to-end latency is the latency in a real inference scenario, including pre/post-processing.
+- All reported latencies are averages over 1000 inference runs, in milliseconds.
+- INT8 + FP16 means the FP16 inference option is enabled in the runtime while running the INT8 quantized model.
+- INT8 + FP16 + PM additionally enables the pinned-memory option, which speeds up GPU-to-CPU data copies.
+- The maximum speedup is the FP32 latency divided by the fastest INT8 latency.
+- The strategy is quantization-aware distillation training with a small amount of unlabeled data, with accuracy validated on the full validation set; the INT8 accuracy shown is not necessarily the best achievable.
+- CPU: Intel(R) Xeon(R) Gold 6271C with the thread count fixed to 1 in all tests; GPU: Tesla T4 with TensorRT 8.4.15.
+
+
+- Runtime Benchmark
+
+| Model | Backend | Hardware | FP32 Runtime Latency | INT8 Runtime Latency | INT8 + FP16 Runtime Latency | INT8+FP16+PM Runtime Latency | Max Speedup | FP32 mAP | INT8 mAP | Quantization Method |
+| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar ) | TensorRT | GPU | 27.90 | 6.39 |6.44|5.95 | 4.67 | 51.4 | 50.7 | Quantized distillation training |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar ) | Paddle-TensorRT | GPU | 30.89 |None | 13.78 |14.01 | 2.24 | 51.4 | 50.5 | Quantized distillation training |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar) | ONNX Runtime | CPU | 1057.82 | 449.52 |None|None | 2.35 |51.4 | 50.0 |Quantized distillation training |
+
+NOTE:
+- TensorRT is faster than Paddle-TensorRT because the multiclass_nms3 operator is removed at runtime
+
+- End-to-End Benchmark
+
+| Model | Backend | Hardware | FP32 End2End Latency | INT8 End2End Latency | INT8 + FP16 End2End Latency | INT8+FP16+PM End2End Latency | Max Speedup | FP32 mAP | INT8 mAP | Quantization Method |
+| ------------------- | -----------------|-----------| -------- |-------- |-------- | --------- |-------- |----- |----- |----- |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar ) | TensorRT | GPU | 35.75 | 15.42 |20.70|20.85 | 2.32 | 51.4 | 50.7 | Quantized distillation training |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar ) | Paddle-TensorRT | GPU | 33.48 |None | 18.47 |18.03 | 1.81 | 51.4 | 50.5 | Quantized distillation training |
+| [ppyoloe_crn_l_300e_coco](https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco_qat.tar) | ONNX Runtime | CPU | 1067.17 | 461.037 |None|None | 2.31 |51.4 | 50.0 |Quantized distillation training |
+
+
+For benchmark comparisons of quantized models, see [Quantized model benchmarks](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md)
+
+## 3. Hardware Supporting Quantized Model Deployment
+
+Deploying a quantized model with FastDeploy follows roughly the same process as an FP32 model; the only difference is the model itself. Any hardware-specific handling for quantized models is called out in the corresponding docs, so refer to the hardware links below.
+
+|Hardware|Supported|Guide|Python|C++|
+|:---:|:---:|:---:|:---:|:---:|
+|X86 CPU|✅|[Link](cpu-gpu)|✅|✅|
+|NVIDIA GPU|✅|[Link](cpu-gpu)|✅|✅|
+|Phytium CPU|✅|[Link](cpu-gpu)|✅|✅|
+|ARM CPU|✅|[Link](cpu-gpu)|✅|✅|
+|Intel GPU (integrated)|✅|[Link](cpu-gpu)|✅|✅|
+|Intel GPU (discrete)|✅|[Link](cpu-gpu)|✅|✅|
+|KunlunXin|✅|[Link](kunlun)|✅|✅|
+|Ascend|✅|[Link](ascend)|✅|✅|
+|Rockchip|✅|[Link](rockchip)|✅|✅|
+|Amlogic|✅|[Link](amlogic)|--|✅|
+|Sophgo|✅|[Link](sophgo)|✅|✅|
diff --git a/deploy/fastdeploy/rockchip/rknpu2/README.md b/deploy/fastdeploy/rockchip/rknpu2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..18a19cc04eb1fffdb6fc48ca5cab753007abe793
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/README.md
@@ -0,0 +1,121 @@
+[English](README.md) | 简体中文
+
+# PaddleDetection RKNPU2 Deployment Examples
+
+## 1. Overview
+RKNPU2 provides a high-performance interface for accessing the Rockchip NPU and supports deployment on the following hardware:
+- RK3566/RK3568
+- RK3588/RK3588S
+- RV1103/RV1106
+
+The PaddleDetection models already tested on RKNPU2 are:
+
+- Picodet
+- PPYOLOE(int8)
+- YOLOV8
+
+If you need detailed speed information, see the [RKNPU2 model speed table](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)
+
+## 2. Using Pre-Exported Models
+
+### Converting an ONNX Model to RKNN
+
+For convenience, we provide a Python script that quickly converts an ONNX model to an RKNN model using our preset config files:
+
+```bash
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml \
+ --target_platform rk3588
+```
+
+### RKNN Model List
+
+For easy testing, we provide the picodet and ppyoloe models below; unzip them and they are ready to use:
+
+| Model | Download |
+|-----------------------------|-----------------------------------------------------------------------------------|
+| picodet_s_416_coco_lcnet | https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/picodet_s_416_coco_lcnet.zip |
+| ppyoloe_plus_crn_s_80e_coco | https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/ppyoloe_plus_crn_s_80e_coco.zip |
+
+
+## 3. Exporting and Converting PaddleDetection Models Yourself
+
+Before deploying on RKNPU, convert the Paddle model to an RKNN model as follows:
+
+* Convert the Paddle dygraph model to ONNX; see [PaddleDetection model export](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/EXPORT_MODEL.md) and make sure to set **export.nms=True** during conversion.
+* To convert the ONNX model to RKNN, follow the [conversion doc](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/export.md).
+
+### 3.1 Model Conversion Example
+
+#### 3.1.1 Caveats
+
+Note the following when deploying PaddleDetection models on RKNPU2:
+
+* The exported model must include the decode step
+* RKNPU2 does not support NMS, so the output nodes must be truncated before the NMS node
+* Due to limitations of the RKNPU2 Div operator, the output nodes must also be truncated before the Div operator
+
+#### 3.1.2 Converting the Paddle Model to ONNX
+
+Rockchip's rknn-toolkit2 does not yet support exporting Paddle models directly to RKNN, so first export the Paddle model to ONNX, then convert the ONNX model to RKNN.
+
+```bash
+# Using Picodet as an example
+# Download and extract the Paddle static-graph model
+wget https://paddledet.bj.bcebos.com/deploy/Inference/picodet_s_416_coco_lcnet.tar
+tar xvf picodet_s_416_coco_lcnet.tar
+
+# Convert the static graph to ONNX; note: keep save_file aligned with the archive name
+paddle2onnx --model_dir picodet_s_416_coco_lcnet \
+ --model_filename model.pdmodel \
+ --params_filename model.pdiparams \
+ --save_file picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+ --enable_dev_version True
+
+# Fix the input shape
+python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+ --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+ --input_shape_dict "{'image':[1,3,416,416], 'scale_factor':[1,2]}"
+```
+
+#### 3.1.3 Writing the YAML File
+
+**Modify the normalize parameter**
+
+If you need to run the normalize operation on the NPU, configure the normalize parameters according to your model, for example:
+
+```yaml
+mean:
+ -
+ - 123.675
+ - 116.28
+ - 103.53
+std:
+ -
+ - 58.395
+ - 57.12
+ - 57.375
+```
+
+**Modify the outputs parameter**
+
+The output node names of the converted model vary with the Paddle2ONNX version. Visualize the model with [Netron](https://netron.app) and locate the NonMaxSuppression node marked by the blue box below; the node names in the red boxes are the target names.
+
+## 4. Model Visualization
+For example, visualizing with Netron produces the following image:
+
+
+
+Locate the NonMaxSuppression node marked by the blue box; the two node names in the red boxes are p2o.Div.79 and p2o.Concat.9, so modify the outputs parameter as follows:
+
+```yaml
+outputs_nodes:
+  - 'p2o.Div.79'
+  - 'p2o.Concat.9'
+```
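+
+If you prefer not to read the node names off the Netron canvas, a short script can list the candidates. This is a minimal sketch, assuming only that the `onnx` package is installed and using the model path from the export step above; the traversal is illustrative and not part of the FastDeploy tooling:
+
+```python
+import onnx
+
+# Load the ONNX model exported by paddle2onnx above
+model = onnx.load("picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx")
+
+# Print every NonMaxSuppression node together with its input tensor
+# names; the nodes producing those tensors are the candidates for the
+# outputs_nodes entries in the yaml config
+for node in model.graph.node:
+    if node.op_type == "NonMaxSuppression":
+        print(node.name, "inputs:", list(node.input))
+```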
+
+
+## 5. Detailed Deployment Examples
+- [Overall RKNN deployment tutorial](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)
+- [C++ deployment](cpp)
+- [Python deployment](python)
diff --git a/deploy/fastdeploy/rockchip/rknpu2/cpp/CMakeLists.txt b/deploy/fastdeploy/rockchip/rknpu2/cpp/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a46b11f81383e6de7efc7117716d7079b57b34fc
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/cpp/CMakeLists.txt
@@ -0,0 +1,11 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 3.10)
+project(infer_demo)
+
+set(CMAKE_CXX_STANDARD 14)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeployConfig.cmake)
+include_directories(${FastDeploy_INCLUDE_DIRS})
+add_executable(infer_demo infer.cc)
+target_link_libraries(infer_demo ${FastDeploy_LIBS})
diff --git a/deploy/fastdeploy/rockchip/rknpu2/cpp/README.md b/deploy/fastdeploy/rockchip/rknpu2/cpp/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d67581fabd98663c73c6cd851caa4c756d78de11
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/cpp/README.md
@@ -0,0 +1,47 @@
+[English](README.md) | 简体中文
+# PaddleDetection RKNPU2 C++ Deployment Example
+
+This directory shows how to deploy PaddleDetection models on RKNPU2; the walkthrough below uses PPYOLOE as an example.
+
+## 1. Deployment Environment Setup
+Before deployment, confirm the following two steps:
+
+1. The software and hardware environment meets the requirements
+2. Download a prebuilt deployment library, or build the FastDeploy repository from source, according to your development environment
+
+For both steps, see [Building the RKNPU2 deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)
+
+## 2. Deployment Model Preparation
+
+For model conversion, see the [model conversion doc](../README.md)
+
+## 3. Run the Deployment Example
+
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/rockchip/rknpu2/cpp
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Build the deployment example
+mkdir build && cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j8
+
+wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/ppyoloe_plus_crn_s_80e_coco.zip
+unzip ppyoloe_plus_crn_s_80e_coco.zip
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# Run the deployment example
+# CPU inference
+./infer_demo ./ppyoloe_plus_crn_s_80e_coco 000000014439.jpg 0
+# RKNPU2 inference
+./infer_demo ./ppyoloe_plus_crn_s_80e_coco 000000014439.jpg 1
+```
+
+## 4. Further Guides
+RKNPU expects model inputs in NHWC format, and the image normalization step is embedded into the model when converting to RKNN. When deploying with FastDeploy, you therefore need to disable normalization and the layout conversion in the preprocessing stage first, by calling `DisableNormalize()` and `DisablePermute()` (C++) or `disable_normalize()` and `disable_permute()` (Python).
+
+- [Python deployment](../python)
+- [Converting PaddleDetection RKNN models](../README.md)
diff --git a/deploy/fastdeploy/rockchip/rknpu2/cpp/infer.cc b/deploy/fastdeploy/rockchip/rknpu2/cpp/infer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..79b2091f205b46cfa73f15cc2dd22f1403981e15
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/cpp/infer.cc
@@ -0,0 +1,96 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+
+void ONNXInfer(const std::string& model_dir, const std::string& image_file) {
+ std::string model_file = model_dir + "/ppyoloe_plus_crn_s_80e_coco.onnx";
+ std::string params_file;
+ std::string config_file = model_dir + "/infer_cfg.yml";
+ auto option = fastdeploy::RuntimeOption();
+ option.UseCpu();
+ auto format = fastdeploy::ModelFormat::ONNX;
+
+ auto model = fastdeploy::vision::detection::PPYOLOE(
+ model_file, params_file, config_file, option, format);
+
+ fastdeploy::TimeCounter tc;
+ tc.Start();
+ auto im = cv::imread(image_file);
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ tc.End();
+ tc.PrintInfo("PPDet in ONNX");
+
+ std::cout << res.Str() << std::endl;
+ cv::imwrite("infer_onnx.jpg", vis_im);
+ std::cout << "Visualized result saved in ./infer_onnx.jpg" << std::endl;
+}
+
+void RKNPU2Infer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file =
+ model_dir + "/ppyoloe_plus_crn_s_80e_coco_rk3588_quantized.rknn";
+ auto params_file = "";
+ auto config_file = model_dir + "/infer_cfg.yml";
+
+ auto option = fastdeploy::RuntimeOption();
+ option.UseRKNPU2();
+
+ auto format = fastdeploy::ModelFormat::RKNN;
+
+ auto model = fastdeploy::vision::detection::PPYOLOE(
+ model_file, params_file, config_file, option, format);
+
+ model.GetPreprocessor().DisablePermute();
+ model.GetPreprocessor().DisableNormalize();
+ model.GetPostprocessor().ApplyNMS();
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ fastdeploy::TimeCounter tc;
+ tc.Start();
+ if (!model.Predict(&im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+ tc.End();
+ tc.PrintInfo("PPDet in RKNPU2");
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("infer_rknpu2.jpg", vis_im);
+ std::cout << "Visualized result saved in ./infer_rknpu2.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+  if (argc < 4) {
+    std::cout
+        << "Usage: infer_demo path/to/model_dir path/to/image run_option, "
+           "run_option is 0 for ONNX on CPU and 1 for RKNPU2, "
+           "e.g. ./infer_demo ./model_dir ./test.jpeg 1"
+        << std::endl;
+    return -1;
+  }
+
+ if (std::atoi(argv[3]) == 0) {
+ ONNXInfer(argv[1], argv[2]);
+ } else if (std::atoi(argv[3]) == 1) {
+ RKNPU2Infer(argv[1], argv[2]);
+ }
+ return 0;
+}
diff --git a/deploy/fastdeploy/rockchip/rknpu2/python/README.md b/deploy/fastdeploy/rockchip/rknpu2/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d231cdaa305f5a194968e1c8ce597d7335a465dd
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/python/README.md
@@ -0,0 +1,41 @@
+[English](README.md) | 简体中文
+# PaddleDetection RKNPU2 Python Deployment Example
+
+This directory shows how to deploy PaddleDetection models on RKNPU2; the walkthrough below uses PPYOLOE as an example.
+
+## 1. Deployment Environment Setup
+Before deployment, confirm the following:
+
+- 1. The software and hardware environment meets the requirements; for RKNPU2 environment setup, see the [FastDeploy environment requirements](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/rknpu2/rknpu2.md)
+
+## 2. Deployment Model Preparation
+
+For model conversion, see the [model conversion doc](../README.md)
+
+## 3. Run the Deployment Example
+
+This directory provides `infer.py` to quickly deploy PPYOLOE on RKNPU. Run the following script:
+
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/rockchip/rknpu2/python
+# Note: if the fastdeploy example code below is missing on the current branch, switch to the develop branch
+# git checkout develop
+
+# Download the model and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/rknpu2/ppyoloe_plus_crn_s_80e_coco.zip
+unzip ppyoloe_plus_crn_s_80e_coco.zip
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# Run the deployment example
+python3 infer.py --model_file ./ppyoloe_plus_crn_s_80e_coco/ppyoloe_plus_crn_s_80e_coco_rk3588_quantized.rknn \
+ --config_file ./ppyoloe_plus_crn_s_80e_coco/infer_cfg.yml \
+ --image_file 000000014439.jpg
+```
+
+## 4. Further Guides
+RKNPU expects model inputs in NHWC format, and the image normalization step is embedded into the model when converting to RKNN. When deploying with FastDeploy, you therefore need to disable normalization and the layout conversion in the preprocessing stage first, by calling `DisableNormalize()` and `DisablePermute()` (C++) or `disable_normalize()` and `disable_permute()` (Python).
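+
+The snippet below, condensed from the `infer.py` example in this directory, shows where these calls go once the model is constructed:
+
+```python
+import fastdeploy as fd
+
+option = fd.RuntimeOption()
+option.use_rknpu2()
+
+model = fd.vision.detection.PPYOLOE(
+    "./ppyoloe_plus_crn_s_80e_coco/ppyoloe_plus_crn_s_80e_coco_rk3588_quantized.rknn",
+    "",  # RKNN models carry their weights, so no params file is needed
+    "./ppyoloe_plus_crn_s_80e_coco/infer_cfg.yml",
+    runtime_option=option,
+    model_format=fd.ModelFormat.RKNN)
+
+# Normalization and the HWC->CHW permutation are baked into the RKNN
+# model, so disable them in the FastDeploy preprocessing pipeline
+model.preprocessor.disable_normalize()
+model.preprocessor.disable_permute()
+```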
+
+- [C++ deployment](../cpp)
+- [Converting PaddleDetection RKNN models](../README.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/rockchip/rknpu2/python/infer.py b/deploy/fastdeploy/rockchip/rknpu2/python/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5ac057ba08b5a17305a725ef69a1c4203be1e84
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rknpu2/python/infer.py
@@ -0,0 +1,68 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fastdeploy as fd
+import cv2
+import os
+
+
+def parse_arguments():
+ import argparse
+ import ast
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_file",
+ default="./ppyoloe_plus_crn_s_80e_coco/ppyoloe_plus_crn_s_80e_coco_rk3588_quantized.rknn",
+ help="Path of rknn model.")
+ parser.add_argument(
+ "--config_file",
+ default="./ppyoloe_plus_crn_s_80e_coco/infer_cfg.yml",
+ help="Path of config.")
+ parser.add_argument(
+ "--image_file",
+ type=str,
+ default="./000000014439.jpg",
+ help="Path of test image file.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+
+ model_file = args.model_file
+ params_file = ""
+ config_file = args.config_file
+
+ # setup runtime
+ runtime_option = fd.RuntimeOption()
+ runtime_option.use_rknpu2()
+
+ model = fd.vision.detection.PPYOLOE(
+ model_file,
+ params_file,
+ config_file,
+ runtime_option=runtime_option,
+ model_format=fd.ModelFormat.RKNN)
+ model.preprocessor.disable_normalize()
+ model.preprocessor.disable_permute()
+ model.postprocessor.apply_nms()
+
+ # predict
+ im = cv2.imread(args.image_file)
+ result = model.predict(im)
+ print(result)
+
+ # visualize
+ vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+ cv2.imwrite("visualized_result.jpg", vis_im)
+ print("Visualized result save in ./visualized_result.jpg")
diff --git a/deploy/fastdeploy/rockchip/rv1126/README.md b/deploy/fastdeploy/rockchip/rv1126/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..02b41153274431cdaad15080f1ad37d07d05b2e8
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/README.md
@@ -0,0 +1,17 @@
+[English](README.md) | 简体中文
+
+# Deploying PaddleDetection Models on Rockchip NPU with FastDeploy
+
+## 1. Overview
+This example uses the RV1126 to show how to deploy PaddleDetection models with FastDeploy; the following chips are supported:
+- Rockchip RV1109
+- Rockchip RV1126
+- Rockchip RK1808
+
+For model quantization and downloading quantized models, see: [Model quantization](../../quantize/README.md)
+
+## 2. Detailed Deployment Documentation
+
+Only C++ deployment is supported on the RV1126.
+
+- [C++ deployment](cpp)
diff --git a/deploy/fastdeploy/rockchip/rv1126/cpp/CMakeLists.txt b/deploy/fastdeploy/rockchip/rv1126/cpp/CMakeLists.txt
new file mode 100755
index 0000000000000000000000000000000000000000..af493f6b67d2135a94c06590248bd6f28d364a54
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/cpp/CMakeLists.txt
@@ -0,0 +1,27 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+include_directories(${FastDeploy_INCLUDE_DIRS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
+
+set(CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}/build/install)
+
+install(TARGETS infer_demo DESTINATION ./)
+
+install(DIRECTORY models DESTINATION ./)
+install(DIRECTORY images DESTINATION ./)
+
+file(GLOB_RECURSE FASTDEPLOY_LIBS ${FASTDEPLOY_INSTALL_DIR}/lib/lib*.so*)
+file(GLOB_RECURSE ALL_LIBS ${FASTDEPLOY_INSTALL_DIR}/third_libs/install/lib*.so*)
+list(APPEND ALL_LIBS ${FASTDEPLOY_LIBS})
+install(PROGRAMS ${ALL_LIBS} DESTINATION lib)
+
+file(GLOB ADB_TOOLS run_with_adb.sh)
+install(PROGRAMS ${ADB_TOOLS} DESTINATION ./)
diff --git a/deploy/fastdeploy/rockchip/rv1126/cpp/README.md b/deploy/fastdeploy/rockchip/rv1126/cpp/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..811b8a1029cdf6325d528b46a04e655411cc0488
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/cpp/README.md
@@ -0,0 +1,64 @@
+[English](README.md) | 简体中文
+# RV1126 C++ Deployment Example for Quantized PaddleDetection Models
+
+The `infer.cc` in this directory helps you quickly deploy a quantized PP-YOLOE model on the RV1126 with accelerated inference.
+
+## 1. Deployment Environment Setup
+### 1.1 FastDeploy Cross-Compilation Environment
+For the software/hardware requirements and how to prepare the cross-compilation environment, see: [Rockchip RV1126 deployment environment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#自行编译安装)
+
+## 2. Deployment Model Preparation
+1. You can directly deploy the quantized models provided by FastDeploy.
+2. You can export a Float32 model with PaddleDetection yourself; make sure to set use_shared_conv=False when exporting. For details, see [PP-YOLOE](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe)
+3. You can quantize a model yourself with FastDeploy's [one-click auto-compression tool](https://github.com/PaddlePaddle/FastDeploy/blob/develop/tools/common_tools/auto_compression/) and deploy the resulting quantized model. (Note: a quantized detection model still needs the infer_cfg.yml file from the FP32 model folder; a self-quantized model folder does not contain this yaml file, so copy it over from the FP32 model folder.)
+4. The model requires heterogeneous computing; see [Heterogeneous computing](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/faq/heterogeneous_computing_on_timvx_npu.md). Since FastDeploy already provides the model, you can first test with the heterogeneous-computing file we provide and verify whether the accuracy meets your requirements.
+
+For more on quantization, see [Model quantization](../../../quantize/README.md)
+
+## 3. Run the Deployment Example
+Follow these steps to deploy the quantized PP-YOLOE model on the RV1126:
+1. Cross-compile the FastDeploy library; see: [Cross-compiling FastDeploy](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/rv1126.md)
+
+2. Copy the compiled library into the current directory:
+```bash
+cp -r FastDeploy/build/fastdeploy-timvx/ PaddleDetection/deploy/fastdeploy/rockchip/rv1126/cpp
+```
+
+3. Download the model and example image needed for deployment into the current directory:
+```bash
+cd PaddleDetection/deploy/fastdeploy/rockchip/rv1126/cpp
+mkdir models && mkdir images
+wget https://bj.bcebos.com/fastdeploy/models/ppyoloe_noshare_qat.tar.gz
+tar -xvf ppyoloe_noshare_qat.tar.gz
+cp -r ppyoloe_noshare_qat models
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+cp -r 000000014439.jpg images
+```
+
+4. Build the deployment example:
+```bash
+cd PaddleDetection/deploy/fastdeploy/rockchip/rv1126/cpp
+mkdir build && cd build
+cmake -DCMAKE_TOOLCHAIN_FILE=${PWD}/../fastdeploy-timvx/toolchain.cmake -DFASTDEPLOY_INSTALL_DIR=${PWD}/../fastdeploy-timvx -DTARGET_ABI=armhf ..
+make -j8
+make install
+# After a successful build, an install folder is generated, containing the demo binary and the libraries needed for deployment
+```
+
+5. Deploy the PP-YOLOE detection model to the Rockchip RV1126 via the adb tool:
+```bash
+# Enter the install directory
+cd PaddleDetection/deploy/fastdeploy/rockchip/rv1126/cpp/build/install/
+# The command below means: bash run_with_adb.sh <demo to run> <model path> <image path> <device DEVICE_ID>
+bash run_with_adb.sh infer_demo ppyoloe_noshare_qat 000000014439.jpg $DEVICE_ID
+```
+
+After successful deployment, the output looks like the following:
+
+
+
+Note in particular that models deployed on the RV1126 must be quantized; for model quantization, see: [Model quantization](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/quantize.md)
+
+## 4. Further Guides
+- [PaddleDetection C++ API Reference](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/namespacefastdeploy_1_1vision_1_1detection.html)
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
diff --git a/deploy/fastdeploy/rockchip/rv1126/cpp/infer.cc b/deploy/fastdeploy/rockchip/rv1126/cpp/infer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d4a69b49238cbeaed2b2078430ff616ce3c305d1
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/cpp/infer.cc
@@ -0,0 +1,66 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/vision.h"
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+void InitAndInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + sep + "model.pdmodel";
+ auto params_file = model_dir + sep + "model.pdiparams";
+ auto config_file = model_dir + sep + "infer_cfg.yml";
+ auto subgraph_file = model_dir + sep + "subgraph.txt";
+ fastdeploy::vision::EnableFlyCV();
+ fastdeploy::RuntimeOption option;
+ option.UseTimVX();
+  option.paddle_lite_option.nnadapter_subgraph_partition_config_path =
+      subgraph_file;
+
+ auto model = fastdeploy::vision::detection::PPYOLOE(model_file, params_file,
+ config_file, option);
+ assert(model.Initialized());
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("vis_result.jpg", vis_im);
+ std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
+
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout << "Usage: infer_demo path/to/quant_model "
+ "path/to/image "
+ "e.g ./infer_demo ./PPYOLOE_L_quant ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+
+ std::string model_dir = argv[1];
+ std::string test_image = argv[2];
+ InitAndInfer(model_dir, test_image);
+ return 0;
+}
diff --git a/deploy/fastdeploy/rockchip/rv1126/cpp/run_with_adb.sh b/deploy/fastdeploy/rockchip/rv1126/cpp/run_with_adb.sh
new file mode 100755
index 0000000000000000000000000000000000000000..aacaed4c516a42eb4e74aec733a5cb41294f38b4
--- /dev/null
+++ b/deploy/fastdeploy/rockchip/rv1126/cpp/run_with_adb.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+HOST_SPACE=${PWD}
+echo ${HOST_SPACE}
+WORK_SPACE=/data/local/tmp/test
+
+# The first parameter represents the demo name
+DEMO_NAME=image_classification_demo
+if [ -n "$1" ]; then
+ DEMO_NAME=$1
+fi
+
+# The second parameter represents the model name
+MODEL_NAME=mobilenet_v1_fp32_224
+if [ -n "$2" ]; then
+ MODEL_NAME=$2
+fi
+
+# The third parameter indicates the name of the image to be tested
+IMAGE_NAME=0001.jpg
+if [ -n "$3" ]; then
+ IMAGE_NAME=$3
+fi
+
+# The fourth parameter represents the ID of the device
+ADB_DEVICE_NAME=
+if [ -n "$4" ]; then
+ ADB_DEVICE_NAME="-s $4"
+fi
+
+# Set the environment variables required during the running process
+EXPORT_ENVIRONMENT_VARIABLES="export GLOG_v=5; export VIV_VX_ENABLE_GRAPH_TRANSFORM=-pcq:1; export VIV_VX_SET_PER_CHANNEL_ENTROPY=100; export TIMVX_BATCHNORM_FUSION_MAX_ALLOWED_QUANT_SCALE_DEVIATION=300000; export VSI_NN_LOG_LEVEL=5;"
+
+EXPORT_ENVIRONMENT_VARIABLES="${EXPORT_ENVIRONMENT_VARIABLES}export LD_LIBRARY_PATH=${WORK_SPACE}/lib:\$LD_LIBRARY_PATH;"
+
+# Please install adb, and DON'T run this in the docker.
+set -e
+adb $ADB_DEVICE_NAME shell "rm -rf $WORK_SPACE"
+adb $ADB_DEVICE_NAME shell "mkdir -p $WORK_SPACE"
+
+# Upload the demo, libraries, model and test images to the device
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/lib $WORK_SPACE
+adb $ADB_DEVICE_NAME push ${HOST_SPACE}/${DEMO_NAME} $WORK_SPACE
+adb $ADB_DEVICE_NAME push models $WORK_SPACE
+adb $ADB_DEVICE_NAME push images $WORK_SPACE
+
+# Execute the deployment demo
+adb $ADB_DEVICE_NAME shell "cd $WORK_SPACE; ${EXPORT_ENVIRONMENT_VARIABLES} chmod +x ./${DEMO_NAME}; ./${DEMO_NAME} ./models/${MODEL_NAME} ./images/$IMAGE_NAME"
diff --git a/deploy/fastdeploy/serving/README.md b/deploy/fastdeploy/serving/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..8e6bbc0631bfd14e9a6b50347f828c8da65272ea
--- /dev/null
+++ b/deploy/fastdeploy/serving/README.md
@@ -0,0 +1,111 @@
+[English](README.md) | 简体中文
+# PaddleDetection Serving Deployment Example
+
+This document walks through serving deployment in detail using the PP-YOLOE model (ppyoloe_crn_l_300e_coco) as an example. All other PaddleDetection models also support serving deployment; just replace the model and config names in the commands below with the names of the model you want to deploy.
+
+For PaddleDetection model export and pretrained model downloads, see the [PaddleDetection model deployment](../README.md) doc.
+
+## 1. Deployment Environment Setup
+Before serving deployment, confirm:
+
+- 1. For the software/hardware requirements of the serving image and the image pull command, see [FastDeploy serving deployment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README_CN.md)
+
+
+## 2. Start the Service
+
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/serving
+
+# Download the PPYOLOE model files and a test image
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+tar xvf ppyoloe_crn_l_300e_coco.tgz
+
+# Put the config file into the preprocess directory
+mv ppyoloe_crn_l_300e_coco/infer_cfg.yml models/preprocess/1/
+
+# Put the model files into models/runtime/1 and rename them to model.pdmodel and model.pdiparams
+mv ppyoloe_crn_l_300e_coco/model.pdmodel models/runtime/1/model.pdmodel
+mv ppyoloe_crn_l_300e_coco/model.pdiparams models/runtime/1/model.pdiparams
+
+# Rename the ppyoloe config files in ppdet and runtime to the standard config name
+# For other models, e.g. faster_rcnn, rename faster_rcnn_config.pbtxt to config.pbtxt
+cp models/ppdet/ppyoloe_config.pbtxt models/ppdet/config.pbtxt
+cp models/runtime/ppyoloe_runtime_config.pbtxt models/runtime/config.pbtxt
+
+# Note: since the mask_rcnn model has one extra output, rename mask_config.pbtxt in the postprocess directory (models/postprocess) to config.pbtxt when deploying it
+
+# Pull the fastdeploy image (x.y.z is the image version; replace it with the fastdeploy version number)
+# GPU image
+docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
+# CPU image
+docker pull paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
+
+# Run a container named fd_serving and mount the current directory as /serving inside it
+nvidia-docker run -it --net=host --name fd_serving --shm-size="1g" -v `pwd`/:/serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
+
+# Start the service (if the CUDA_VISIBLE_DEVICES environment variable is not set, the service gets scheduling rights to all GPUs)
+CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models
+```
+>> **Notes**:
+
+>> Since the mask_rcnn model has one extra output, deploying mask_rcnn requires renaming mask_config.pbtxt in the postprocess directory (models/postprocess) to config.pbtxt
+
+>> For pulling images, see the [main serving deployment doc](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/README_CN.md)
+
+>> If fastdeployserver reports "Address already in use" on startup, use `--grpc-port` to specify another grpc port and change the request port in the client example accordingly.
+
+>> Run fastdeployserver --help to see other startup parameters
+
+After the service starts successfully, you will see output like:
+```
+......
+I0928 04:51:15.784517 206 grpc_server.cc:4117] Started GRPCInferenceService at 0.0.0.0:8001
+I0928 04:51:15.785177 206 http_server.cc:2815] Started HTTPService at 0.0.0.0:8000
+I0928 04:51:15.826578 206 http_server.cc:167] Started Metrics Service at 0.0.0.0:8002
+```
+
+
+## 3. Client Request
+
+Run the following commands on the physical machine to send a grpc request and print the result:
+```
+# Download the test image
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# Install client dependencies
+python3 -m pip install tritonclient[all]
+
+# Send the request
+python3 paddledet_grpc_client.py
+```
+
+After the request succeeds, the detection result is returned in JSON format and printed:
+```
+output_name: DET_RESULT
+[[159.93016052246094, 82.35527038574219, 199.8546600341797, 164.68682861328125],
+... ...,
+[60.200584411621094, 123.73260498046875, 108.83859252929688, 169.07467651367188]]
+```
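+
+The request itself is sent by `paddledet_grpc_client.py`. As an illustration only, a minimal gRPC client could look like the sketch below; the model name ("ppdet") and input tensor name ("INPUT") are assumptions here, so check models/ppdet/config.pbtxt for the actual values (the output name DET_RESULT matches the printout above):
+
+```python
+import cv2
+import numpy as np
+import tritonclient.grpc as grpcclient
+
+# Connect to the gRPC endpoint started above (0.0.0.0:8001)
+client = grpcclient.InferenceServerClient("127.0.0.1:8001")
+
+im = cv2.imread("000000014439.jpg")
+data = np.expand_dims(im, axis=0)  # shape [1, H, W, 3], BGR uint8
+
+# Tensor names are assumptions; verify them in the ensemble config
+inputs = [grpcclient.InferInput("INPUT", list(data.shape), "UINT8")]
+inputs[0].set_data_from_numpy(data)
+outputs = [grpcclient.InferRequestedOutput("DET_RESULT")]
+
+response = client.infer("ppdet", inputs=inputs, outputs=outputs)
+print(response.as_numpy("DET_RESULT"))
+```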
+
+## 4. Configuration Changes
+
+The current default config runs the Paddle engine on GPU. To run on CPU or with other inference engines, modify the config in `models/runtime/config.pbtxt`; for details, see the [configuration doc](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/model_configuration.md)
+
+
+## 5. Visual Deployment with VisualDL
+
+You can use VisualDL for [visual serving deployment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/serving/docs/zh_CN/vdl_management.md); starting the service, modifying configs and sending client requests as described above can all be done through VisualDL.
+
+Serving a PaddleDetection model through the VisualDL UI takes only the following four steps:
+```text
+1. Load the model repository: ./vision/detection/paddledetection/serving/models
+2. Download the model resource files: click the preprocess model, click version 1 to add a pretrained model, and select the detection model ppyoloe_crn_l_300e_coco to download; preprocess will then contain the resource file infer_cfg.yml. Click the runtime model, click version 1 to add a pretrained model, and select ppyoloe_crn_l_300e_coco to download; runtime will then contain model.pdmodel and model.pdiparams.
+3. Set the startup config files: click the ensemble config button, select ppyoloe_config.pbtxt and set it as the startup config; click the runtime model, select ppyoloe_runtime_config.pbtxt and set it as the startup config.
+4. Start the service: click the start-service button and enter the startup parameters.
+```
+
+
+
diff --git a/deploy/fastdeploy/serving/models/postprocess/1/model.py b/deploy/fastdeploy/serving/models/postprocess/1/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..35054e516576e0c274d8875f65a184ebb1669d97
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/postprocess/1/model.py
@@ -0,0 +1,110 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import numpy as np
+
+import fastdeploy as fd
+
+# triton_python_backend_utils is available in every Triton Python model. You
+# need to use this module to create inference requests and responses. It also
+# contains some utility functions for extracting information from model_config
+# and converting Triton input/output types to numpy types.
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+ """Your Python model must use the same class name. Every Python model
+ that is created must have "TritonPythonModel" as the class name.
+ """
+
+ def initialize(self, args):
+ """`initialize` is called only once when the model is being loaded.
+ Implementing `initialize` function is optional. This function allows
+        the model to initialize any state associated with this model.
+ Parameters
+ ----------
+ args : dict
+ Both keys and values are strings. The dictionary keys and values are:
+ * model_config: A JSON string containing the model configuration
+ * model_instance_kind: A string containing model instance kind
+ * model_instance_device_id: A string containing model instance device ID
+ * model_repository: Model repository path
+ * model_version: Model version
+ * model_name: Model name
+ """
+ # You must parse model_config. JSON string is not parsed here
+ self.model_config = json.loads(args['model_config'])
+ print("model_config:", self.model_config)
+
+ self.input_names = []
+ for input_config in self.model_config["input"]:
+ self.input_names.append(input_config["name"])
+ print("postprocess input names:", self.input_names)
+
+ self.output_names = []
+ self.output_dtype = []
+ for output_config in self.model_config["output"]:
+ self.output_names.append(output_config["name"])
+ dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
+ self.output_dtype.append(dtype)
+ print("postprocess output names:", self.output_names)
+
+ self.postprocess_ = fd.vision.detection.PaddleDetPostprocessor()
+
+ def execute(self, requests):
+ """`execute` must be implemented in every Python model. `execute`
+ function receives a list of pb_utils.InferenceRequest as the only
+ argument. This function is called when an inference is requested
+ for this model. Depending on the batching configuration (e.g. Dynamic
+ Batching) used, `requests` may contain multiple requests. Every
+ Python model, must create one pb_utils.InferenceResponse for every
+ pb_utils.InferenceRequest in `requests`. If there is an error, you can
+ set the error argument when creating a pb_utils.InferenceResponse.
+ Parameters
+ ----------
+ requests : list
+ A list of pb_utils.InferenceRequest
+ Returns
+ -------
+ list
+ A list of pb_utils.InferenceResponse. The length of this list must
+ be the same as `requests`
+ """
+ responses = []
+ for request in requests:
+ infer_outputs = []
+ for name in self.input_names:
+ infer_output = pb_utils.get_input_tensor_by_name(request, name)
+ if infer_output:
+ infer_output = infer_output.as_numpy()
+ infer_outputs.append(infer_output)
+
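+            # Run FastDeploy's detection postprocessing on the collected
+            # runtime outputs, then serialize each DetectionResult to a JSON
+            # string for the STRING output tensor.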
+ results = self.postprocess_.run(infer_outputs)
+ r_str = fd.vision.utils.fd_result_to_json(results)
+
+ r_np = np.array(r_str, dtype=np.object_)
+ out_tensor = pb_utils.Tensor(self.output_names[0], r_np)
+ inference_response = pb_utils.InferenceResponse(
+ output_tensors=[out_tensor, ])
+ responses.append(inference_response)
+ return responses
+
+ def finalize(self):
+ """`finalize` is called only once when the model is being unloaded.
+ Implementing `finalize` function is optional. This function allows
+ the model to perform any necessary clean ups before exit.
+ """
+ print('Cleaning up...')
diff --git a/deploy/fastdeploy/serving/models/postprocess/config.pbtxt b/deploy/fastdeploy/serving/models/postprocess/config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..bb09e32c6d776e04734ff350772072e9badfb3a3
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/postprocess/config.pbtxt
@@ -0,0 +1,30 @@
+name: "postprocess"
+backend: "python"
+
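+# The two inputs mirror the detection runtime outputs: an [N, 6] float tensor of
+# detections (PaddleDetection NMS layout: class id, score, xmin, ymin, xmax, ymax)
+# and an int32 tensor holding the number of detections per image in the batch.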
+input [
+ {
+ name: "post_input1"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "post_input2"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ }
+]
+
+output [
+ {
+ name: "post_output"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_CPU
+ }
+]
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/postprocess/mask_config.pbtxt b/deploy/fastdeploy/serving/models/postprocess/mask_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..8985cc78a2499965f6651ae2c7d8759c4ba1a1a0
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/postprocess/mask_config.pbtxt
@@ -0,0 +1,34 @@
+backend: "python"
+
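+# Rename this file to config.pbtxt when deploying mask_rcnn (see the serving
+# README). Inputs match config.pbtxt, plus post_input3 for the extra
+# per-detection mask tensor that mask_rcnn produces.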
+input [
+ {
+ name: "post_input1"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "post_input2"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ },
+ {
+ name: "post_input3"
+ data_type: TYPE_INT32
+ dims: [ -1, -1, -1 ]
+ }
+]
+
+output [
+ {
+ name: "post_output"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_CPU
+ }
+]
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/ppdet/1/README.md b/deploy/fastdeploy/serving/models/ppdet/1/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..877efdf8de744c2ad1aa0a864e7fc2b5009e67d0
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/1/README.md
@@ -0,0 +1,3 @@
+# PaddleDetection Pipeline
+
+The pipeline directory contains no model files, but a version-number directory still needs to be maintained.
diff --git a/deploy/fastdeploy/serving/models/ppdet/faster_rcnn_config.pbtxt b/deploy/fastdeploy/serving/models/ppdet/faster_rcnn_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..91d132b9adccca7d9f2776fee68d4aa5830c4c03
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/faster_rcnn_config.pbtxt
@@ -0,0 +1,80 @@
+platform: "ensemble"
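+# Ensemble pipeline: preprocess -> runtime -> postprocess. Raw uint8 images
+# enter as INPUT and JSON-serialized detection results leave as DET_RESULT.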
+
+input [
+ {
+ name: "INPUT"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+output [
+ {
+ name: "DET_RESULT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+ensemble_scheduling {
+ step [
+ {
+ model_name: "preprocess"
+ model_version: 1
+ input_map {
+ key: "preprocess_input"
+ value: "INPUT"
+ }
+ output_map {
+ key: "preprocess_output1"
+ value: "RUNTIME_INPUT1"
+ }
+ output_map {
+ key: "preprocess_output2"
+ value: "RUNTIME_INPUT2"
+ }
+ output_map {
+ key: "preprocess_output3"
+ value: "RUNTIME_INPUT3"
+ }
+ },
+ {
+ model_name: "runtime"
+ model_version: 1
+ input_map {
+ key: "image"
+ value: "RUNTIME_INPUT1"
+ }
+ input_map {
+ key: "scale_factor"
+ value: "RUNTIME_INPUT2"
+ }
+ input_map {
+ key: "im_shape"
+ value: "RUNTIME_INPUT3"
+ }
+ output_map {
+ key: "concat_12.tmp_0"
+ value: "RUNTIME_OUTPUT1"
+ }
+ output_map {
+ key: "concat_8.tmp_0"
+ value: "RUNTIME_OUTPUT2"
+ }
+ },
+ {
+ model_name: "postprocess"
+ model_version: 1
+ input_map {
+ key: "post_input1"
+ value: "RUNTIME_OUTPUT1"
+ }
+ input_map {
+ key: "post_input2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ output_map {
+ key: "post_output"
+ value: "DET_RESULT"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/ppdet/mask_rcnn_config.pbtxt b/deploy/fastdeploy/serving/models/ppdet/mask_rcnn_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..b0ee4e092af57d50eb8ed00debeb6ba7c85a2055
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/mask_rcnn_config.pbtxt
@@ -0,0 +1,88 @@
+platform: "ensemble"
+
+input [
+ {
+ name: "INPUT"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+output [
+ {
+ name: "DET_RESULT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+ensemble_scheduling {
+ step [
+ {
+ model_name: "preprocess"
+ model_version: 1
+ input_map {
+ key: "preprocess_input"
+ value: "INPUT"
+ }
+ output_map {
+ key: "preprocess_output1"
+ value: "RUNTIME_INPUT1"
+ }
+ output_map {
+ key: "preprocess_output2"
+ value: "RUNTIME_INPUT2"
+ }
+ output_map {
+ key: "preprocess_output3"
+ value: "RUNTIME_INPUT3"
+ }
+ },
+ {
+ model_name: "runtime"
+ model_version: 1
+ input_map {
+ key: "image"
+ value: "RUNTIME_INPUT1"
+ }
+ input_map {
+ key: "scale_factor"
+ value: "RUNTIME_INPUT2"
+ }
+ input_map {
+ key: "im_shape"
+ value: "RUNTIME_INPUT3"
+ }
+ output_map {
+ key: "concat_9.tmp_0"
+ value: "RUNTIME_OUTPUT1"
+ }
+ output_map {
+ key: "concat_5.tmp_0"
+ value: "RUNTIME_OUTPUT2"
+      }
+ output_map {
+ key: "tmp_109"
+ value: "RUNTIME_OUTPUT3"
+ }
+ },
+ {
+ model_name: "postprocess"
+ model_version: 1
+ input_map {
+ key: "post_input1"
+ value: "RUNTIME_OUTPUT1"
+ }
+ input_map {
+ key: "post_input2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ input_map {
+ key: "post_input3"
+ value: "RUNTIME_OUTPUT3"
+ }
+ output_map {
+ key: "post_output"
+ value: "DET_RESULT"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/ppdet/ppyolo_config.pbtxt b/deploy/fastdeploy/serving/models/ppdet/ppyolo_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..f7c1fe6121b01680b969c739711aff738901df60
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/ppyolo_config.pbtxt
@@ -0,0 +1,80 @@
+platform: "ensemble"
+
+input [
+ {
+ name: "INPUT"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+output [
+ {
+ name: "DET_RESULT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+ensemble_scheduling {
+ step [
+ {
+ model_name: "preprocess"
+ model_version: 1
+ input_map {
+ key: "preprocess_input"
+ value: "INPUT"
+ }
+ output_map {
+ key: "preprocess_output1"
+ value: "RUNTIME_INPUT1"
+ }
+ output_map {
+ key: "preprocess_output2"
+ value: "RUNTIME_INPUT2"
+ }
+ output_map {
+ key: "preprocess_output3"
+ value: "RUNTIME_INPUT3"
+ }
+ },
+ {
+ model_name: "runtime"
+ model_version: 1
+ input_map {
+ key: "image"
+ value: "RUNTIME_INPUT1"
+ }
+ input_map {
+ key: "scale_factor"
+ value: "RUNTIME_INPUT2"
+ }
+ input_map {
+ key: "im_shape"
+ value: "RUNTIME_INPUT3"
+ }
+ output_map {
+ key: "matrix_nms_0.tmp_0"
+ value: "RUNTIME_OUTPUT1"
+ }
+ output_map {
+ key: "matrix_nms_0.tmp_2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ },
+ {
+ model_name: "postprocess"
+ model_version: 1
+ input_map {
+ key: "post_input1"
+ value: "RUNTIME_OUTPUT1"
+ }
+ input_map {
+ key: "post_input2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ output_map {
+ key: "post_output"
+ value: "DET_RESULT"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/ppdet/ppyoloe_config.pbtxt b/deploy/fastdeploy/serving/models/ppdet/ppyoloe_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..3cb479b46f5464f5fe8d5ed743c643fefd651626
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/ppdet/ppyoloe_config.pbtxt
@@ -0,0 +1,72 @@
+platform: "ensemble"
+
+input [
+ {
+ name: "INPUT"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+output [
+ {
+ name: "DET_RESULT"
+ data_type: TYPE_STRING
+ dims: [ -1 ]
+ }
+]
+ensemble_scheduling {
+ step [
+ {
+ model_name: "preprocess"
+ model_version: 1
+ input_map {
+ key: "preprocess_input"
+ value: "INPUT"
+ }
+ output_map {
+ key: "preprocess_output1"
+ value: "RUNTIME_INPUT1"
+ }
+ output_map {
+ key: "preprocess_output2"
+ value: "RUNTIME_INPUT2"
+ }
+ },
+ {
+ model_name: "runtime"
+ model_version: 1
+ input_map {
+ key: "image"
+ value: "RUNTIME_INPUT1"
+ }
+ input_map {
+ key: "scale_factor"
+ value: "RUNTIME_INPUT2"
+ }
+ output_map {
+ key: "multiclass_nms3_0.tmp_0"
+ value: "RUNTIME_OUTPUT1"
+ }
+ output_map {
+ key: "multiclass_nms3_0.tmp_2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ },
+ {
+ model_name: "postprocess"
+ model_version: 1
+ input_map {
+ key: "post_input1"
+ value: "RUNTIME_OUTPUT1"
+ }
+ input_map {
+ key: "post_input2"
+ value: "RUNTIME_OUTPUT2"
+ }
+ output_map {
+ key: "post_output"
+ value: "DET_RESULT"
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/preprocess/1/model.py b/deploy/fastdeploy/serving/models/preprocess/1/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ea72054de3695e9c6a0bbdaa5ccdcd03c62888a
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/preprocess/1/model.py
@@ -0,0 +1,114 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import numpy as np
+import os
+
+import fastdeploy as fd
+
+# triton_python_backend_utils is available in every Triton Python model. You
+# need to use this module to create inference requests and responses. It also
+# contains some utility functions for extracting information from model_config
+# and converting Triton input/output types to numpy types.
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+ """Your Python model must use the same class name. Every Python model
+ that is created must have "TritonPythonModel" as the class name.
+ """
+
+ def initialize(self, args):
+ """`initialize` is called only once when the model is being loaded.
+ Implementing `initialize` function is optional. This function allows
+        the model to initialize any state associated with this model.
+ Parameters
+ ----------
+ args : dict
+ Both keys and values are strings. The dictionary keys and values are:
+ * model_config: A JSON string containing the model configuration
+ * model_instance_kind: A string containing model instance kind
+ * model_instance_device_id: A string containing model instance device ID
+ * model_repository: Model repository path
+ * model_version: Model version
+ * model_name: Model name
+ """
+ # You must parse model_config. JSON string is not parsed here
+ self.model_config = json.loads(args['model_config'])
+ print("model_config:", self.model_config)
+
+ self.input_names = []
+ for input_config in self.model_config["input"]:
+ self.input_names.append(input_config["name"])
+ print("preprocess input names:", self.input_names)
+
+ self.output_names = []
+ self.output_dtype = []
+ for output_config in self.model_config["output"]:
+ self.output_names.append(output_config["name"])
+ # dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
+ # self.output_dtype.append(dtype)
+ self.output_dtype.append(output_config["data_type"])
+ print("preprocess output names:", self.output_names)
+
+        # init the PaddleDetPreprocessor with the exported infer_cfg.yml
+ yaml_path = os.path.abspath(os.path.dirname(
+ __file__)) + "/infer_cfg.yml"
+ self.preprocess_ = fd.vision.detection.PaddleDetPreprocessor(yaml_path)
+
+ def execute(self, requests):
+ """`execute` must be implemented in every Python model. `execute`
+ function receives a list of pb_utils.InferenceRequest as the only
+ argument. This function is called when an inference is requested
+ for this model. Depending on the batching configuration (e.g. Dynamic
+ Batching) used, `requests` may contain multiple requests. Every
+ Python model, must create one pb_utils.InferenceResponse for every
+ pb_utils.InferenceRequest in `requests`. If there is an error, you can
+ set the error argument when creating a pb_utils.InferenceResponse.
+ Parameters
+ ----------
+ requests : list
+ A list of pb_utils.InferenceRequest
+ Returns
+ -------
+ list
+ A list of pb_utils.InferenceResponse. The length of this list must
+ be the same as `requests`
+ """
+ responses = []
+ for request in requests:
+ data = pb_utils.get_input_tensor_by_name(request,
+ self.input_names[0])
+ data = data.as_numpy()
+ outputs = self.preprocess_.run(data)
+
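+            # Hand the preprocessed FastDeploy tensors to Triton via DLPack,
+            # which avoids an extra host-side copy.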
+ output_tensors = []
+ for idx, name in enumerate(self.output_names):
+ dlpack_tensor = outputs[idx].to_dlpack()
+ output_tensor = pb_utils.Tensor.from_dlpack(name,
+ dlpack_tensor)
+ output_tensors.append(output_tensor)
+
+ inference_response = pb_utils.InferenceResponse(
+ output_tensors=output_tensors)
+ responses.append(inference_response)
+ return responses
+
+ def finalize(self):
+ """`finalize` is called only once when the model is being unloaded.
+ Implementing `finalize` function is optional. This function allows
+ the model to perform any necessary clean ups before exit.
+ """
+ print('Cleaning up...')
diff --git a/deploy/fastdeploy/serving/models/preprocess/config.pbtxt b/deploy/fastdeploy/serving/models/preprocess/config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..39a42113bcedbd0b17be5b0ac16322a458ff2ce2
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/preprocess/config.pbtxt
@@ -0,0 +1,35 @@
+name: "preprocess"
+backend: "python"
+
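+# The three outputs feed the runtime model's image, scale_factor and (for
+# RCNN-style models) im_shape inputs; see the ensemble configs under models/ppdet.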
+input [
+ {
+ name: "preprocess_input"
+ data_type: TYPE_UINT8
+ dims: [ -1, -1, -1, 3 ]
+ }
+]
+
+output [
+ {
+ name: "preprocess_output1"
+ data_type: TYPE_FP32
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "preprocess_output2"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ },
+ {
+ name: "preprocess_output3"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+instance_group [
+ {
+ count: 1
+ kind: KIND_CPU
+ }
+]
\ No newline at end of file
diff --git a/deploy/fastdeploy/serving/models/runtime/1/README.md b/deploy/fastdeploy/serving/models/runtime/1/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e5d914b439adef6008af354c729e0e43befbebd
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/1/README.md
@@ -0,0 +1,5 @@
+# Runtime Directory
+
+This directory holds the model files.
+Paddle models must be named model.pdmodel and model.pdiparams.
+ONNX models must be named model.onnx.
diff --git a/deploy/fastdeploy/serving/models/runtime/faster_rcnn_runtime_config.pbtxt b/deploy/fastdeploy/serving/models/runtime/faster_rcnn_runtime_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..9f4b9833e82011e6f9cf9ee2e472244cb5020333
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/faster_rcnn_runtime_config.pbtxt
@@ -0,0 +1,58 @@
+backend: "fastdeploy"
+
+# Input configuration of the model
+input [
+ {
+ # input name
+ name: "image"
+    # input type, e.g. TYPE_FP32, TYPE_UINT8, TYPE_INT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FP16 or TYPE_STRING
+ data_type: TYPE_FP32
+    # input shape; the first dimension is the batch dimension, i.e. [batch, c, h, w]
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "scale_factor"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ },
+ {
+ name: "im_shape"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+# The output of the model is configured in the same format as the input
+output [
+ {
+ name: "concat_12.tmp_0"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "concat_8.tmp_0"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ }
+]
+
+# Number of instances of the model
+instance_group [
+ {
+ # The number of instances is 1
+ count: 1
+    # Use GPU; for CPU inference set kind: KIND_CPU
+ kind: KIND_GPU
+    # The instance is deployed on GPU 0
+ gpus: [0]
+ }
+]
+
+optimization {
+ execution_accelerators {
+ gpu_execution_accelerator : [ {
+ # use Paddle engine
+ name: "paddle",
+ }
+ ]
+}}
diff --git a/deploy/fastdeploy/serving/models/runtime/mask_rcnn_runtime_config.pbtxt b/deploy/fastdeploy/serving/models/runtime/mask_rcnn_runtime_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..13fdd5b41ded918c5c3bc60f724046630a1b4d30
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/mask_rcnn_runtime_config.pbtxt
@@ -0,0 +1,63 @@
+backend: "fastdeploy"
+
+# Input configuration of the model
+input [
+ {
+ # input name
+ name: "image"
+    # input type, e.g. TYPE_FP32, TYPE_UINT8, TYPE_INT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FP16 or TYPE_STRING
+ data_type: TYPE_FP32
+    # input shape; the first dimension is the batch dimension, i.e. [batch, c, h, w]
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "scale_factor"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ },
+ {
+ name: "im_shape"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+# The output of the model is configured in the same format as the input
+output [
+ {
+ name: "concat_9.tmp_0"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "concat_5.tmp_0"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ },
+ {
+ name: "tmp_109"
+ data_type: TYPE_INT32
+ dims: [ -1, -1, -1 ]
+ }
+]
+
+# Number of instances of the model
+instance_group [
+ {
+ # The number of instances is 1
+ count: 1
+    # Use GPU; for CPU inference set kind: KIND_CPU
+ kind: KIND_GPU
+    # The instance is deployed on GPU 0
+ gpus: [0]
+ }
+]
+
+optimization {
+ execution_accelerators {
+ gpu_execution_accelerator : [ {
+ # use Paddle engine
+ name: "paddle",
+ }
+ ]
+}}
diff --git a/deploy/fastdeploy/serving/models/runtime/ppyolo_runtime_config.pbtxt b/deploy/fastdeploy/serving/models/runtime/ppyolo_runtime_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..0f7b6330846da874c60942a0a08694dceca0c713
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/ppyolo_runtime_config.pbtxt
@@ -0,0 +1,58 @@
+backend: "fastdeploy"
+
+# Input configuration of the model
+input [
+ {
+ # input name
+ name: "image"
+    # input type, e.g. TYPE_FP32, TYPE_UINT8, TYPE_INT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FP16 or TYPE_STRING
+ data_type: TYPE_FP32
+    # input shape; the first dimension is the batch dimension, i.e. [batch, c, h, w]
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "scale_factor"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ },
+ {
+ name: "im_shape"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+# The output of the model is configured in the same format as the input
+output [
+ {
+ name: "matrix_nms_0.tmp_0"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "matrix_nms_0.tmp_2"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ }
+]
+
+# Number of instances of the model
+instance_group [
+ {
+ # The number of instances is 1
+ count: 1
+    # Use GPU; for CPU inference set kind: KIND_CPU
+ kind: KIND_GPU
+    # The instance is deployed on GPU 0
+ gpus: [0]
+ }
+]
+
+optimization {
+ execution_accelerators {
+ gpu_execution_accelerator : [ {
+ # use Paddle engine
+ name: "paddle",
+ }
+ ]
+}}
diff --git a/deploy/fastdeploy/serving/models/runtime/ppyoloe_runtime_config.pbtxt b/deploy/fastdeploy/serving/models/runtime/ppyoloe_runtime_config.pbtxt
new file mode 100644
index 0000000000000000000000000000000000000000..dc8d15845ce48619f8c4959bbad55916d239f5ee
--- /dev/null
+++ b/deploy/fastdeploy/serving/models/runtime/ppyoloe_runtime_config.pbtxt
@@ -0,0 +1,55 @@
+# Optional: if name is specified, it must match the name of the model repository directory containing the model.
+name: "runtime"
+backend: "fastdeploy"
+
+# Input configuration of the model
+input [
+ {
+ # input name
+ name: "image"
+    # input type, e.g. TYPE_FP32, TYPE_UINT8, TYPE_INT8, TYPE_INT16, TYPE_INT32, TYPE_INT64, TYPE_FP16 or TYPE_STRING
+ data_type: TYPE_FP32
+    # input shape; the first dimension is the batch dimension, i.e. [batch, c, h, w]
+ dims: [ -1, 3, -1, -1 ]
+ },
+ {
+ name: "scale_factor"
+ data_type: TYPE_FP32
+ dims: [ -1, 2 ]
+ }
+]
+
+# The output of the model is configured in the same format as the input
+output [
+ {
+ name: "multiclass_nms3_0.tmp_0"
+ data_type: TYPE_FP32
+ dims: [ -1, 6 ]
+ },
+ {
+ name: "multiclass_nms3_0.tmp_2"
+ data_type: TYPE_INT32
+ dims: [ -1 ]
+ }
+]
+
+# Number of instances of the model
+instance_group [
+ {
+ # The number of instances is 1
+ count: 1
+    # Use GPU; for CPU inference set kind: KIND_CPU
+ kind: KIND_GPU
+    # The instance is deployed on GPU 0
+ gpus: [0]
+ }
+]
+
+optimization {
+ execution_accelerators {
+ gpu_execution_accelerator : [ {
+ # use Paddle engine
+ name: "paddle",
+ }
+ ]
+}}
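+
+# To run on CPU instead, set kind: KIND_CPU in instance_group above and switch
+# the accelerator to a cpu_execution_accelerator; see the FastDeploy serving
+# model configuration docs for the supported backends.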
diff --git a/deploy/fastdeploy/serving/paddledet_grpc_client.py b/deploy/fastdeploy/serving/paddledet_grpc_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..84223949678ccebfcd5a433704cae242ae7336cb
--- /dev/null
+++ b/deploy/fastdeploy/serving/paddledet_grpc_client.py
@@ -0,0 +1,109 @@
+import json
+import logging
+from typing import Optional
+
+import cv2
+import numpy as np
+
+from tritonclient.grpc import InferenceServerClient, InferInput, InferRequestedOutput
+
+LOGGER = logging.getLogger("run_inference_on_triton")
+
+
+class SyncGRPCTritonRunner:
+ DEFAULT_MAX_RESP_WAIT_S = 120
+
+ def __init__(
+ self,
+ server_url: str,
+ model_name: str,
+ model_version: str,
+ *,
+ verbose=False,
+ resp_wait_s: Optional[float]=None, ):
+ self._server_url = server_url
+ self._model_name = model_name
+ self._model_version = model_version
+ self._verbose = verbose
+ self._response_wait_t = self.DEFAULT_MAX_RESP_WAIT_S if resp_wait_s is None else resp_wait_s
+
+ self._client = InferenceServerClient(
+ self._server_url, verbose=self._verbose)
+ error = self._verify_triton_state(self._client)
+ if error:
+ raise RuntimeError(
+ f"Could not communicate to Triton Server: {error}")
+
+ LOGGER.debug(
+ f"Triton server {self._server_url} and model {self._model_name}:{self._model_version} "
+ f"are up and ready!")
+
+ model_config = self._client.get_model_config(self._model_name,
+ self._model_version)
+ model_metadata = self._client.get_model_metadata(self._model_name,
+ self._model_version)
+ LOGGER.info(f"Model config {model_config}")
+ LOGGER.info(f"Model metadata {model_metadata}")
+
+ for tm in model_metadata.inputs:
+ print("tm:", tm)
+ self._inputs = {tm.name: tm for tm in model_metadata.inputs}
+ self._input_names = list(self._inputs)
+ self._outputs = {tm.name: tm for tm in model_metadata.outputs}
+ self._output_names = list(self._outputs)
+ self._outputs_req = [
+ InferRequestedOutput(name) for name in self._outputs
+ ]
+
+ def Run(self, inputs):
+ """
+ Args:
+ inputs: list, Each value corresponds to an input name of self._input_names
+ Returns:
+ results: dict, {name : numpy.array}
+ """
+ infer_inputs = []
+ for idx, data in enumerate(inputs):
+ infer_input = InferInput(self._input_names[idx], data.shape,
+ "UINT8")
+ infer_input.set_data_from_numpy(data)
+ infer_inputs.append(infer_input)
+
+ results = self._client.infer(
+ model_name=self._model_name,
+ model_version=self._model_version,
+ inputs=infer_inputs,
+ outputs=self._outputs_req,
+ client_timeout=self._response_wait_t, )
+ results = {name: results.as_numpy(name) for name in self._output_names}
+ return results
+
+ def _verify_triton_state(self, triton_client):
+ if not triton_client.is_server_live():
+ return f"Triton server {self._server_url} is not live"
+ elif not triton_client.is_server_ready():
+ return f"Triton server {self._server_url} is not ready"
+ elif not triton_client.is_model_ready(self._model_name,
+ self._model_version):
+ return f"Model {self._model_name}:{self._model_version} is not ready"
+ return None
+
+
+if __name__ == "__main__":
+ model_name = "ppdet"
+ model_version = "1"
+ url = "localhost:8001"
+ runner = SyncGRPCTritonRunner(url, model_name, model_version)
+ im = cv2.imread("000000014439.jpg")
+ im = np.array([im, ])
+ # batch input
+ # im = np.array([im, im, im])
+ for i in range(1):
+ result = runner.Run([im, ])
+ for name, values in result.items():
+ print("output_name:", name)
+ # values is batch
+ for value in values:
+ value = json.loads(value)
+ print(value['boxes'])
diff --git a/deploy/fastdeploy/sophgo/README.md b/deploy/fastdeploy/sophgo/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1da9a4b2247ed47f8fbbd1d4f35a84aa50043b46
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/README.md
@@ -0,0 +1,108 @@
+# PaddleDetection SOPHGO Deployment Example
+
+## 1. Supported Models
+
+The following models are currently supported for deployment on SOPHGO:
+- [PP-YOLOE models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/ppyoloe)
+- [PicoDet models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4/configs/picodet)
+- [YOLOv8 models](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.4)
+
+## 2. Preparing and Converting PP-YOLOE, YOLOv8 or PicoDet Deployment Models
+
+Before deploying on SOPHGO TPU, the Paddle model must be converted to a bmodel. The steps are:
+- Convert the Paddle dygraph model to ONNX; see [Exporting PaddleDetection models](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.4/deploy/EXPORT_MODEL.md).
+- Convert the ONNX model to a bmodel; see [TPU-MLIR](https://github.com/sophgo/tpu-mlir).
+
+## 3. Model Conversion Example
+
+The conversion process is similar for PP-YOLOE, YOLOv8 and PicoDet. Below, ppyoloe_crn_s_300e_coco is used as an example of converting a Paddle model to a SOPHGO-TPU model.
+
+### Exporting the ONNX model
+```shell
+# Export the Paddle model
+python tools/export_model.py -c configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml --output_dir=output_inference -o weights=https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_300e_coco.pdparams
+
+# Convert the Paddle model to ONNX
+paddle2onnx --model_dir ppyoloe_crn_s_300e_coco \
+ --model_filename model.pdmodel \
+ --params_filename model.pdiparams \
+ --save_file ppyoloe_crn_s_300e_coco.onnx \
+ --enable_dev_version True
+
+# In the Paddle2ONNX folder, fix the ONNX model input shape
+python -m paddle2onnx.optimize --input_model ppyoloe_crn_s_300e_coco.onnx \
+ --output_model ppyoloe_crn_s_300e_coco.onnx \
+ --input_shape_dict "{'image':[1,3,640,640]}"
+
+```
+### Exporting the bmodel
+
+Taking the BM1684x bmodel as an example, we need to download the [TPU-MLIR](https://github.com/sophgo/tpu-mlir) project; see the [TPU-MLIR documentation](https://github.com/sophgo/tpu-mlir/blob/master/README.md) for installation details.
+## 4. Installation
+``` shell
+docker pull sophgo/tpuc_dev:latest
+
+# myname1234 is just an example; any other name works too
+docker run --privileged --name myname1234 -v $PWD:/workspace -it sophgo/tpuc_dev:latest
+
+source ./envsetup.sh
+./build.sh
+```
+
+## 5. Converting the ONNX model to a bmodel
+``` shell
+mkdir ppyoloe_crn_s_300e_coco && cd ppyoloe_crn_s_300e_coco
+
+# Download the test image; it is converted to npz format below
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+```
+
+Use Python to generate the npz file needed for model conversion:
+``` python
+import cv2
+import numpy as np
+
+im = cv2.imread('000000014439.jpg')
+im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+# [640, 640] is the input size of ppyoloe_crn_s_300e_coco
+im_scale_y = 640 / float(im.shape[0])
+im_scale_x = 640 / float(im.shape[1])
+inputs = {}
+inputs['image'] = np.array((im, )).astype('float32')
+inputs['scale_factor'] = np.array([im_scale_y, im_scale_x]).astype('float32')
+np.savez('inputs.npz', image=inputs['image'], scale_factor=inputs['scale_factor'])
+```
+
+Place the ONNX model file ppyoloe_crn_s_300e_coco.onnx in this directory, then run:
+``` shell
+mkdir workspace && cd workspace
+
+# Convert the ONNX model to an mlir model
+model_transform.py \
+    --model_name ppyoloe_crn_s_300e_coco \
+    --model_def ../ppyoloe_crn_s_300e_coco.onnx \
+    --input_shapes [[1,3,640,640],[1,2]] \
+    --keep_aspect_ratio \
+    --pixel_format rgb \
+    --output_names p2o.Div.1,p2o.Concat.29 \
+    --test_input ../inputs.npz \
+    --test_result ppyoloe_crn_s_300e_coco_top_outputs.npz \
+    --mlir ppyoloe_crn_s_300e_coco.mlir
+```
+## 6. Notes
+**Because TPU-MLIR does not currently support post-processing operators, the inputs of the post-processing stage must be used as the network outputs.**
+Concretely, the output_names are found with [Netron](https://netron.app/): open the ONNX model to be converted in the web page and search for the NonMaxSuppression node.
+The names of the boxes and scores entries under INPUTS are exactly the two output_names we need.
+For example, visualizing with Netron gives a picture like the following:
+
+Find the NonMaxSuppression node marked by the blue box; the two node names marked by the red boxes are p2o.Div.1 and p2o.Concat.29.
+
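+As a scripted alternative to Netron, the two names can also be read out with the `onnx` package (a sketch, assuming the graph contains a single NonMaxSuppression node):
+
+``` python
+import onnx
+
+# Print the boxes/scores input names of the NonMaxSuppression node;
+# these are the names to pass to model_transform.py via --output_names.
+model = onnx.load("ppyoloe_crn_s_300e_coco.onnx")
+for node in model.graph.node:
+    if node.op_type == "NonMaxSuppression":
+        print(node.input[0], node.input[1])  # boxes, scores
+```
+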
+``` bash
+# Convert the mlir model to an F32 bmodel for BM1684x
+model_deploy.py \
+ --mlir ppyoloe_crn_s_300e_coco.mlir \
+ --quantize F32 \
+ --chip bm1684x \
+ --test_input ppyoloe_crn_s_300e_coco_in_f32.npz \
+ --test_reference ppyoloe_crn_s_300e_coco_top_outputs.npz \
+ --model ppyoloe_crn_s_300e_coco_1684x_f32.bmodel
+```
+This finally produces ppyoloe_crn_s_300e_coco_1684x_f32.bmodel, a bmodel that runs on the BM1684x. To accelerate the model further, the ONNX model can be converted to an INT8 bmodel; see the [TPU-MLIR documentation](https://github.com/sophgo/tpu-mlir/blob/master/README.md) for the detailed steps.
+
+## 7. Detailed Deployment Examples
+- [C++ deployment](./cpp)
+- [Python deployment](./python)
diff --git a/deploy/fastdeploy/sophgo/cpp/CMakeLists.txt b/deploy/fastdeploy/sophgo/cpp/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f27493372848517dcbd50200a83f2907ec2b1c57
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/cpp/CMakeLists.txt
@@ -0,0 +1,14 @@
+PROJECT(infer_demo C CXX)
+CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
+option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
+
+set(ENABLE_LITE_BACKEND OFF)
+#set(FDLIB ${FASTDEPLOY_INSTALL_DIR})
+
+include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+include_directories(${FASTDEPLOY_INCS})
+include_directories(${FastDeploy_INCLUDE_DIRS})
+
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
diff --git a/deploy/fastdeploy/sophgo/cpp/README.md b/deploy/fastdeploy/sophgo/cpp/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..44179ddffe69dab01eb8468d00e510608b3696f6
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/cpp/README.md
@@ -0,0 +1,57 @@
+# PaddleDetection SOPHGO C++ Deployment Example
+
+This directory provides `infer.cc`, an example that quickly deploys PP-YOLOE with acceleration on a SOPHGO BM1684x board. Deploying YOLOv8 and PicoDet follows the same logic; just switch the model.
+
+## 1. Preparing the deployment environment
+Before deploying, compile the FastDeploy prediction library for SOPHGO hardware yourself; see [SOPHGO deployment environment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#算能硬件部署环境).
+
+## 2. Preparing the deployment model
+Before deploying, prepare the inference model you want to run. You can use a [pre-exported inference model](../README.md) or [export a PaddleDetection deployment model yourself](../README.md).
+
+## 3. Directory layout
+
+The example consists of the following parts:
+```text
+.
+├── CMakeLists.txt
+├── fastdeploy-sophgo  # compiled FastDeploy SDK
+├── images             # folder for test images
+├── infer.cc
+└── model              # folder for model files
+```
+
+## 4. Running the deployment example
+
+### 4.1 Compile and copy the SDK
+
+Compile the SDK as described in [Compiling the SOPHGO deployment library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install/sophgo.md); after compilation, a fastdeploy-sophgo directory is generated under the build directory.
+
+### 4.2 Copy the model and config files into the model folder
+Convert the Paddle model to a SOPHGO bmodel; see the [conversion guide](../README.md) for the steps.
+Copy the converted SOPHGO bmodel model file into the model folder.
+
+### 4.3 Put the test image into the images folder
+```bash
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+cp 000000014439.jpg ./images
+```
+
+### 4.4 Build the example
+
+```bash
+cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-sophgo
+make
+```
+
+### 4.5 Run the example
+
+```bash
+# PP-YOLOE inference example
+./infer_demo model images/000000014439.jpg
+```
+
+## 5. Further guides
+- [Overview of deploying PaddleDetection models with FastDeploy](../../)
+- [Python deployment](../python)
+- [Model conversion](../README.md)
\ No newline at end of file
diff --git a/deploy/fastdeploy/sophgo/cpp/infer.cc b/deploy/fastdeploy/sophgo/cpp/infer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6ee6aeb1c448dd5818a262812f91fb6d129145e0
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/cpp/infer.cc
@@ -0,0 +1,60 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <iostream>
+
+#include <string>
+
+#include "fastdeploy/vision.h"
+
+void SophgoInfer(const std::string& model_dir, const std::string& image_file) {
+ auto model_file = model_dir + "/ppyoloe_crn_s_300e_coco_1684x_f32.bmodel";
+ auto params_file = "";
+ auto config_file = model_dir + "/infer_cfg.yml";
+
+ auto option = fastdeploy::RuntimeOption();
+ option.UseSophgo();
+
+ auto format = fastdeploy::ModelFormat::SOPHGO;
+
+ auto model = fastdeploy::vision::detection::PPYOLOE(
+ model_file, params_file, config_file, option, format);
+
+ model.GetPostprocessor().ApplyNMS();
+
+ auto im = cv::imread(image_file);
+
+ fastdeploy::vision::DetectionResult res;
+ if (!model.Predict(&im, &res)) {
+ std::cerr << "Failed to predict." << std::endl;
+ return;
+ }
+
+ std::cout << res.Str() << std::endl;
+ auto vis_im = fastdeploy::vision::VisDetection(im, res, 0.5);
+ cv::imwrite("infer_sophgo.jpg", vis_im);
+ std::cout << "Visualized result saved in ./infer_sophgo.jpg" << std::endl;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 3) {
+ std::cout
+ << "Usage: infer_demo path/to/model_dir path/to/image, "
+ "e.g ./infer_demo ./model_dir ./test.jpeg"
+ << std::endl;
+ return -1;
+ }
+ SophgoInfer(argv[1], argv[2]);
+ return 0;
+}
diff --git a/deploy/fastdeploy/sophgo/python/README.md b/deploy/fastdeploy/sophgo/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e8a1f59833aa49ebe58846792db8ecf6ef045544
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/python/README.md
@@ -0,0 +1,30 @@
+# PaddleDetection SOPHGO Python Deployment Example
+
+## 1. Preparing the deployment environment
+
+Before deploying, compile and install the FastDeploy Python wheel for SOPHGO hardware yourself; see [SOPHGO deployment environment](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/cn/build_and_install#算能硬件部署环境).
+
+This directory provides `infer.py`, an example that quickly deploys PP-YOLOE on a SOPHGO TPU; running the script below completes the deployment. Deploying YOLOv8 and PicoDet follows the same logic; just switch the model.
+
+## 2. Preparing the deployment model
+Before deploying, prepare the inference model you want to run. You can use a [pre-exported inference model](../README.md) or [export a PaddleDetection deployment model yourself](../README.md).
+
+```bash
+# Download the deployment example code
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection/deploy/fastdeploy/sophgo/python
+
+# Download the test image
+wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
+
+# Inference
+# PP-YOLOE inference example
+python3 infer.py --model_file model/ppyoloe_crn_s_300e_coco_1684x_f32.bmodel --config_file model/infer_cfg.yml --image_file ./000000014439.jpg
+
+# After the run finishes, the visualized result is saved in sophgo_result.jpg
+```
+
+## 3. Further guides
+- [C++ deployment](../cpp)
+- [Converting PP-YOLOE models for SOPHGO](../README.md)
diff --git a/deploy/fastdeploy/sophgo/python/infer.py b/deploy/fastdeploy/sophgo/python/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f10418f9c4701c8fca64e57129cdf0450ce4b32b
--- /dev/null
+++ b/deploy/fastdeploy/sophgo/python/infer.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import fastdeploy as fd
+import cv2
+
+
+def parse_arguments():
+ import argparse
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--model_file", required=True, help="Path of sophgo model.")
+ parser.add_argument("--config_file", required=True, help="Path of config.")
+ parser.add_argument(
+ "--image_file", type=str, required=True, help="Path of test image file.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_arguments()
+
+ model_file = args.model_file
+ params_file = ""
+ config_file = args.config_file
+
+ # setup runtime
+ runtime_option = fd.RuntimeOption()
+ runtime_option.use_sophgo()
+
+ model = fd.vision.detection.PPYOLOE(
+ model_file,
+ params_file,
+ config_file,
+ runtime_option=runtime_option,
+ model_format=fd.ModelFormat.SOPHGO)
+
+ model.postprocessor.apply_nms()
+
+ # predict
+ im = cv2.imread(args.image_file)
+ result = model.predict(im)
+ print(result)
+
+ # visualize
+ vis_im = fd.vision.vis_detection(im, result, score_threshold=0.5)
+ cv2.imwrite("sophgo_result.jpg", vis_im)
+ print("Visualized result save in ./sophgo_result.jpg")