diff --git a/docs/zh_CN/PULC/PULC_vehicle_attribute.md b/docs/zh_CN/PULC/PULC_vehicle_attribute.md
index 35b731f324236f4b9bcade4074c4a7afd21b9e8e..03f67321fd04e1e33be0f7829da8bfce1c2be0a8 100644
--- a/docs/zh_CN/PULC/PULC_vehicle_attribute.md
+++ b/docs/zh_CN/PULC/PULC_vehicle_attribute.md
@@ -58,7 +58,7 @@
 As the table shows, accuracy is high when the backbone is Res2Net200_vd_26w_4s, but inference is slow. Replacing the backbone with the lightweight MobileNetV3_small_x0_35 improves speed substantially, but accuracy drops sharply. Replacing the backbone with PPLCNet_x1_0 instead raises accuracy by 2 percentage points while also improving speed by about 23%. On this basis, using the SSLD pretrained model adds about 0.5 percentage points of accuracy without changing inference speed; fusing in the EDA strategy adds another 0.52 percentage points; and finally, SKL-UGI knowledge distillation contributes a further 0.23 percentage points. At this point the accuracy of PPLCNet_x1_0 trails Res2Net200_vd_26w_4s by only 0.55 percentage points, while running 32 times faster. The PULC training method and the inference deployment method are described in detail below.

 **Notes:**
-
+
 * Latency is measured on an Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz with the MKLDNN acceleration strategy enabled and 10 threads.
 * For an introduction to PP-LCNet, see [PP-LCNet introduction](../models/PP-LCNet.md); the related paper is the [PP-LCNet paper](https://arxiv.org/abs/2109.15099).
@@ -178,7 +178,7 @@ from xml.dom.minidom import parse

 vehicleids = []

-def convert_annotation(input_fp, output_fp):
+def convert_annotation(input_fp, output_fp, subdir):
     in_file = open(input_fp)
     list_file = open(output_fp, 'w')
     tree = parse(in_file)
@@ -201,12 +201,12 @@
             typeid = int (item.getAttribute("typeID"))
             label[typeid+9] = '1'
             label = ','.join(label)
-            list_file.write(os.path.join('image_train', name) + "\t" + label + "\n")
+            list_file.write(os.path.join(subdir, name) + "\t" + label + "\n")
     list_file.close()

-convert_annotation('train_label.xml', 'train_list.txt') #imagename vehiclenum colorid typeid
-convert_annotation('test_label.xml', 'test_list.txt')
+convert_annotation('train_label.xml', 'train_list.txt', 'image_train') #imagename vehiclenum colorid typeid
+convert_annotation('test_label.xml', 'test_list.txt', 'image_test')
 ```

 After the above script runs, the `VeRi` directory contains the following data:
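With the `subdir` argument in place, test images are no longer listed under `image_train`. As a quick sanity check, a minimal sketch follows; it assumes the Python snippet above has been saved as `convert_annotation.py` (a hypothetical filename) inside the `VeRi` directory:

```shell
cd VeRi
python3 convert_annotation.py
# Each line of the generated lists has the form "<subdir>/<image name>\t<comma-separated label>":
head -n 1 train_list.txt   # expected path prefix: image_train/
head -n 1 test_list.txt    # expected path prefix: image_test/
```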
diff --git a/test_tipc/docs/test_inference_cpp.md b/test_tipc/docs/test_inference_cpp.md
index db1e27d9a8c19d5879b3eedf78f823514e9fa367..5432ea454fb55c4a8d121af3f7136090ac38f23f 100644
--- a/test_tipc/docs/test_inference_cpp.md
+++ b/test_tipc/docs/test_inference_cpp.md
@@ -248,20 +248,20 @@ bash test_tipc/prepare.sh test_tipc/config/ResNet/ResNet50_linux_gpu_normal_norm
 The test method is as follows; to test a different model file, simply swap in your own parameter configuration file.

 ```shell
-bash test_tipc/test_inference_cpp.sh ${your_params_file}
+bash test_tipc/test_inference_cpp.sh ${your_params_file} cpp_infer
 ```

 Taking the `Linux GPU/CPU C++ inference test` of `ResNet50` as an example, the command is as follows.

 ```shell
-bash test_tipc/test_inference_cpp.sh test_tipc/config/ResNet/ResNet50_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt
+bash test_tipc/test_inference_cpp.sh test_tipc/config/ResNet/ResNet50_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt cpp_infer
 ```

 Output like the following indicates that the command ran successfully.

 ```shell
-Run successfully with command - ./deploy/cpp/build/clas_system -c inference_cls.yaml > ./test_tipc/output/ResNet50/cls_cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1!
-Run successfully with command - ./deploy/cpp/build/clas_system -c inference_cls.yaml > ./test_tipc/output/ResNet50/cls_cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log 2>&1!
+Run successfully with command - ResNet50 - ./deploy/cpp/build/clas_system -c inference_cls.yaml > ./test_tipc/output/ResNet50/cpp_infer/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log 2>&1!
+Run successfully with command - ResNet50 - ./deploy/cpp/build/clas_system -c inference_cls.yaml > ./test_tipc/output/ResNet50/cpp_infer/cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log 2>&1!
 ```

 The results are printed in the final log, as shown below.
@@ -312,6 +312,6 @@ Current total inferen time cost: 5449.39 ms.
 Top5: class_id: 265, score: 0.0420, label: toy poodle
 ```

-Detailed logs can be found in `./test_tipc/output/ResNet50/cls_cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log` and `./test_tipc/output/ResNet50/cls_cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log`.
+Detailed logs can be found in `./test_tipc/output/ResNet50/cpp_infer/cpp_infer_gpu_usetrt_False_precision_fp32_batchsize_1.log` and `./test_tipc/output/ResNet50/cpp_infer/cpp_infer_cpu_usemkldnn_False_threads_1_precision_fp32_batchsize_1.log`.

 If a run fails, the failure log and the corresponding command are also printed to the terminal; the failure can be diagnosed starting from that command.
diff --git a/test_tipc/prepare.sh b/test_tipc/prepare.sh
index f1047fd0e38db5a297790490a59d1fd0c486fc44..73705aa54d1c055c833f15ea7bf37a68f9d72742 100644
--- a/test_tipc/prepare.sh
+++ b/test_tipc/prepare.sh
@@ -84,7 +84,12 @@ if [[ ${MODE} = "cpp_infer" ]]; then
     fi
     if [[ ! -d "./deploy/cpp/paddle_inference/" ]]; then
         pushd ./deploy/cpp/
-        wget -nc https://paddle-inference-lib.bj.bcebos.com/2.2.2/cxx_c/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5/paddle_inference.tgz
+        PADDLEInfer=$3
+        if [[ -z "${PADDLEInfer}" ]]; then
+            wget -nc https://paddle-inference-lib.bj.bcebos.com/2.2.2/cxx_c/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5/paddle_inference.tgz --no-check-certificate
+        else
+            wget -nc "${PADDLEInfer}" --no-check-certificate
+        fi
         tar xf paddle_inference.tgz
         popd
     fi
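The new optional third argument means `prepare.sh` no longer hard-codes the inference library. A usage sketch under that assumption follows; the explicit URL in the second command is simply the script's default, standing in for any prebuilt archive:

```shell
# Fall back to the default Paddle Inference library (third argument omitted):
bash test_tipc/prepare.sh test_tipc/config/ResNet/ResNet50_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt cpp_infer

# Or pass a specific prebuilt library as the third argument. Note that the
# script still runs `tar xf paddle_inference.tgz`, so the URL must end in
# paddle_inference.tgz for the extraction step to find the download:
bash test_tipc/prepare.sh test_tipc/config/ResNet/ResNet50_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt cpp_infer \
    https://paddle-inference-lib.bj.bcebos.com/2.2.2/cxx_c/Linux/GPU/x86-64_gcc8.2_avx_mkl_cuda10.1_cudnn7.6.5_trt6.0.1.5/paddle_inference.tgz
```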
diff --git a/test_tipc/test_inference_cpp.sh b/test_tipc/test_inference_cpp.sh
index 6f67f479cf2fad49eeb85badea64f4d90e2a3964..255e0839a5d2541cd01018b4a09922af677edf80 100644
--- a/test_tipc/test_inference_cpp.sh
+++ b/test_tipc/test_inference_cpp.sh
@@ -2,10 +2,17 @@
 source test_tipc/common_func.sh

 FILENAME=$1
-GPUID=$2
+MODE=$2
+
+# set cuda device
+GPUID=$3
 if [[ ! $GPUID ]];then
     GPUID=0
 fi
+env="export CUDA_VISIBLE_DEVICES=${GPUID}"
+set CUDA_VISIBLE_DEVICES
+eval $env
+
 dataline=$(awk 'NR==1, NR==19{print}' $FILENAME)

 # parser params
@@ -30,7 +37,7 @@ cpp_benchmark_value=$(func_parser_value "${lines[16]}")
 generate_yaml_cmd=$(func_parser_value "${lines[17]}")
 transform_index_cmd=$(func_parser_value "${lines[18]}")

-LOG_PATH="./test_tipc/output/${model_name}"
+LOG_PATH="./test_tipc/output/${model_name}/${MODE}"
 mkdir -p ${LOG_PATH}
 status_log="${LOG_PATH}/results_cpp.log"
 # generate_yaml_cmd="python3 test_tipc/generate_cpp_yaml.py"
@@ -56,7 +63,7 @@ function func_shitu_cpp_inference(){
                     if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
                         precison="int8"
                     fi
-                    _save_log_path="${_log_path}/shitu_cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
+                    _save_log_path="${_log_path}/cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
                     eval $transform_index_cmd
                     command="${generate_yaml_cmd} --type shitu --batch_size ${batch_size} --mkldnn ${use_mkldnn} --gpu ${use_gpu} --cpu_thread ${threads} --tensorrt False --precision ${precision} --data_dir ${_img_dir} --benchmark True --cls_model_dir ${cpp_infer_model_dir} --det_model_dir ${cpp_det_infer_model_dir} --gpu_id ${GPUID}"
                     eval $command
@@ -80,7 +87,7 @@ function func_shitu_cpp_inference(){
                     continue
                 fi
                 for batch_size in ${cpp_batch_size_list[*]}; do
-                    _save_log_path="${_log_path}/shitu_cpp_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
+                    _save_log_path="${_log_path}/cpp_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
                     eval $transform_index_cmd
                     command="${generate_yaml_cmd} --type shitu --batch_size ${batch_size} --mkldnn False --gpu ${use_gpu} --cpu_thread 1 --tensorrt ${use_trt} --precision ${precision} --data_dir ${_img_dir} --benchmark True --cls_model_dir ${cpp_infer_model_dir} --det_model_dir ${cpp_det_infer_model_dir} --gpu_id ${GPUID}"
                     eval $command
@@ -118,7 +125,7 @@ function func_cls_cpp_inference(){
                     if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
                         precison="int8"
                     fi
-                    _save_log_path="${_log_path}/cls_cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
+                    _save_log_path="${_log_path}/cpp_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
                     command="${generate_yaml_cmd} --type cls --batch_size ${batch_size} --mkldnn ${use_mkldnn} --gpu ${use_gpu} --cpu_thread ${threads} --tensorrt False --precision ${precision} --data_dir ${_img_dir} --benchmark True --cls_model_dir ${cpp_infer_model_dir} --gpu_id ${GPUID}"
                     eval $command
@@ -142,7 +149,7 @@ function func_cls_cpp_inference(){
                     continue
                 fi
                 for batch_size in ${cpp_batch_size_list[*]}; do
-                    _save_log_path="${_log_path}/cls_cpp_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
+                    _save_log_path="${_log_path}/cpp_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
                     command="${generate_yaml_cmd} --type cls --batch_size ${batch_size} --mkldnn False --gpu ${use_gpu} --cpu_thread 1 --tensorrt ${use_trt} --precision ${precision} --data_dir ${_img_dir} --benchmark True --cls_model_dir ${cpp_infer_model_dir} --gpu_id ${GPUID}"
                     eval $command
                     command="${_script} > ${_save_log_path} 2>&1"
@@ -235,18 +242,6 @@
 cd ../../../
 # cd ../../
 echo "################### build PaddleClas demo finished ###################"
-
-# set cuda device
-GPUID=$3
-if [ ${#GPUID} -le 0 ];then
-    env="export CUDA_VISIBLE_DEVICES=0"
-else
-    env="export CUDA_VISIBLE_DEVICES=${GPUID}"
-fi
-set CUDA_VISIBLE_DEVICES
-eval $env
-
-
 echo "################### run test ###################"
 export Count=0
 IFS="|"
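Since `test_inference_cpp.sh` now reads the mode from `$2` and the GPU id from `$3`, an explicit-device run would look like the sketch below; GPU id 1 is purely illustrative, and the id defaults to 0 when omitted:

```shell
# MODE selects the log subdirectory under test_tipc/output/<model_name>/;
# GPUID is exported as CUDA_VISIBLE_DEVICES before the test runs.
bash test_tipc/test_inference_cpp.sh \
    test_tipc/config/ResNet/ResNet50_linux_gpu_normal_normal_infer_cpp_linux_gpu_cpu.txt \
    cpp_infer 1
```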
###################" - -# set cuda device -GPUID=$3 -if [ ${#GPUID} -le 0 ];then - env="export CUDA_VISIBLE_DEVICES=0" -else - env="export CUDA_VISIBLE_DEVICES=${GPUID}" -fi -set CUDA_VISIBLE_DEVICES -eval $env - - echo "################### run test ###################" export Count=0 IFS="|"