update docs

a5127120 · HydrogenSulfate · 3aa024ec · a5127120 · a5127120 · a5127120
13 changed files
--- a/deploy/configs/inference_drink.yaml
+++ b/deploy/configs/inference_drink.yaml
 Global:
-  infer_imgs: "./drink_dataset_v1.0/test_images/hongniu_1.jpg"
+  infer_imgs: "./drink_dataset_v2.0/test_images/100.jpeg"
  det_inference_model_dir: "./models/picodet_PPLCNet_x2_5_mainbody_lite_v1.0_infer"
-  rec_inference_model_dir: "./models/general_PPLCNet_x2_5_lite_v1.0_infer"
+  rec_inference_model_dir: "./models/general_PPLCNetV2_base_pretrained_v1.0_infer"
  rec_nms_thresold: 0.05

  batch_size: 1
@@ -51,9 +51,9 @@ RecPostProcess: null
 # indexing engine config
 IndexProcess:
  index_method: "HNSW32" # supported: HNSW32, IVF, Flat
-  image_root: "./drink_dataset_v1.0/gallery"
-  index_dir: "./drink_dataset_v1.0/index"
-  data_file: "./drink_dataset_v1.0/gallery/drink_label.txt"
+  image_root: "./drink_dataset_v2.0/gallery"
+  index_dir: "./drink_dataset_v2.0/index"
+  data_file: "./drink_dataset_v2.0/gallery/drink_label.txt"
  index_operation: "new" # supported: "append", "remove", "new"
  delimiter: "\t"
  dist_type: "IP"

--- a/deploy/configs/inference_general.yaml
+++ b/deploy/configs/inference_general.yaml
 Global:
-  infer_imgs: "./drink_dataset_v1.0/test_images/nongfu_spring.jpeg"
+  infer_imgs: "./drink_dataset_v2.0/test_images/100.jpeg"
  det_inference_model_dir: "./models/picodet_PPLCNet_x2_5_mainbody_lite_v1.0_infer"
-  rec_inference_model_dir: "./models/general_PPLCNet_x2_5_lite_v1.0_infer"
+  rec_inference_model_dir: "./models/general_PPLCNetV2_base_pretrained_v1.0_infer"
  rec_nms_thresold: 0.05

  batch_size: 1
@@ -38,12 +38,15 @@ DetPostProcess: {}
 RecPreProcess:
  transform_ops:
    - ResizeImage:
-        size: 224
+        size: [224, 224]
+        return_numpy: False
+        interpolation: bilinear
+        backend: cv2
    - NormalizeImage:
-        scale: 0.00392157
+        scale: 1.0/255.0
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
-        order: ""
+        order: hwc
    - ToCHWImage:

 RecPostProcess: null
@@ -51,9 +54,9 @@ RecPostProcess: null
 # indexing engine config
 IndexProcess:
  index_method: "HNSW32" # supported: HNSW32, IVF, Flat
-  image_root: "./drink_dataset_v1.0/gallery/"
-  index_dir: "./drink_dataset_v1.0/index"
-  data_file: "./drink_dataset_v1.0/gallery/drink_label.txt"
+  image_root: "./drink_dataset_v2.0/gallery/"
+  index_dir: "./drink_dataset_v2.0/index"
+  data_file: "./drink_dataset_v2.0/gallery/drink_label.txt"
  index_operation: "new" # supported: "append", "remove", "new"
  delimiter: "\t"
  dist_type: "IP"

--- a/deploy/configs/inference_rec.yaml
+++ b/deploy/configs/inference_rec.yaml
 Global:
  infer_imgs: "./images/wangzai.jpg"
-  rec_inference_model_dir: "./models/product_ResNet50_vd_aliproduct_v1.0_infer"
+  rec_inference_model_dir: "./models/general_PPLCNetV2_base_pretrained_v1.0_infer"
  batch_size: 1
  use_gpu: False
  enable_mkldnn: True

--- a/docs/images/quick_start/android_demo/reset.png
+++ b/docs/images/quick_start/android_demo/reset.png
--- a/docs/images/quick_start/android_demo/shezhi.png
+++ b/docs/images/quick_start/android_demo/shezhi.png
--- a/docs/images/recognition/drink_data_demo/output/mosilian.jpeg
+++ b/docs/images/recognition/drink_data_demo/output/mosilian.jpeg
--- a/docs/images/recognition/drink_data_demo/test_images/100.jpeg
+++ b/docs/images/recognition/drink_data_demo/test_images/100.jpeg
--- a/docs/images/structure.jpg
+++ b/docs/images/structure.jpg
--- a/docs/images/structure.png
+++ b/docs/images/structure.png
--- a/docs/zh_CN/PPShiTu/PPShiTuV2_introduction.md
+++ b/docs/zh_CN/PPShiTu/PPShiTuV2_introduction.md
@@ -126,23 +126,23 @@ python3.7 -m paddle.distributed.launch tools/train.py \

 结合以上3个优化点，最终在多个数据集的实验结果如下：

-  | 模型       | training data     | product<sup>*</sup> |
-  | :--------- | :---------------- | :------------------ |
-  | -          | -                 | recall@1%(mAP%)     |
-  | PP-ShiTuV1 | PP-ShiTuV2 数据集 | 63.0(51.5)          |
-  | PP-ShiTuV2 | PP-ShiTuV2 数据集 | 73.7(61.0)          |
-
-  | 模型       | training data     | Aliproduct      | VeRI-Wild       | LogoDet-3k      | iCartoonFace    | SOP             | Inshop          |
-  | :--------- | :---------------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- |
-  | -          | -                 | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) |
-  | PP-ShiTuV1 | PP-ShiTuV2 数据集 | 83.9(83.2)      | 88.7(60.1)      | 86.1(73.6)      | 84.1(72.3)      | 79.7(58.6)      | 89.1(69.4)      |
-  | PP-ShiTuV2 | PP-ShiTuV2 数据集 | 84.2(83.3)      | 87.8(68.8)      | 88.0(63.2)      | 53.6(27.5)      | 77.6(55.3)      | 90.8(74.3)      |
-
-  | 模型       | training data     | gldv2           | imdb_face       | iNat            | instre          | sketch          | sop<sup>*</sup> |
-  | :--------- | :---------------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- |
-  | -          | -                 | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) |
-  | PP-ShiTuV1 | PP-ShiTuV2 数据集 | 98.2(91.6)      | 28.8(8.42)      | 12.6(6.1)       | 72.0(50.4)      | 27.9(9.5)       | 97.6(90.3)      |
-  | PP-ShiTuV2 | PP-ShiTuV2 数据集 | 98.1(90.5)      | 35.9(11.2)      | 38.6(23.9)      | 87.7(71.4)      | 39.3(15.6)      | 98.3(90.9)      |
+  | 模型       | product<sup>*</sup> |
+  | :--------- | :------------------ |
+  | -          | recall@1%(mAP%)     |
+  | PP-ShiTuV1 | 63.0(51.5)          |
+  | PP-ShiTuV2 | 73.7(61.0)          |
+
+  | 模型       | Aliproduct      | VeRI-Wild       | LogoDet-3k      | iCartoonFace    | SOP             | Inshop          |
+  | :--------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- |
+  | -          | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) |
+  | PP-ShiTuV1 | 83.9(83.2)      | 88.7(60.1)      | 86.1(73.6)      | 84.1(72.3)      | 79.7(58.6)      | 89.1(69.4)      |
+  | PP-ShiTuV2 | 84.2(83.3)      | 87.8(68.8)      | 88.0(63.2)      | 53.6(27.5)      | 77.6(55.3)      | 90.8(74.3)      |
+
+  | 模型       | gldv2           | imdb_face       | iNat            | instre          | sketch          | sop<sup>*</sup> |
+  | :--------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- |
+  | -          | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) |
+  | PP-ShiTuV1 | 98.2(91.6)      | 28.8(8.42)      | 12.6(6.1)       | 72.0(50.4)      | 27.9(9.5)       | 97.6(90.3)      |
+  | PP-ShiTuV2 | 98.1(90.5)      | 35.9(11.2)      | 38.6(23.9)      | 87.7(71.4)      | 39.3(15.6)      | 98.3(90.9)      |

 **注：** product数据集是为了验证PP-ShiTu的泛化性能而制作的数据集，所有的数据都没有在训练和测试集中出现。该数据包含8个大类（人脸、化妆品、地标、红酒、手表、车、运动鞋、饮料），299个小类。测试时，使用299个小类的标签进行测试；sop数据集来自[GPR1200: A Benchmark for General-Purpose Content-Based Image Retrieval](https://arxiv.org/abs/2111.13122)，可视为“SOP”数据集的子集。


--- a/docs/zh_CN/image_recognition_pipeline/feature_extraction.md
+++ b/docs/zh_CN/image_recognition_pipeline/feature_extraction.md
@@ -6,19 +6,13 @@
 - [1. 摘要](#1-摘要)
 - [2. 介绍](#2-介绍)
 - [3. 方法](#3-方法)
-  - [3.1 PP-ShiTuV1](#31-pp-shituv1)
+  - [3.1 PP-ShiTuV2](#31-pp-shituv2)
    - [3.1.1 Backbone](#311-backbone)
    - [3.1.2 Neck](#312-neck)
    - [3.1.3 Head](#313-head)
    - [3.1.4 Loss](#314-loss)
-  - [3.2 PP-ShiTuV2](#32-pp-shituv2)
-    - [3.2.1 Backbone](#321-backbone)
-    - [3.2.2 Neck](#322-neck)
-    - [3.2.3 Head](#323-head)
-    - [3.2.4 Loss](#324-loss)
 - [4. 实验部分](#4-实验部分)
-  - [4.1 PP-ShiTuV1](#41-pp-shituv1)
-  - [4.2 PP-ShiTuV2](#42-pp-shituv2)
+  - [4.1 PP-ShiTuV2](#41-pp-shituv2)
 - [5. 自定义特征提取](#5-自定义特征提取)
  - [5.1 数据准备](#51-数据准备)
  - [5.2 模型训练](#52-模型训练)
@@ -52,39 +46,21 @@

 ## 3. 方法

-### 3.1 PP-ShiTuV1
+### 3.1 PP-ShiTuV2

 #### 3.1.1 Backbone

-Backbone 部分采用了 [PP_LCNet_x2_5](../models/PP-LCNet.md)，其针对Intel CPU端的性能优化探索了多个有效的结构设计方案，最终实现了在不增加推理时间的情况下，进一步提升模型的性能，最终大幅度超越现有的 SOTA 模型。
-
-#### 3.1.2 Neck
-
-Neck 部分采用了 [FC Layer](../../../ppcls/arch/gears/fc.py)，对 Backbone 抽取得到的特征进行降维，减少了特征存储的成本与计算量。
-
-#### 3.1.3 Head
-
-Head 部分选用 [ArcMargin](../../../ppcls/arch/gears/arcmargin.py)，在训练时通过指定margin，增大同类特征之间的角度差异再进行分类，进一步提升抽取特征的表征能力。
-
-#### 3.1.4 Loss
-
-Loss 部分选用 [Cross entropy loss](../../../ppcls/loss/celoss.py)，在训练时以分类任务的损失函数来指导网络进行优化。详细的配置文件见[通用识别配置文件](../../../ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5.yaml)。
-
-### 3.2 PP-ShiTuV2
-
-#### 3.2.1 Backbone
-
 Backbone 部分采用了 [PP-LCNetV2_base](../models/PP-LCNetV2.md)，其针对Intel CPU端的性能优化探索了多个有效的结构设计方案，最终实现了在不增加推理时间的情况下，进一步提升模型的性能，最终大幅度超越现有的 SOTA 模型。

-#### 3.2.2 Neck
+#### 3.1.2 Neck

 Neck 部分采用了 [BN Neck](../../../ppcls/arch/gears/bnneck.py)，对 Backbone 抽取得到的特征的每个维度进行标准化操作，减少了同时优化度量学习损失和分类损失的难度。

-#### 3.2.3 Head
+#### 3.1.3 Head

 Head 部分选用 [FC Layer](../../../ppcls/arch/gears/fc.py)，使用分类头将 feature 转换成 logits 供后续计算分类损失。

-#### 3.2.4 Loss
+#### 3.1.4 Loss

 Loss 部分选用 [Cross entropy loss](../../../ppcls/loss/celoss.py) 和 [TripletAngularMarginLoss](../../../ppcls/loss/tripletangularmarginloss.py)，在训练时以分类损失和基于角度的三元组损失来指导网络进行优化。详细的配置文件见[GeneralRecognitionV2_PPLCNetV2_base.yaml](../../../ppcls/configs/GeneralRecognitionV2/GeneralRecognitionV2_PPLCNetV2_base.yaml#L63-77)。

@@ -92,33 +68,7 @@ Loss 部分选用 [Cross entropy loss](../../../ppcls/loss/celoss.py) 和 [Tripl

 ## 4. 实验部分

-### 4.1 PP-ShiTuV1
-
-训练数据为如下 7 个公开数据集的汇总：
-
-| 数据集       | 数据量  |  类别数  |   场景   |                                  数据集地址                                  |
-| :----------- | :-----: | :------: | :------: | :--------------------------------------------------------------------------: |
-| Aliproduct   | 2498771 |  50030   |   商品   |  [地址](https://retailvisionworkshop.github.io/recognition_challenge_2020/)  |
-| GLDv2        | 1580470 |  81313   |   地标   |           [地址](https://github.com/cvdfoundation/google-landmark)           |
-| VeRI-Wild    | 277797  |  30671   |   车辆   |                [地址](https://github.com/PKU-IMRE/VERI-Wild)                 |
-| LogoDet-3K   | 155427  |   3000   |   Logo   |          [地址](https://github.com/Wangjing1551/LogoDet-3K-Dataset)          |
-| iCartoonFace | 389678  |   5013   | 动漫人物 | [地址](http://challenge.ai.iqiyi.com/detail?raceId=5def69ace9fcf68aef76a75d) |
-| SOP          |  59551  |  11318   |   商品   |          [地址](https://cvgl.stanford.edu/projects/lifted_struct/)           |
-| Inshop       |  25882  |   3997   |   商品   |        [地址](http://mmlab.ie.cuhk.edu.hk/projects/DeepFashion.html)         |
-| **Total**    | **5M**  | **185K** |    -     |                                      -                                       |
-
-最终的模型精度指标如下表所示:
-
-|              模型               | Aliproduct | VeRI-Wild | LogoDet-3K | iCartoonFace |  SOP  | Inshop | Latency(ms) |
-| :-----------------------------: | :--------: | :-------: | :--------: | :----------: | :---: | :----: | :---------: |
-| GeneralRecognition_PPLCNet_x2_5 |   0.839    |   0.888   |   0.861    |    0.841     | 0.793 | 0.892  |     5.0     |
-
-* 预训练模型地址：[通用识别预训练模型](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/general_PPLCNet_x2_5_pretrained_v1.0.pdparams)
-* 采用的评测指标为：`Recall@1`
-* 速度评测机器的 CPU 具体信息为：`Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz`
-* 速度指标的评测条件为： 开启 MKLDNN, 线程数设置为 10
-
-### 4.2 PP-ShiTuV2
+### 4.1 PP-ShiTuV2

 我们对原有的训练数据进行了合理扩充与优化，最终使用如下 16 个公开数据集的汇总：

@@ -145,9 +95,15 @@ Loss 部分选用 [Cross entropy loss](../../../ppcls/loss/celoss.py) 和 [Tripl

 最终的模型精度指标如下表所示:

-|                模型                 | Aliproduct | VeRI-Wild | LogoDet-3K |  SOP  | Inshop | imdb_face | iNat  | instre | sketch | Latency(ms) |
-| :---------------------------------: | :--------: | :-------: | :--------: | :---: | :----: | :-------: | :---: | :----: | :----: | :---------: |
-| GeneralRecognitionV2_PPLCNetV2_base |   0.842    |   0.878   |   0.880    | 0.776 | 0.908  |   0.359   | 0.386 | 0.877  | 0.393  |     TODO     |
+  | 模型       | Aliproduct      | VeRI-Wild       | LogoDet-3k      | iCartoonFace    | SOP             | Inshop          |
+  | :--------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- |
+  | -          | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) |
+  | PP-ShiTuV2 | 84.2(83.3)      | 87.8(68.8)      | 88.0(63.2)      | 53.6(27.5)      | 77.6(55.3)      | 90.8(74.3)      |
+
+  | 模型       | gldv2           | imdb_face       | iNat            | instre          | sketch          | sop<sup>*</sup> |
+  | :--------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- | :-------------- |
+  | -          | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) | recall@1%(mAP%) |
+  | PP-ShiTuV2 | 98.1(90.5)      | 35.9(11.2)      | 38.6(23.9)      | 87.7(71.4)      | 39.3(15.6)      | 98.3(90.9)      |

 * 预训练模型地址：[general_PPLCNetV2_base_pretrained_v1.0.pdparams](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/PPShiTuV2/general_PPLCNetV2_base_pretrained_v1.0.pdparams)
 * 采用的评测指标为：`Recall@1` 与 `mAP`

--- a/docs/zh_CN/inference_deployment/python_deploy.md
+++ b/docs/zh_CN/inference_deployment/python_deploy.md
@@ -62,8 +62,8 @@ cd PaddleClas/deploy
 ```shell
 mkdir -p models
 # 下载通用检测 inference 模型并解压
-wget -nc -P ./models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar
-tar -xf ./models/ppyolov2_r50vd_dcn_mainbody_v1.0_infer.tar -C ./models/
+wget -nc -P ./models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/picodet_PPLCNet_x2_5_mainbody_lite_v1.0_infer.tar
+tar -xf ./models/picodet_PPLCNet_x2_5_mainbody_lite_v1.0_infer.tar -C ./models/
 ```

 使用以下命令进行预测：
@@ -91,8 +91,8 @@ cd PaddleClas/deploy
 ```shell
 mkdir -p models
 # 下载商品特征提取 inference 模型并解压
-wget -nc -P ./models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/product_ResNet50_vd_aliproduct_v1.0_infer.tar
-tar -xf ./models/product_ResNet50_vd_aliproduct_v1.0_infer.tar -C ./models/
+wget -nc -P ./models/ https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/PP-ShiTuV2/general_PPLCNetV2_base_pretrained_v1.0_infer.tar
+tar -xf ./models/general_PPLCNetV2_base_pretrained_v1.0_infer.tar -C ./models/
 ```

 使用以下命令进行预测：

--- a/docs/zh_CN/quick_start/quick_start_recognition.md
+++ b/docs/zh_CN/quick_start/quick_start_recognition.md