Commit 9dde9376 authored by H HydrogenSulfate

update code

......@@ -38,7 +38,7 @@ The Top-1 accuracy of the Res2Net200_vd pretrained model is as high as 85.1%.
* You can scan the WeChat group QR code below to join the PaddleClas WeChat group, get more efficient answers to your questions, and communicate with developers from all walks of life. We look forward to your joining.
<div align="center">
<img src="https://user-images.githubusercontent.com/12560511/160273340-831de8c3-f31d-44d1-9437-fcd4765a6851.jpg" width="200"/>
<img src="https://user-images.githubusercontent.com/12560511/162710270-8a249aca-4fa9-46f9-95e5-66d906fe6d66.jpg" width="200"/>
</div>
## Quick Start
......
......@@ -41,7 +41,7 @@ Four sample solutions are provided, including product recognition, vehicle recog
* You can also scan the QR code below to join the PaddleClas WeChat group to get more efficient answers to your questions and to communicate with developers from all walks of life. We look forward to hearing from you.
<div align="center">
<img src="https://user-images.githubusercontent.com/12560511/160273340-831de8c3-f31d-44d1-9437-fcd4765a6851.jpg" width="200"/>
<img src="https://user-images.githubusercontent.com/12560511/162710270-8a249aca-4fa9-46f9-95e5-66d906fe6d66.jpg" width="200"/>
</div>
## Quick Start
......
ARM_ABI = arm8
export ARM_ABI
include ../Makefile.def
LITE_ROOT=./inference_lite_lib.android.armv8
LITE_ROOT=../../../
include ${LITE_ROOT}/demo/cxx/Makefile.def
THIRD_PARTY_DIR=${LITE_ROOT}/third_party
......@@ -29,7 +29,7 @@ OPENCV_LIBS = ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PATH}/libs/libopencv_im
${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PATH}/3rdparty/libs/libtbb.a \
${THIRD_PARTY_DIR}/${OPENCV_VERSION}/${ARM_PATH}/3rdparty/libs/libcpufeatures.a
OPENCV_INCLUDE = -I../../../third_party/${OPENCV_VERSION}/${ARM_PATH}/include
OPENCV_INCLUDE = -I${LITE_ROOT}/third_party/${OPENCV_VERSION}/${ARM_PATH}/include
CXX_INCLUDES = $(INCLUDES) ${OPENCV_INCLUDE} -I$(LITE_ROOT)/cxx/include
......
clas_model_file ./MobileNetV3_large_x1_0.nb
label_path ./imagenet1k_label_list.txt
clas_model_file /data/local/tmp/arm_cpu/MobileNetV3_large_x1_0.nb
label_path /data/local/tmp/arm_cpu/imagenet1k_label_list.txt
resize_short_size 256
crop_size 224
visualize 0
num_threads 1
batch_size 1
precision FP32
runtime_device arm_cpu
enable_benchmark 0
tipc_benchmark 0
......@@ -21,6 +21,7 @@
#include <opencv2/opencv.hpp>
#include <sys/time.h>
#include <vector>
#include "AutoLog/auto_log/lite_autolog.h"
using namespace paddle::lite_api; // NOLINT
using namespace std;
......@@ -149,8 +150,10 @@ cv::Mat CenterCropImg(const cv::Mat &img, const int &crop_size) {
std::vector<RESULT>
RunClasModel(std::shared_ptr<PaddlePredictor> predictor, const cv::Mat &img,
const std::map<std::string, std::string> &config,
const std::vector<std::string> &word_labels, double &cost_time) {
const std::vector<std::string> &word_labels, double &cost_time,
std::vector<double> *time_info) {
// Read img
auto preprocess_start = std::chrono::steady_clock::now();
int resize_short_size = stoi(config.at("resize_short_size"));
int crop_size = stoi(config.at("crop_size"));
int visualize = stoi(config.at("visualize"));
......@@ -172,8 +175,8 @@ RunClasModel(std::shared_ptr<PaddlePredictor> predictor, const cv::Mat &img,
std::vector<float> scale = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
const float *dimg = reinterpret_cast<const float *>(img_fp.data);
NeonMeanScale(dimg, data0, img_fp.rows * img_fp.cols, mean, scale);
auto start = std::chrono::system_clock::now();
auto preprocess_end = std::chrono::steady_clock::now();
auto inference_start = std::chrono::system_clock::now();
// Run predictor
predictor->Run();
......@@ -181,9 +184,10 @@ RunClasModel(std::shared_ptr<PaddlePredictor> predictor, const cv::Mat &img,
std::unique_ptr<const Tensor> output_tensor(
std::move(predictor->GetOutput(0)));
auto *output_data = output_tensor->data<float>();
auto end = std::chrono::system_clock::now();
auto inference_end = std::chrono::system_clock::now();
auto postprocess_start = std::chrono::system_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::microseconds>(end - start);
std::chrono::duration_cast<std::chrono::microseconds>(inference_end - inference_start);
cost_time = double(duration.count()) *
std::chrono::microseconds::period::num /
std::chrono::microseconds::period::den;
......@@ -196,6 +200,13 @@ RunClasModel(std::shared_ptr<PaddlePredictor> predictor, const cv::Mat &img,
cv::Mat output_image;
auto results =
PostProcess(output_data, output_size, word_labels, output_image);
auto postprocess_end = std::chrono::system_clock::now();
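  // Collect {preprocess, inference, postprocess} durations in milliseconds; main() forwards time_info to AutoLogger for the TIPC benchmark report.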
std::chrono::duration<float> preprocess_diff = preprocess_end - preprocess_start;
time_info->push_back(double(preprocess_diff.count() * 1000));
std::chrono::duration<float> inference_diff = inference_end - inference_start;
time_info->push_back(double(inference_diff.count() * 1000));
std::chrono::duration<float> postprocess_diff = postprocess_end - postprocess_start;
time_info->push_back(double(postprocess_diff.count() * 1000));
if (visualize) {
std::string output_image_path = "./clas_result.png";
......@@ -309,6 +320,12 @@ int main(int argc, char **argv) {
std::string clas_model_file = config.at("clas_model_file");
std::string label_path = config.at("label_path");
std::string crop_size = config.at("crop_size");
int num_threads = stoi(config.at("num_threads"));
int batch_size = stoi(config.at("batch_size"));
std::string precision = config.at("precision");
std::string runtime_device = config.at("runtime_device");
bool tipc_benchmark = bool(stoi(config.at("tipc_benchmark")));
// Load Labels
std::vector<std::string> word_labels = LoadLabels(label_path);
......@@ -319,8 +336,9 @@ int main(int argc, char **argv) {
cv::cvtColor(srcimg, srcimg, cv::COLOR_BGR2RGB);
double run_time = 0;
std::vector<double> time_info;
std::vector<RESULT> results =
RunClasModel(clas_predictor, srcimg, config, word_labels, run_time);
RunClasModel(clas_predictor, srcimg, config, word_labels, run_time, &time_info);
std::cout << "===clas result for image: " << img_path << "===" << std::endl;
for (int i = 0; i < results.size(); i++) {
......@@ -338,6 +356,19 @@ int main(int argc, char **argv) {
} else {
std::cout << "Current time cost: " << run_time << " s." << std::endl;
}
if (tipc_benchmark) {
AutoLogger autolog(clas_model_file,
runtime_device,
num_threads,
batch_size,
crop_size,
precision,
time_info,
1);
std::cout << "=======================TIPC Lite Information=======================" << std::endl;
autolog.report();
}
}
return 0;
......
......@@ -25,8 +25,8 @@ Paddle Lite is PaddlePaddle's lightweight inference engine, which provides efficient inference capabilities for mobile and IoT devices...
1. [Recommended] Download directly. The inference library download links are as follows:
|Platform|Inference library download link|
|-|-|
|Android|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.8-rc/Android/gcc/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.with_cv.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.8-rc/Android/gcc/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.with_cv.tar.gz)|
|iOS|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.8-rc/iOS/inference_lite_lib.ios.armv7.with_cv.with_extra.tiny_publish.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.8-rc/iOS/inference_lite_lib.ios.armv8.with_cv.with_extra.tiny_publish.tar.gz)|
|Android|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.android.armv7.clang.c++_static.with_extra.with_cv.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.android.armv8.clang.c++_static.with_extra.with_cv.tar.gz)|
|iOS|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.ios.armv7.with_cv.with_extra.tiny_publish.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.ios.armv8.with_cv.with_extra.tiny_publish.tar.gz)|
**Note**:
1. If the inference library was downloaded from the Paddle-Lite [official documentation](https://paddle-lite.readthedocs.io/zh/latest/quick_start/release_lib.html#android-toolchain-gcc),
......@@ -44,11 +44,11 @@ git checkout develop
**Note**: When compiling Paddle-Lite to obtain the inference library, the two options `--with_cv=ON --with_extra=ON` need to be enabled. `--arch` indicates the `arm` version and is set to armv8 here. For more details about the compilation commands, please refer to this [link](https://paddle-lite.readthedocs.io/zh/latest/user_guides/Compile/Android.html#id2).
After downloading the inference library directly and extracting it, you get the `inference_lite_lib.android.armv8/` folder; the inference library obtained by compiling Paddle-Lite is located in the `Paddle-Lite/build.lite.android.armv8.gcc/inference_lite_lib.android.armv8/` folder.
After downloading the inference library directly and extracting it, you get the `inference_lite_lib.android.armv8.clang.c++_static.with_extra.with_cv/` folder; the inference library obtained by compiling Paddle-Lite is located in the `Paddle-Lite/build.lite.android.armv8.gcc/inference_lite_lib.android.armv8/` folder.
The directory structure of the inference library is as follows:
```
inference_lite_lib.android.armv8/
inference_lite_lib.android.armv8.clang.c++_static.with_extra.with_cv/
|-- cxx C++ inference library and header files
| |-- include C++ header files
| | |-- paddle_api.h
......@@ -86,7 +86,7 @@ Install `paddlelite` under Python; currently Python 3.7 is the highest supported version.
**Note**: The version of the `paddlelite` whl package must correspond to the version of the inference library.
```shell
pip install paddlelite==2.8
pip install paddlelite==2.10
```
After that, the `paddle_lite_opt` tool can be used to convert the inference model. Some parameters of `paddle_lite_opt` are as follows:
......@@ -146,6 +146,24 @@ paddle_lite_opt --model_file=./MobileNetV3_large_x1_0_infer/inference.pdmodel --
**Note**: The `--optimize_out` parameter is the save path of the optimized model and does not need the `.nb` suffix; the `--model_file` parameter is the path of the model structure file, and the `--param_file` parameter is the path of the model weights file. Please pay attention to the file names.
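If you prefer to drive the conversion from Python, a minimal sketch that wraps the `paddle_lite_opt` command above with `subprocess` might look as follows (the helper itself and the `inference.pdiparams` file name are assumptions for illustration, not part of the repo):

```python
import subprocess


def convert_to_nb(model_file, param_file, optimize_out):
    """Run paddle_lite_opt with the flags described above; append any further
    flags from the full command (e.g. valid targets) as needed."""
    cmd = [
        "paddle_lite_opt",
        f"--model_file={model_file}",      # model structure file
        f"--param_file={param_file}",      # model weights file
        f"--optimize_out={optimize_out}",  # output path; the .nb suffix is added automatically
    ]
    subprocess.run(cmd, check=True)


convert_to_nb("./MobileNetV3_large_x1_0_infer/inference.pdmodel",
              "./MobileNetV3_large_x1_0_infer/inference.pdiparams",
              "./MobileNetV3_large_x1_0")
```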
<a name="2.1.4"></a>
#### 2.1.4 Compile to get the executable file clas_system
```shell
# Clone the AutoLog repository for automated logging
cd PaddleClas_root_path
cd deploy/lite/
git clone https://github.com/LDOUBLEV/AutoLog.git
```
```shell
# Compile
make -j
```
After executing the `make` command, the `clas_system` executable file is generated in the current directory; it is used for Lite prediction.
<a name="2.2与手机联调"></a>
### 2.2 Run and debug on the phone
......@@ -167,7 +185,7 @@ paddle_lite_opt --model_file=./MobileNetV3_large_x1_0_infer/inference.pdmodel --
To install ADB on Windows, you need to download the ADB package from Google's Android platform and install it: [link](https://developer.android.com/studio)
4. After connecting the phone to the computer, enable the phone's `USB debugging` option, select the `file transfer` mode, and enter the following in the computer terminal:
3. After connecting the phone to the computer, enable the phone's `USB debugging` option, select the `file transfer` mode, and enter the following in the computer terminal:
```shell
adb devices
......@@ -178,40 +196,18 @@ List of devices attached
744be294 device
```
5. Prepare the optimized model, the inference library files, the test image, and the class mapping file.
```shell
cd PaddleClas_root_path
cd deploy/lite/
# Run prepare.sh
# prepare.sh puts the inference library files, the test image, and the dictionary file into the demo/cxx/clas folder of the inference library
sh prepare.sh /{lite prediction library path}/inference_lite_lib.android.armv8
4. Push the optimized model, the inference library file, the test image, and the class mapping file to the phone.
# Enter the working directory of the lite demo
cd /{lite prediction library path}/inference_lite_lib.android.armv8/
cd demo/cxx/clas/
# Copy the C++ inference dynamic library (.so) file into the debug folder
cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
```
`prepare.sh` takes `PaddleClas/deploy/lite/imgs/tabby_cat.jpg` as the test image and copies it to the `demo/cxx/clas/debug/` folder.
Put the model file optimized by the `paddle_lite_opt` tool into the `/{lite prediction library path}/inference_lite_lib.android.armv8/demo/cxx/clas/debug/` folder. In this example, use the `MobileNetV3_large_x1_0.nb` model file generated in [2.1.3](#2.1.3).
After the above steps, the clas folder has the following layout:
```
demo/cxx/clas/
|-- debug/
| |--MobileNetV3_large_x1_0.nb optimized classification model file
| |--tabby_cat.jpg test image
| |--imagenet1k_label_list.txt class mapping file
| |--libpaddle_light_api_shared.so C++ inference library file
| |--config.txt classification hyperparameter config file
|-- config.txt classification hyperparameter config file
|-- image_classfication.cpp image classification source file
|-- Makefile build file
```shell
adb shell mkdir -p /data/local/tmp/arm_cpu/
adb push clas_system /data/local/tmp/arm_cpu/
adb shell chmod +x /data/local/tmp/arm_cpu/clas_system
adb push inference_lite_lib.android.armv8.clang.c++_static.with_extra.with_cv/cxx/lib/libpaddle_light_api_shared.so /data/local/tmp/arm_cpu/
adb push MobileNetV3_large_x1_0.nb /data/local/tmp/arm_cpu/
adb push config.txt /data/local/tmp/arm_cpu/
adb push ../../ppcls/utils/imagenet1k_label_list.txt /data/local/tmp/arm_cpu/
adb push imgs/tabby_cat.jpg /data/local/tmp/arm_cpu/
```
#### Note:
......@@ -224,32 +220,22 @@ clas_model_file ./MobileNetV3_large_x1_0.nb # path of the model file
label_path ./imagenet1k_label_list.txt # path of the class mapping text file
resize_short_size 256 # length of the short side after resize
crop_size 224 # side length used for prediction after cropping
visualize 0 # whether to visualize; if enabled, an image file named clas_result.png is generated in the current folder.
visualize 0 # whether to visualize; if enabled, an image file named clas_result.png is generated in the current folder
num_threads 1 # number of threads, default is 1
precision FP32 # precision type, FP32 or INT8, default is FP32
runtime_device arm_cpu # device type, default is arm_cpu
enable_benchmark 0 # whether to enable benchmark, default is 0
tipc_benchmark 0 # whether to enable tipc_benchmark, default is 0
```
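For reference, a small Python sketch (an illustration only; the C++ demo has its own loader) that parses this key/value format into a dict:

```python
def load_config_txt(path):
    """Parse the whitespace-separated key/value config.txt shown above."""
    config = {}
    with open(path, "r") as f:
        for line in f:
            line = line.split("#", 1)[0].strip()  # drop any trailing comment and whitespace
            if not line:
                continue
            key, value = line.split(None, 1)      # split on the first whitespace run
            config[key] = value.strip()
    return config


cfg = load_config_txt("./config.txt")
print(cfg["clas_model_file"], cfg["num_threads"], cfg["tipc_benchmark"])
```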
5. Start debugging. After the above steps are completed, you can use ADB to push the `debug/` folder to the phone and run it as follows:
5. Run the prediction command
```shell
# Compile to get the executable file clas_system
make -j
# Move the compiled executable into the debug folder
mv clas_system ./debug/
# Push the debug folder above to the phone
adb push debug /data/local/tmp/
adb shell
cd /data/local/tmp/debug
export LD_LIBRARY_PATH=/data/local/tmp/debug:$LD_LIBRARY_PATH
Run the following command to complete the prediction on the phone.
# The clas_system executable is used as follows:
# ./clas_system <config file path> <test image path>
./clas_system ./config.txt ./tabby_cat.jpg
```shell
adb shell 'export LD_LIBRARY_PATH=/data/local/tmp/arm_cpu/; /data/local/tmp/arm_cpu/clas_system /data/local/tmp/arm_cpu/config.txt /data/local/tmp/arm_cpu/tabby_cat.jpg'
```
If you modify the code, you need to recompile it and push it to the phone again.
The running result is as follows:
<div align="center">
......@@ -263,3 +249,4 @@ A1: If you have already gone through the above steps, switching to another model only requires replacing the `.nb` model
Q2: How do I test with a different image?
A2: Replace the test image under debug with the image you want to test, and push it to the phone again with ADB.
# ISE
---
## Catalogue
- [1. Introduction](#1)
- [2. Performance on Market1501 and MSMT17](#2)
- [3. Test](#3)
- [4. Reference](#4)
<a name='1'></a>
## 1. Introduction
ISE (Implicit Sample Extension) is a simple, efficient, and effective learning algorithm for unsupervised person Re-ID. ISE generates what we call support samples around the cluster boundaries. The sample generation process in ISE depends on two critical mechanisms, i.e., a progressive linear interpolation strategy and a label-preserving loss function. The generated support samples from ISE provide complementary information, which can nicely handle the "sub and mixed" clustering errors. ISE achieves performance superior to other unsupervised methods on the Market1501 and MSMT17 datasets.
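As a rough sketch of the interpolation idea (our notation, for intuition only, and not taken verbatim from the paper), a support sample can be viewed as a linear interpolation whose coefficient is increased progressively during training:

```latex
% f(x_i): embedding of a real sample; t_i: a target point from a neighboring
% cluster near the boundary; \lambda: interpolation coefficient grown progressively.
\[
  s_i(\lambda) = (1-\lambda)\, f(x_i) + \lambda\, t_i, \qquad \lambda \in [0, 1],
\]
% while the label-preserving loss constrains s_i(\lambda) to keep the label of x_i.
```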
> [**Implicit Sample Extension for Unsupervised Person Re-Identification**](https://arxiv.org/abs/2204.06892v1)<br>
> Xinyu Zhang, Dongdong Li, Zhigang Wang, Jian Wang, Errui Ding, Javen Qinfeng Shi, Zhaoxiang Zhang, Jingdong Wang<br>
> CVPR2022
![image](../../images/ISE_ReID/ISE_pipeline.png)
<a name='2'></a>
## 2. Performance on Market1501 and MSMT17
The main results on Market1501 (M) and MSMT17 (MS). PIL denotes the progressive linear interpolation strategy. LP represents the label-preserving loss function.
| Methods | M | Link | MS | Link |
| --- | -- | -- | -- | - |
| Baseline | 82.5 (92.5) | - | 30.1 (58.6) | - |
| ISE (+PIL) | 83.9 (93.9) | - | 33.5 (63.9) | - |
| ISE (+LP) | 83.6 (92.7) | - | 31.4 (59.9) | - |
| ISE (Ours) (+PIL+LP) | **84.7 (94.0)** | [ISE_M](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ISE_M_model.pdparams) | **35.0 (64.7)** | [ISE_MS](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ISE_MS_model.pdparams) |
<a name="3"></a>
## 3. Test
The training code is coming soon. We first release the test code with the pretrained models.
**Test:** You can simply run the following script for the evaluation.
```
python tools/eval.py -c ./ppcls/configs/Person/ResNet50_UReID_infer.yaml
```
**Steps:**
1. Download the pretrained model first, and put the model into: ```./pd_model_trace/ISE/```.
2. Change the dataset name in: ```./ppcls/configs/Person/ResNet50_UReID_infer.yaml```.
3. Run the above script.
<a name="4"></a>
## 4. Reference
If you find ISE useful in your research, please kindly consider citing our paper:
```
@inproceedings{zhang2022Implicit,
title={Implicit Sample Extension for Unsupervised Person Re-Identification},
author={Xinyu Zhang, Dongdong Li, Zhigang Wang, Jian Wang, Errui Ding, Javen Qinfeng Shi, Zhaoxiang Zhang, Jingdong Wang},
booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2022}
}
```
......@@ -29,7 +29,8 @@
- [22. TNT series](#22)
- [23. CSwinTransformer series](#23)
- [24. PVTV2 series](#24)
- [25. Other models](#25)
- [25. MobileViT series](#25)
- [26. Other models](#26)
- [Reference](#reference)
<a name="1"></a>
......@@ -532,10 +533,21 @@ The accuracy and speed indicators of PVTV2 series models are shown in the follow
| PVT_V2_B4 | 0.836 | 0.967 | - | - | - | 9.8 | 62.6 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B4_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PVT_V2_B4_infer.tar) |
| PVT_V2_B5 | 0.837 | 0.966 | - | - | - | 11.4 | 82.0 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B5_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PVT_V2_B5_infer.tar) |
<a name="25"></a>
## 25. Other models
## 25. MobileViT series <sup>[[42](#ref42)]</sup>
The accuracy and speed metrics of the MobileViT series models are shown in the following table. For more details, please refer to: [MobileViT series model documents](../models/MobileViT_en.md).
| Model | Top-1 Acc | Top-5 Acc | time(ms)<br>bs=1 | time(ms)<br>bs=4 | time(ms)<br/>bs=8 | FLOPs(M) | Params(M) | Pretrained Model Download Address | Inference Model Download Address |
| ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| MobileViT_XXS | 0.6867 | 0.8878 | - | - | - | 337.24 | 1.28 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_XXS_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileViT_XXS_infer.tar) |
| MobileViT_XS | 0.7454 | 0.9227 | - | - | - | 930.75 | 2.33 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_XS_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileViT_XS_infer.tar) |
| MobileViT_S | 0.7814 | 0.9413 | - | - | - | 1849.35 | 5.59 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_S_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileViT_S_infer.tar) |
<a name="26"></a>
## 26. Other models
The accuracy and speed indicators of AlexNet <sup>[[18](#ref18)]</sup>, SqueezeNet series <sup>[[19](#ref19)]</sup>, VGG series <sup>[[20](#ref20)]</sup>, DarkNet53 <sup>[[21](#ref21)]</sup> and other models are shown in the following table. For more information, please refer to: [Other model documents](../models/Others_en.md).
......@@ -637,3 +649,5 @@ TRANSFORMERS FOR IMAGE RECOGNITION AT SCALE.
<a name="ref40">[40]</a>Xiaoyi Dong, Jianmin Bao, Dongdong Chen, Weiming Zhang, Nenghai Yu, Lu Yuan, Dong Chen, Baining Guo. CSWin Transformer: A General Vision Transformer Backbone with Cross-Shaped Windows.
<a name="ref41">[41]</a>Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao. PVTv2: Improved Baselines with Pyramid Vision Transformer.
<a name="ref42">[42]</a>Sachin Mehta, Mohammad Rastegari. MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer.
......@@ -18,6 +18,7 @@ If you only want to test speed, please refer to [The tutorial of Paddle-Lite mob
- [2.1.1 [RECOMMEND] Use pip to install Paddle-Lite and optimize model](#2.1.1)
- [2.1.2 Compile Paddle-Lite to generate opt tool](#2.1.2)
- [2.1.3 Demo of get the optimized model](#2.1.3)
- [2.1.4 Compile to get the executable file clas_system](#2.1.4)
- [2.2 Run optimized model on Phone](#2.2)
- [3. FAQ](#3)
......@@ -40,8 +41,8 @@ For the detailed compilation directions of different development environments, p
|Platform|Inference Library Download Link|
|-|-|
|Android|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.8-rc/Android/gcc/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.with_cv.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.8-rc/Android/gcc/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.with_cv.tar.gz)|
|iOS|[arm7](https://paddlelite-data.bj.bcebos.com/Release/2.8-rc/iOS/inference_lite_lib.ios.armv7.with_cv.with_extra.tiny_publish.tar.gz) / [arm8](https://paddlelite-data.bj.bcebos.com/Release/2.8-rc/iOS/inference_lite_lib.ios.armv8.with_cv.with_extra.tiny_publish.tar.gz)|
|Android|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.android.armv7.clang.c++_static.with_extra.with_cv.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.android.armv8.clang.c++_static.with_extra.with_cv.tar.gz) |
|iOS|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.ios.armv7.with_cv.with_extra.tiny_publish.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.10/inference_lite_lib.ios.armv8.with_cv.with_extra.tiny_publish.tar.gz)|
**NOTE**:
......@@ -53,7 +54,7 @@ For the detailed compilation directions of different development environments, p
The structure of the inference library is as follows:
```
inference_lite_lib.android.armv8/
inference_lite_lib.android.armv8.clang.c++_static.with_extra.with_cv/
|-- cxx C++ inference library and header files
| |-- include C++ header files
| | |-- paddle_api.h
......@@ -148,6 +149,23 @@ paddle_lite_opt --model_file=./MobileNetV3_large_x1_0_infer/inference.pdmodel --
```
When the above command completes, there will be `MobileNetV3_large_x1_0.nb` in the current directory, which is the converted model file.
<a name="2.1.4"></a>
#### 2.1.4 Compile to get the executable file clas_system
```shell
# Clone the Autolog repository to get automation logs
cd PaddleClas_root_path
cd deploy/lite/
git clone https://github.com/LDOUBLEV/AutoLog.git
```
```shell
# Compile
make -j
```
After executing the `make` command, the `clas_system` executable file is generated in the current directory, which is used for Lite prediction.
<a name="2.2"></a>
## 2.2 Run optimized model on Phone
......@@ -172,7 +190,7 @@ When the above code command is completed, there will be ``MobileNetV3_large_x1_0
* Install ADB for windows
To install ADB on Windows, you need to download it from Google's Android platform: [Download Link](https://developer.android.com/studio).
First, make sure the phone is connected to the computer, turn on the `USB debugging` option of the phone, and select the `file transfer` mode. Verify whether ADB is installed successfully as follows:
3. First, make sure the phone is connected to the computer, turn on the `USB debugging` option of the phone, and select the `file transfer` mode. Verify whether ADB is installed successfully as follows:
```shell
$ adb devices
......@@ -183,42 +201,22 @@ When the above code command is completed, there will be ``MobileNetV3_large_x1_0
If there is `device` output like the above, it means the installation was successful.
4. Prepare optimized model, inference library files, test image and dictionary file used.
4. Push the optimized model, the inference library file, the test image and the class mapping file to the phone.
```shell
cd PaddleClas_root_path
cd deploy/lite/
# prepare.sh will put the inference library files, the test image and the dictionary files in demo/cxx/clas
sh prepare.sh /{lite inference library path}/inference_lite_lib.android.armv8
# enter the working directory of lite demo
cd /{lite inference library path}/inference_lite_lib.android.armv8/
cd demo/cxx/clas/
# copy the C++ inference dynamic library file (ie. .so) to the debug folder
cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/
```shell
adb shell mkdir -p /data/local/tmp/arm_cpu/
adb push clas_system /data/local/tmp/arm_cpu/
adb shell chmod +x /data/local/tmp/arm_cpu/clas_system
adb push inference_lite_lib.android.armv8.clang.c++_static.with_extra.with_cv/cxx/lib/libpaddle_light_api_shared.so /data/local/tmp/arm_cpu/
adb push MobileNetV3_large_x1_0.nb /data/local/tmp/arm_cpu/
adb push config.txt /data/local/tmp/arm_cpu/
adb push ../../ppcls/utils/imagenet1k_label_list.txt /data/local/tmp/arm_cpu/
adb push imgs/tabby_cat.jpg /data/local/tmp/arm_cpu/
```
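The pushes above can also be scripted; a minimal sketch (an illustration only, not part of the repo) using Python's `subprocess`:

```python
import subprocess

DEVICE_DIR = "/data/local/tmp/arm_cpu/"
FILES = [
    "clas_system",
    "inference_lite_lib.android.armv8.clang.c++_static.with_extra.with_cv/cxx/lib/libpaddle_light_api_shared.so",
    "MobileNetV3_large_x1_0.nb",
    "config.txt",
    "../../ppcls/utils/imagenet1k_label_list.txt",
    "imgs/tabby_cat.jpg",
]

subprocess.run(["adb", "shell", "mkdir", "-p", DEVICE_DIR], check=True)
for path in FILES:
    subprocess.run(["adb", "push", path, DEVICE_DIR], check=True)
subprocess.run(["adb", "shell", "chmod", "+x", DEVICE_DIR + "clas_system"], check=True)
```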
The `prepare.sh` script takes `PaddleClas/deploy/lite/imgs/tabby_cat.jpg` as the test image and copies it to the `demo/cxx/clas/debug/` directory.
You should put the model optimized by `paddle_lite_opt` under the `demo/cxx/clas/debug/` directory. In this example, use the `MobileNetV3_large_x1_0.nb` model file generated in [2.1.3](#2.1.3).
The structure of the clas demo is as follows after the above command is completed:
```
demo/cxx/clas/
|-- debug/
| |--MobileNetV3_large_x1_0.nb class model
| |--tabby_cat.jpg test image
| |--imagenet1k_label_list.txt dictionary file
| |--libpaddle_light_api_shared.so C++ .so file
| |--config.txt config file
|-- config.txt config file
|-- image_classfication.cpp source code
|-- Makefile compile file
```
**NOTE**:
* `imagenet1k_label_list.txt` is the category mapping file of the `ImageNet1k` dataset. If you use custom categories, you need to replace this category mapping file.
......@@ -229,33 +227,22 @@ clas_model_file ./MobileNetV3_large_x1_0.nb # path of model file
label_path ./imagenet1k_label_list.txt # path of category mapping file
resize_short_size 256 # the short side length after resize
crop_size 224 # side length used for inference after cropping
visualize 0 # whether to visualize. If you set it to 1, an image file named 'clas_result.png' will be generated in the current directory.
num_threads 1 # The number of threads, the default is 1
precision FP32 # Precision type, you can choose FP32 or INT8, the default is FP32
runtime_device arm_cpu # Device type, the default is arm_cpu
enable_benchmark 0 # Whether to enable benchmark, the default is 0
tipc_benchmark 0 # Whether to enable tipc_benchmark, the default is 0
```
5. Run Model on Phone
```shell
# run compile to get the executable file 'clas_system'
make -j
# move the compiled executable file to the debug folder
mv clas_system ./debug/
# push the debug folder to Phone
adb push debug /data/local/tmp/
Execute the following command to complete the prediction on the mobile phone.
adb shell
cd /data/local/tmp/debug
export LD_LIBRARY_PATH=/data/local/tmp/debug:$LD_LIBRARY_PATH
# the usage of clas_system is as follows:
# ./clas_system "path of config file" "path of test image"
./clas_system ./config.txt ./tabby_cat.jpg
```shell
adb shell 'export LD_LIBRARY_PATH=/data/local/tmp/arm_cpu/; /data/local/tmp/arm_cpu/clas_system /data/local/tmp/arm_cpu/config.txt /data/local/tmp/arm_cpu/tabby_cat.jpg'
```
**NOTE**: If you make changes to the code, you need to recompile and repush the `debug` folder to the phone.
The result is as follows:
![](../../images/inference_deployment/lite_demo_result.png)
......
# MobileViT
---
## Catalogue
* [1. Overview](#1)
* [2. Accuracy, FLOPs and Parameters](#2)
<a name='1'></a>
## 1. Overview
MobileViT is a lightweight vision Transformer network that can be used as a general-purpose backbone in the field of computer vision. MobileViT combines the advantages of CNNs and Transformers: it handles both global and local features well and better addresses the Transformer's lack of inductive bias. As a result, with the same number of parameters, it achieves significant improvements over other SOTA models on image classification, object detection, and semantic segmentation tasks. [Paper](https://arxiv.org/pdf/2110.02178.pdf)
<a name='2'></a>
## 2. Accuracy, FLOPs and Parameters
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPs<br>(M) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| MobileViT_XXS | 0.6867 | 0.8878 | 0.690 | - | 337.24 | 1.28 |
| MobileViT_XS | 0.7454 | 0.9227 | 0.747 | - | 930.75 | 2.33 |
| MobileViT_S | 0.7814 | 0.9413 | 0.783 | - | 1849.35 | 5.59 |
# ISE
---
## Catalogue
- [1. Introduction](#1)
- [2. Performance on Market1501 and MSMT17](#2)
- [3. Test](#3)
- [4. Reference](#4)
<a name='1'></a>
## 1. Introduction
ISE (Implicit Sample Extension) is a simple, efficient, and effective learning algorithm for unsupervised person re-identification. ISE generates support samples around the cluster boundaries. The sample generation process relies on two key mechanisms, namely a progressive linear interpolation strategy and a label-preserving loss function. The support samples generated by ISE provide complementary information, which can nicely handle the "sub and mixed" clustering errors. ISE achieves performance superior to other unsupervised methods on the Market1501 and MSMT17 datasets.
> [**Implicit Sample Extension for Unsupervised Person Re-Identification**](https://arxiv.org/abs/2204.06892v1)<br>
> Xinyu Zhang, Dongdong Li, Zhigang Wang, Jian Wang, Errui Ding, Javen Qinfeng Shi, Zhaoxiang Zhang, Jingdong Wang<br>
> CVPR2022
![image](../../images/ISE_ReID/ISE_pipeline.png)
<a name='2'></a>
## 2. Performance on Market1501 and MSMT17
The main results on Market1501 and MSMT17. "PIL" denotes the progressive linear interpolation strategy. "LP" denotes the label-preserving loss function.
| Methods | Market1501 | Link | MSMT17 | Link |
| --- | -- | -- | -- | - |
| Baseline | 82.5 (92.5) | - | 30.1 (58.6) | - |
| ISE (+PIL) | 83.9 (93.9) | - | 33.5 (63.9) | - |
| ISE (+LP) | 83.6 (92.7) | - | 31.4 (59.9) | - |
| ISE (Ours) (+PIL+LP) | **84.7 (94.0)** | [ISE_M](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ISE_M_model.pdparams) | **35.0 (64.7)** | [ISE_MS](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ISE_MS_model.pdparams) |
<a name="3"></a>
## 3. Test
The training code will be released soon; for now we provide the test code and the pretrained models.
**Test:** You can simply run the following script for model evaluation.
```
python tools/eval.py -c ./ppcls/configs/Person/ResNet50_UReID_infer.yaml
```
**Steps:**
1. Download the pretrained model first, and put it into: ```./pd_model_trace/ISE/```.
2. Change the dataset name in ```./ppcls/configs/Person/ResNet50_UReID_infer.yaml```.
3. Run the above script.
<a name="4"></a>
## 4. Reference
If you find ISE useful in your research, please kindly consider citing our paper:
```
@inproceedings{zhang2022Implicit,
title={Implicit Sample Extension for Unsupervised Person Re-Identification},
author={Xinyu Zhang, Dongdong Li, Zhigang Wang, Jian Wang, Errui Ding, Javen Qinfeng Shi, Zhaoxiang Zhang, Jingdong Wang},
booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2022}
}
```
......@@ -32,7 +32,8 @@
- [22. TNT series](#22)
- [23. CSwinTransformer series](#23)
- [24. PVTV2 series](#24)
- [25. Other models](#25)
- [25. MobileViT series](#25)
- [26. Other models](#26)
- [Reference](#reference)
<a name="1"></a>
......@@ -533,10 +534,21 @@ ViT (Vision Transformer) and DeiT (Data-efficient Image Transformers) series models
| PVT_V2_B5 | 0.837 | 0.966 | - | - | - | 11.4 | 82.0 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/PVT_V2_B5_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PVT_V2_B5_infer.tar) |
<a name="25"></a>
## 25. Other models
## 25. MobileViT series <sup>[[42](#ref42)]</sup>
The accuracy and speed metrics of the MobileViT series models are shown in the following table. For more details, please refer to: [MobileViT series model documents](../models/MobileViT.md).
| Model | Top-1 Acc | Top-5 Acc | time(ms)<br>bs=1 | time(ms)<br>bs=4 | time(ms)<br/>bs=8 | FLOPs(M) | Params(M) | Pretrained Model Download Address | Inference Model Download Address |
| ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ |
| MobileViT_XXS | 0.6867 | 0.8878 | - | - | - | 337.24 | 1.28 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_XXS_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileViT_XXS_infer.tar) |
| MobileViT_XS | 0.7454 | 0.9227 | - | - | - | 930.75 | 2.33 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_XS_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileViT_XS_infer.tar) |
| MobileViT_S | 0.7814 | 0.9413 | - | - | - | 1849.35 | 5.59 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_S_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/MobileViT_S_infer.tar) |
<a name="26"></a>
## 26. Other models
The accuracy and speed metrics of AlexNet <sup>[[18](#ref18)]</sup>, the SqueezeNet series <sup>[[19](#ref19)]</sup>, the VGG series <sup>[[20](#ref20)]</sup>, DarkNet53 <sup>[[21](#ref21)]</sup> and other models are shown in the following table. For more details, please refer to: [Other model documents](../models/Others.md).
......@@ -637,3 +649,5 @@ TRANSFORMERS FOR IMAGE RECOGNITION AT SCALE.
<a name="ref40">[40]</a>Xiaoyi Dong, Jianmin Bao, Dongdong Chen, Weiming Zhang, Nenghai Yu, Lu Yuan, Dong Chen, Baining Guo. CSWin Transformer: A General Vision Transformer Backbone with Cross-Shaped Windows.
<a name="ref41">[41]</a>Wenhai Wang, Enze Xie, Xiang Li, Deng-Ping Fan, Kaitao Song, Ding Liang, Tong Lu, Ping Luo, Ling Shao. PVTv2: Improved Baselines with Pyramid Vision Transformer.
<a name="ref42">[42]</a>Sachin Mehta, Mohammad Rastegari. MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer.
# MobileViT
---
## Catalogue
* [1. Overview](#1)
* [2. Accuracy, FLOPs and Parameters](#2)
<a name='1'></a>
## 1. Overview
MobileViT is a lightweight vision Transformer network that can be used as a general-purpose backbone in the field of computer vision. MobileViT combines the advantages of CNNs and Transformers: it handles both global and local features well and better addresses the Transformer's lack of inductive bias. As a result, with the same number of parameters, it achieves significant improvements over other SOTA models on image classification, object detection, and semantic segmentation tasks. [Paper](https://arxiv.org/pdf/2110.02178.pdf)
<a name='2'></a>
## 2. Accuracy, FLOPs and Parameters
| Models | Top1 | Top5 | Reference<br>top1 | Reference<br>top5 | FLOPs<br>(M) | Params<br>(M) |
|:--:|:--:|:--:|:--:|:--:|:--:|:--:|
| MobileViT_XXS | 0.6867 | 0.8878 | 0.690 | - | 337.24 | 1.28 |
| MobileViT_XS | 0.7454 | 0.9227 | 0.747 | - | 930.75 | 2.33 |
| MobileViT_S | 0.7814 | 0.9413 | 0.783 | - | 1849.35 | 5.59 |
**Product recognition** means accurately and quickly identifying product categories, product attributes, and so on in smart retail scenarios. Today's retail market needs to reduce labor and operating costs and run 24/7, and product recognition can help more and more retail stores achieve the digital transformation of smart retail. With the smart retail concept booming, product recognition has very broad application scenarios, such as **shelf display analysis**, **smart checkout**, **warehouse management**, and **search by image**.
This case uses PP-ShiTu, the general image recognition system in the PaddlePaddle image classification development kit PaddleClas, to implement **product recognition**.
![result](./imgs/result.jpg)
**Note**: To run the code online on AI Studio, please refer to [Smart Supermarket Product Recognition System](https://aistudio.baidu.com/aistudio/projectdetail/3460304).
\ No newline at end of file
......@@ -62,7 +62,9 @@ from ppcls.arch.backbone.model_zoo.tnt import TNT_small
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53
from ppcls.arch.backbone.model_zoo.pvt_v2 import PVT_V2_B0, PVT_V2_B1, PVT_V2_B2_Linear, PVT_V2_B2, PVT_V2_B3, PVT_V2_B4, PVT_V2_B5
from ppcls.arch.backbone.model_zoo.mobilevit import MobileViT_XXS, MobileViT_XS, MobileViT_S
from ppcls.arch.backbone.model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3g4
from ppcls.arch.backbone.model_zoo.van import VAN_tiny
from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid
from ppcls.arch.backbone.variant_models.pp_lcnet_variant import PPLCNet_x2_5_Tanh
......
......@@ -276,6 +276,7 @@ class ResNet(TheseusLayer):
config,
stages_pattern,
version="vb",
stem_act="relu",
class_num=1000,
lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0],
data_format="NCHW",
......@@ -310,7 +311,7 @@ class ResNet(TheseusLayer):
[[input_image_channel, 32, 3, 2], [32, 32, 3, 1], [32, 64, 3, 1]]
}
self.stem = nn.Sequential(* [
self.stem = nn.Sequential(*[
ConvBNLayer(
num_channels=in_c,
num_filters=out_c,
......
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was based on https://github.com/BR-IDL/PaddleViT/blob/develop/image_classification/MobileViT/mobilevit.py
# and https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn.initializer import KaimingUniform, TruncatedNormal, Constant
import math
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"MobileViT_XXS":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_XXS_pretrained.pdparams",
"MobileViT_XS":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_XS_pretrained.pdparams",
"MobileViT_S":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileViT_S_pretrained.pdparams",
}
def _init_weights_linear():
weight_attr = ParamAttr(initializer=TruncatedNormal(std=.02))
bias_attr = ParamAttr(initializer=Constant(0.0))
return weight_attr, bias_attr
def _init_weights_layernorm():
weight_attr = ParamAttr(initializer=Constant(1.0))
bias_attr = ParamAttr(initializer=Constant(0.0))
return weight_attr, bias_attr
class ConvBnAct(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size=3,
stride=1,
padding=0,
bias_attr=False,
groups=1):
super().__init__()
self.in_channels = in_channels
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(initializer=KaimingUniform()),
bias_attr=bias_attr)
self.norm = nn.BatchNorm2D(out_channels)
self.act = nn.Silu()
def forward(self, inputs):
out = self.conv(inputs)
out = self.norm(out)
out = self.act(out)
return out
class Identity(nn.Layer):
""" Identity layer"""
def __init__(self):
super().__init__()
def forward(self, inputs):
return inputs
class Mlp(nn.Layer):
def __init__(self, embed_dim, mlp_ratio, dropout=0.1):
super().__init__()
w_attr_1, b_attr_1 = _init_weights_linear()
self.fc1 = nn.Linear(
embed_dim,
int(embed_dim * mlp_ratio),
weight_attr=w_attr_1,
bias_attr=b_attr_1)
w_attr_2, b_attr_2 = _init_weights_linear()
self.fc2 = nn.Linear(
int(embed_dim * mlp_ratio),
embed_dim,
weight_attr=w_attr_2,
bias_attr=b_attr_2)
self.act = nn.Silu()
self.dropout1 = nn.Dropout(dropout)
self.dropout2 = nn.Dropout(dropout)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.dropout1(x)
x = self.fc2(x)
x = self.dropout2(x)
return x
class Attention(nn.Layer):
def __init__(self,
embed_dim,
num_heads,
qkv_bias=True,
dropout=0.1,
attention_dropout=0.):
super().__init__()
self.num_heads = num_heads
self.attn_head_dim = int(embed_dim / self.num_heads)
self.all_head_dim = self.attn_head_dim * self.num_heads
w_attr_1, b_attr_1 = _init_weights_linear()
self.qkv = nn.Linear(
embed_dim,
self.all_head_dim * 3,
weight_attr=w_attr_1,
bias_attr=b_attr_1 if qkv_bias else False)
self.scales = self.attn_head_dim**-0.5
w_attr_2, b_attr_2 = _init_weights_linear()
self.proj = nn.Linear(
embed_dim, embed_dim, weight_attr=w_attr_2, bias_attr=b_attr_2)
self.attn_dropout = nn.Dropout(attention_dropout)
self.proj_dropout = nn.Dropout(dropout)
self.softmax = nn.Softmax(axis=-1)
def transpose_multihead(self, x):
B, P, N, d = x.shape
x = x.reshape([B, P, N, self.num_heads, d // self.num_heads])
x = x.transpose([0, 1, 3, 2, 4])
return x
def forward(self, x):
b_sz, n_patches, in_channels = x.shape
qkv = self.qkv(x)
qkv = qkv.reshape([
b_sz, n_patches, 3, self.num_heads,
qkv.shape[-1] // self.num_heads // 3
])
qkv = qkv.transpose([0, 3, 2, 1, 4])
query, key, value = qkv[:, :, 0], qkv[:, :, 1], qkv[:, :, 2]
query = query * self.scales
key = key.transpose([0, 1, 3, 2])
# QK^T
attn = paddle.matmul(query, key)
attn = self.softmax(attn)
attn = self.attn_dropout(attn)
# weighted sum
out = paddle.matmul(attn, value)
out = out.transpose([0, 2, 1, 3]).reshape(
[b_sz, n_patches, out.shape[1] * out.shape[3]])
out = self.proj(out)
out = self.proj_dropout(out)
return out
class EncoderLayer(nn.Layer):
def __init__(self,
embed_dim,
num_heads=4,
qkv_bias=True,
mlp_ratio=2.0,
dropout=0.1,
attention_dropout=0.,
droppath=0.):
super().__init__()
w_attr_1, b_attr_1 = _init_weights_layernorm()
w_attr_2, b_attr_2 = _init_weights_layernorm()
self.attn_norm = nn.LayerNorm(
embed_dim, weight_attr=w_attr_1, bias_attr=b_attr_1)
self.attn = Attention(embed_dim, num_heads, qkv_bias, dropout,
attention_dropout)
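        # NOTE: DropPath is not defined or imported in this file; droppath defaults to 0. at every call site, so the Identity branch below is always taken.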
self.drop_path = DropPath(droppath) if droppath > 0. else Identity()
self.mlp_norm = nn.LayerNorm(
embed_dim, weight_attr=w_attr_2, bias_attr=b_attr_2)
self.mlp = Mlp(embed_dim, mlp_ratio, dropout)
def forward(self, x):
h = x
x = self.attn_norm(x)
x = self.attn(x)
x = self.drop_path(x)
x = h + x
h = x
x = self.mlp_norm(x)
x = self.mlp(x)
x = self.drop_path(x)
x = x + h
return x
class Transformer(nn.Layer):
"""Transformer block for MobileViTBlock"""
def __init__(self,
embed_dim,
num_heads,
depth,
qkv_bias=True,
mlp_ratio=2.0,
dropout=0.1,
attention_dropout=0.,
droppath=0.):
super().__init__()
depth_decay = [x.item() for x in paddle.linspace(0, droppath, depth)]
layer_list = []
for i in range(depth):
layer_list.append(
EncoderLayer(embed_dim, num_heads, qkv_bias, mlp_ratio,
dropout, attention_dropout, droppath))
self.layers = nn.LayerList(layer_list)
w_attr_1, b_attr_1 = _init_weights_layernorm()
self.norm = nn.LayerNorm(
embed_dim, weight_attr=w_attr_1, bias_attr=b_attr_1, epsilon=1e-6)
def forward(self, x):
for layer in self.layers:
x = layer(x)
out = self.norm(x)
return out
class MobileV2Block(nn.Layer):
"""Mobilenet v2 InvertedResidual block"""
def __init__(self, inp, oup, stride=1, expansion=4):
super().__init__()
self.stride = stride
assert stride in [1, 2]
hidden_dim = int(round(inp * expansion))
self.use_res_connect = self.stride == 1 and inp == oup
layers = []
if expansion != 1:
layers.append(ConvBnAct(inp, hidden_dim, kernel_size=1))
layers.extend([
# dw
ConvBnAct(
hidden_dim,
hidden_dim,
stride=stride,
groups=hidden_dim,
padding=1),
# pw-linear
nn.Conv2D(
hidden_dim, oup, 1, 1, 0, bias_attr=False),
nn.BatchNorm2D(oup),
])
self.conv = nn.Sequential(*layers)
self.out_channels = oup
def forward(self, x):
if self.use_res_connect:
return x + self.conv(x)
return self.conv(x)
class MobileViTBlock(nn.Layer):
""" MobileViTBlock for MobileViT"""
def __init__(self,
dim,
hidden_dim,
depth,
num_heads=4,
qkv_bias=True,
mlp_ratio=2.0,
dropout=0.1,
attention_dropout=0.,
droppath=0.0,
patch_size=(2, 2)):
super().__init__()
self.patch_h, self.patch_w = patch_size
# local representations
self.conv1 = ConvBnAct(dim, dim, padding=1)
self.conv2 = nn.Conv2D(
dim, hidden_dim, kernel_size=1, stride=1, bias_attr=False)
# global representations
self.transformer = Transformer(
embed_dim=hidden_dim,
num_heads=num_heads,
depth=depth,
qkv_bias=qkv_bias,
mlp_ratio=mlp_ratio,
dropout=dropout,
attention_dropout=attention_dropout,
droppath=droppath)
# fusion
self.conv3 = ConvBnAct(hidden_dim, dim, kernel_size=1)
self.conv4 = ConvBnAct(2 * dim, dim, padding=1)
def forward(self, x):
h = x
x = self.conv1(x)
x = self.conv2(x)
patch_h = self.patch_h
patch_w = self.patch_w
patch_area = int(patch_w * patch_h)
_, in_channels, orig_h, orig_w = x.shape
new_h = int(math.ceil(orig_h / self.patch_h) * self.patch_h)
new_w = int(math.ceil(orig_w / self.patch_w) * self.patch_w)
interpolate = False
if new_w != orig_w or new_h != orig_h:
x = F.interpolate(x, size=[new_h, new_w], mode="bilinear")
interpolate = True
num_patch_w, num_patch_h = new_w // patch_w, new_h // patch_h
num_patches = num_patch_h * num_patch_w
reshaped_x = x.reshape([-1, patch_h, num_patch_w, patch_w])
transposed_x = reshaped_x.transpose([0, 2, 1, 3])
reshaped_x = transposed_x.reshape(
[-1, in_channels, num_patches, patch_area])
transposed_x = reshaped_x.transpose([0, 3, 2, 1])
x = transposed_x.reshape([-1, num_patches, in_channels])
x = self.transformer(x)
x = x.reshape([-1, patch_h * patch_w, num_patches, in_channels])
_, pixels, num_patches, channels = x.shape
x = x.transpose([0, 3, 2, 1])
x = x.reshape([-1, num_patch_w, patch_h, patch_w])
x = x.transpose([0, 2, 1, 3])
x = x.reshape(
[-1, channels, num_patch_h * patch_h, num_patch_w * patch_w])
if interpolate:
x = F.interpolate(x, size=[orig_h, orig_w])
x = self.conv3(x)
x = paddle.concat((h, x), axis=1)
x = self.conv4(x)
return x
class MobileViT(nn.Layer):
""" MobileViT
A PaddlePaddle impl of : `MobileViT: Light-weight, General-purpose, and Mobile-friendly Vision Transformer` -
https://arxiv.org/abs/2110.02178
"""
def __init__(self,
in_channels=3,
dims=[16, 32, 48, 48, 48, 64, 80, 96, 384],
hidden_dims=[96, 120, 144],
mv2_expansion=4,
class_num=1000):
super().__init__()
self.conv3x3 = ConvBnAct(
in_channels, dims[0], kernel_size=3, stride=2, padding=1)
self.mv2_block_1 = MobileV2Block(
dims[0], dims[1], expansion=mv2_expansion)
self.mv2_block_2 = MobileV2Block(
dims[1], dims[2], stride=2, expansion=mv2_expansion)
self.mv2_block_3 = MobileV2Block(
dims[2], dims[3], expansion=mv2_expansion)
self.mv2_block_4 = MobileV2Block(
dims[3], dims[4], expansion=mv2_expansion)
self.mv2_block_5 = MobileV2Block(
dims[4], dims[5], stride=2, expansion=mv2_expansion)
self.mvit_block_1 = MobileViTBlock(dims[5], hidden_dims[0], depth=2)
self.mv2_block_6 = MobileV2Block(
dims[5], dims[6], stride=2, expansion=mv2_expansion)
self.mvit_block_2 = MobileViTBlock(dims[6], hidden_dims[1], depth=4)
self.mv2_block_7 = MobileV2Block(
dims[6], dims[7], stride=2, expansion=mv2_expansion)
self.mvit_block_3 = MobileViTBlock(dims[7], hidden_dims[2], depth=3)
self.conv1x1 = ConvBnAct(dims[7], dims[8], kernel_size=1)
self.pool = nn.AdaptiveAvgPool2D(1)
self.dropout = nn.Dropout(0.1)
self.linear = nn.Linear(dims[8], class_num)
def forward(self, x):
x = self.conv3x3(x)
x = self.mv2_block_1(x)
x = self.mv2_block_2(x)
x = self.mv2_block_3(x)
x = self.mv2_block_4(x)
x = self.mv2_block_5(x)
x = self.mvit_block_1(x)
x = self.mv2_block_6(x)
x = self.mvit_block_2(x)
x = self.mv2_block_7(x)
x = self.mvit_block_3(x)
x = self.conv1x1(x)
x = self.pool(x)
x = x.reshape(x.shape[:2])
x = self.dropout(x)
x = self.linear(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def MobileViT_XXS(pretrained=False, use_ssld=False, **kwargs):
model = MobileViT(
in_channels=3,
dims=[16, 16, 24, 24, 24, 48, 64, 80, 320],
hidden_dims=[64, 80, 96],
mv2_expansion=2,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileViT_XXS"], use_ssld=use_ssld)
return model
def MobileViT_XS(pretrained=False, use_ssld=False, **kwargs):
model = MobileViT(
in_channels=3,
dims=[16, 32, 48, 48, 48, 64, 80, 96, 384],
hidden_dims=[96, 120, 144],
mv2_expansion=4,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileViT_XS"], use_ssld=use_ssld)
return model
def MobileViT_S(pretrained=False, use_ssld=False, **kwargs):
model = MobileViT(
in_channels=3,
dims=[16, 32, 64, 64, 64, 96, 128, 160, 640],
hidden_dims=[144, 192, 240],
mv2_expansion=4,
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["MobileViT_S"], use_ssld=use_ssld)
return model
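A short usage sketch (an illustration, not part of the file above): build `MobileViT_S` as registered here and run a forward pass on a dummy 256x256 batch, matching the input size used by the training config later in this commit.

```python
import paddle
from ppcls.arch.backbone.model_zoo.mobilevit import MobileViT_S

model = MobileViT_S(class_num=1000)   # pretrained=False by default
model.eval()
x = paddle.randn([1, 3, 256, 256])
with paddle.no_grad():
    logits = model(x)
print(logits.shape)                   # [1, 1000]
```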
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Code was heavily based on https://github.com/Visual-Attention-Network/VAN-Classification
from functools import partial
import math
import paddle
import paddle.nn as nn
from paddle.nn.initializer import TruncatedNormal, Constant
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url
MODEL_URLS = {
"VAN_tiny": "", # TODO
}
__all__ = list(MODEL_URLS.keys())
trunc_normal_ = TruncatedNormal(std=.02)
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)
def drop_path(x, drop_prob=0., training=False):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
"""
if drop_prob == 0. or not training:
return x
keep_prob = paddle.to_tensor(1 - drop_prob)
shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
random_tensor = paddle.floor(random_tensor) # binarize
output = x.divide(keep_prob) * random_tensor
return output
class DropPath(nn.Layer):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
"""
def __init__(self, drop_prob=None):
super(DropPath, self).__init__()
self.drop_prob = drop_prob
def forward(self, x):
return drop_path(x, self.drop_prob, self.training)
@paddle.jit.not_to_static
def swapdim(x, dim1, dim2):
a = list(range(len(x.shape)))
a[dim1], a[dim2] = a[dim2], a[dim1]
return x.transpose(a)
class Mlp(nn.Layer):
def __init__(self,
in_features,
hidden_features=None,
out_features=None,
act_layer=nn.GELU,
drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Conv2D(in_features, hidden_features, 1)
self.dwconv = DWConv(hidden_features)
self.act = act_layer()
self.fc2 = nn.Conv2D(hidden_features, out_features, 1)
self.drop = nn.Dropout(drop)
def forward(self, x):
x = self.fc1(x)
x = self.dwconv(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class LKA(nn.Layer):
def __init__(self, dim):
super().__init__()
self.conv0 = nn.Conv2D(dim, dim, 5, padding=2, groups=dim)
self.conv_spatial = nn.Conv2D(
dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3)
self.conv1 = nn.Conv2D(dim, dim, 1)
def forward(self, x):
attn = self.conv0(x)
attn = self.conv_spatial(attn)
attn = self.conv1(attn)
return x * attn
class Attention(nn.Layer):
def __init__(self, d_model):
super().__init__()
self.proj_1 = nn.Conv2D(d_model, d_model, 1)
self.activation = nn.GELU()
self.spatial_gating_unit = LKA(d_model)
self.proj_2 = nn.Conv2D(d_model, d_model, 1)
def forward(self, x):
        shortcut = x
x = self.proj_1(x)
x = self.activation(x)
x = self.spatial_gating_unit(x)
x = self.proj_2(x)
        x = x + shortcut
return x
class Block(nn.Layer):
def __init__(self,
dim,
mlp_ratio=4.,
drop=0.,
drop_path=0.,
act_layer=nn.GELU):
super().__init__()
self.norm1 = nn.BatchNorm2D(dim)
self.attn = Attention(dim)
self.drop_path = DropPath(
drop_path) if drop_path > 0. else nn.Identity()
self.norm2 = nn.BatchNorm2D(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = Mlp(in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop)
layer_scale_init_value = 1e-2
self.layer_scale_1 = self.create_parameter(
shape=[dim, 1, 1],
default_initializer=Constant(value=layer_scale_init_value))
self.layer_scale_2 = self.create_parameter(
shape=[dim, 1, 1],
default_initializer=Constant(value=layer_scale_init_value))
def forward(self, x):
x = x + self.drop_path(self.layer_scale_1 * self.attn(self.norm1(x)))
x = x + self.drop_path(self.layer_scale_2 * self.mlp(self.norm2(x)))
return x
class OverlapPatchEmbed(nn.Layer):
""" Image to Patch Embedding
"""
def __init__(self,
img_size=224,
patch_size=7,
stride=4,
in_chans=3,
embed_dim=768):
super().__init__()
self.proj = nn.Conv2D(
in_chans,
embed_dim,
kernel_size=patch_size,
stride=stride,
padding=patch_size // 2)
self.norm = nn.BatchNorm2D(embed_dim)
def forward(self, x):
x = self.proj(x)
_, _, H, W = x.shape
x = self.norm(x)
return x, H, W
class VAN(nn.Layer):
r""" VAN
A PaddlePaddle impl of : `Visual Attention Network` -
https://arxiv.org/pdf/2202.09741.pdf
"""
def __init__(self,
img_size=224,
in_chans=3,
class_num=1000,
embed_dims=[64, 128, 256, 512],
mlp_ratios=[4, 4, 4, 4],
drop_rate=0.,
drop_path_rate=0.,
norm_layer=nn.LayerNorm,
depths=[3, 4, 6, 3],
num_stages=4,
flag=False):
super().__init__()
if flag == False:
self.class_num = class_num
self.depths = depths
self.num_stages = num_stages
dpr = [x for x in paddle.linspace(0, drop_path_rate, sum(depths))
] # stochastic depth decay rule
cur = 0
for i in range(num_stages):
patch_embed = OverlapPatchEmbed(
img_size=img_size if i == 0 else img_size // (2**(i + 1)),
patch_size=7 if i == 0 else 3,
stride=4 if i == 0 else 2,
in_chans=in_chans if i == 0 else embed_dims[i - 1],
embed_dim=embed_dims[i])
block = nn.LayerList([
Block(
dim=embed_dims[i],
mlp_ratio=mlp_ratios[i],
drop=drop_rate,
drop_path=dpr[cur + j]) for j in range(depths[i])
])
norm = norm_layer(embed_dims[i])
cur += depths[i]
setattr(self, f"patch_embed{i + 1}", patch_embed)
setattr(self, f"block{i + 1}", block)
setattr(self, f"norm{i + 1}", norm)
# classification head
self.head = nn.Linear(embed_dims[3],
class_num) if class_num > 0 else nn.Identity()
self.apply(self._init_weights)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight)
if isinstance(m, nn.Linear) and m.bias is not None:
zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
zeros_(m.bias)
ones_(m.weight)
elif isinstance(m, nn.Conv2D):
fan_out = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
fan_out //= m._groups
m.weight.set_value(
paddle.normal(
std=math.sqrt(2.0 / fan_out), shape=m.weight.shape))
if m.bias is not None:
zeros_(m.bias)
def forward_features(self, x):
B = x.shape[0]
for i in range(self.num_stages):
patch_embed = getattr(self, f"patch_embed{i + 1}")
block = getattr(self, f"block{i + 1}")
norm = getattr(self, f"norm{i + 1}")
x, H, W = patch_embed(x)
for blk in block:
x = blk(x)
x = x.flatten(2)
x = swapdim(x, 1, 2)
x = norm(x)
if i != self.num_stages - 1:
x = x.reshape([B, H, W, x.shape[2]]).transpose([0, 3, 1, 2])
return x.mean(axis=1)
def forward(self, x):
x = self.forward_features(x)
x = self.head(x)
return x
class DWConv(nn.Layer):
def __init__(self, dim=768):
super().__init__()
self.dwconv = nn.Conv2D(dim, dim, 3, 1, 1, bias_attr=True, groups=dim)
def forward(self, x):
x = self.dwconv(x)
return x
def _load_pretrained(pretrained, model, model_url, use_ssld=False):
if pretrained is False:
pass
elif pretrained is True:
load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
elif isinstance(pretrained, str):
load_dygraph_pretrain(model, pretrained)
else:
raise RuntimeError(
"pretrained type is not available. Please use `string` or `boolean` type."
)
def VAN_tiny(pretrained=False, use_ssld=False, **kwargs):
model = VAN(embed_dims=[32, 64, 160, 256],
mlp_ratios=[8, 8, 4, 4],
norm_layer=partial(
nn.LayerNorm, epsilon=1e-6),
depths=[3, 3, 5, 2],
**kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["VAN_tiny"], use_ssld=use_ssld)
return model
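Analogously, a quick sanity-check sketch for `VAN_tiny` (illustrative only; note that its `MODEL_URLS` entry above is still empty, so only `pretrained=False` or a local weights path works for now):

```python
import paddle
from ppcls.arch.backbone.model_zoo.van import VAN_tiny

model = VAN_tiny(pretrained=False, class_num=1000)
model.eval()
x = paddle.randn([1, 3, 224, 224])    # img_size defaults to 224
with paddle.no_grad():
    print(model(x).shape)             # [1, 1000]
```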
......@@ -25,7 +25,8 @@ __all__ = ['build_gear']
def build_gear(config):
support_dict = [
'ArcMargin', 'CosMargin', 'CircleMargin', 'FC', 'VehicleNeck', 'Tanh', "BNNeck"
'ArcMargin', 'CosMargin', 'CircleMargin', 'FC', 'VehicleNeck', 'Tanh',
'BNNeck'
]
module_name = config.pop('name')
assert module_name in support_dict, Exception(
......
import paddle
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function
import paddle
import paddle.nn as nn
class BNNeck(paddle.nn.Layer):
def __init__(self, num_filters, trainable=False):
super(BNNeck, self).__init__()
self.num_filters = num_filters
self.bn = paddle.nn.BatchNorm1D(self.num_filters)
# TODO: freeze bn.bias
# if not trainable:
# self.bn.bias.trainable = False
class BNNeck(nn.Layer):
def __init__(self, num_features):
super().__init__()
weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Constant(value=1.0))
bias_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Constant(value=0.0),
trainable=False)
self.feat_bn = nn.BatchNorm1D(
num_features,
momentum=0.9,
epsilon=1e-05,
weight_attr=weight_attr,
bias_attr=bias_attr)
# TODO: set bnneck.bias learnable=False
self.flatten = nn.Flatten()
def forward(self, input, label=None):
out = self.bn(input)
return out
def forward(self, x):
x = self.flatten(x)
x = self.feat_bn(x)
return x
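A minimal usage sketch for the new `BNNeck` (illustrative; the import path is assumed from the gears package layout): it flattens the backbone feature and applies a `BatchNorm1D` whose bias is frozen.

```python
import paddle
from ppcls.arch.gears.bnneck import BNNeck   # assumed module path

neck = BNNeck(num_features=2048)
neck.eval()
feat = paddle.randn([4, 2048, 1, 1])         # e.g. a globally pooled backbone feature
print(neck(feat).shape)                      # [4, 2048]
```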
......@@ -19,16 +19,25 @@ from __future__ import print_function
import paddle
import paddle.nn as nn
from ppcls.arch.utils import get_param_attr_dict
class FC(nn.Layer):
def __init__(self, embedding_size, class_num, bias_attr=None):
def __init__(self, embedding_size, class_num, **kwargs):
super(FC, self).__init__()
self.embedding_size = embedding_size
self.class_num = class_num
# TODO: hard code for initializer
weight_attr = paddle.ParamAttr(
initializer=paddle.nn.initializer.Normal(std=0.001))
self.fc = paddle.nn.Linear(
initializer=paddle.nn.initializer.XavierNormal())
if 'weight_attr' in kwargs:
weight_attr = get_param_attr_dict(kwargs['weight_attr'])
bias_attr = None
if 'bias_attr' in kwargs:
bias_attr = get_param_attr_dict(kwargs['bias_attr'])
self.fc = nn.Linear(
self.embedding_size,
self.class_num,
weight_attr=weight_attr,
......
......@@ -14,9 +14,11 @@
import six
import types
import paddle
from difflib import SequenceMatcher
from . import backbone
from typing import Any, Dict, Union
def get_architectures():
......@@ -31,8 +33,8 @@ def get_architectures():
def get_blacklist_model_in_static_mode():
from ppcls.arch.backbone import distilled_vision_transformer
from ppcls.arch.backbone import vision_transformer
from ppcls.arch.backbone import (distilled_vision_transformer,
vision_transformer)
blacklist = distilled_vision_transformer.__all__ + vision_transformer.__all__
return blacklist
......@@ -51,3 +53,47 @@ def similar_architectures(name='', names=[], thresh=0.1, topk=10):
scores.sort(key=lambda x: x[1], reverse=True)
similar_names = [names[s[0]] for s in scores[:min(topk, len(scores))]]
return similar_names
def get_param_attr_dict(ParamAttr_config: Union[None, bool, Dict[str, Dict]]
) -> Union[None, bool, paddle.ParamAttr]:
"""parse ParamAttr from an dict
Args:
ParamAttr_config (Union[bool, Dict[str, Dict]]): ParamAttr_config
Returns:
Union[bool, paddle.ParamAttr]: Generated ParamAttr
"""
if ParamAttr_config is None:
return None
if isinstance(ParamAttr_config, bool):
return ParamAttr_config
ParamAttr_dict = {}
if 'initializer' in ParamAttr_config:
initializer_cfg = ParamAttr_config.get('initializer')
if 'name' in initializer_cfg:
initializer_name = initializer_cfg.pop('name')
ParamAttr_dict['initializer'] = getattr(
paddle.nn.initializer, initializer_name)(**initializer_cfg)
else:
raise ValueError(f"'name' must specified in initializer_cfg")
if 'learning_rate' in ParamAttr_config:
        # NOTE: only a single value is supported for now
learning_rate_value = ParamAttr_config.get('learning_rate')
if isinstance(learning_rate_value, (int, float)):
ParamAttr_dict['learning_rate'] = learning_rate_value
else:
raise ValueError(
f"learning_rate_value must be float or int, but got {type(learning_rate_value)}"
)
if 'regularizer' in ParamAttr_config:
regularizer_cfg = ParamAttr_config.get('regularizer')
if 'name' in regularizer_cfg:
# L1Decay or L2Decay
regularizer_name = regularizer_cfg.pop('name')
ParamAttr_dict['regularizer'] = getattr(
paddle.regularizer, regularizer_name)(**regularizer_cfg)
else:
raise ValueError(f"'name' must specified in regularizer_cfg")
return paddle.ParamAttr(**ParamAttr_dict)
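# Illustrative sketch (not part of the original file): how a Head.weight_attr
# block like the ones in the ReID configs of this commit would be parsed by the
# function above. The concrete values are assumptions taken from those configs.
def _param_attr_example() -> paddle.ParamAttr:
    cfg = {
        "initializer": {"name": "Normal", "std": 0.001},
        "learning_rate": 1.0,
        "regularizer": {"name": "L2", "coeff": 0.0005},
    }
    # returns paddle.ParamAttr(initializer=Normal(std=0.001),
    #                          learning_rate=1.0,
    #                          regularizer=L2Decay(coeff=0.0005))
    return get_param_attr_dict(cfg)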
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 256]
save_inference_dir: ./inference
use_dali: False
# model architecture
Arch:
name: MobileViT_S
class_num: 1000
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.01
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
warmup_epoch: 5
warmup_start_lr: 0.0002
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 256
interpolation: bilinear
backend: pil
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: True
loader:
num_workers: 8
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: False
channel_first: False
- ResizeImage:
resize_short: 292
interpolation: bilinear
backend: pil
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 292
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 256]
save_inference_dir: ./inference
use_dali: False
# model architecture
Arch:
name: MobileViT_XS
class_num: 1000
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.01
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
warmup_epoch: 5
warmup_start_lr: 0.0002
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 256
interpolation: bilinear
backend: pil
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: True
loader:
num_workers: 8
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: False
channel_first: False
- ResizeImage:
resize_short: 292
interpolation: bilinear
backend: pil
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 292
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 256]
save_inference_dir: ./inference
use_dali: False
# model architecture
Arch:
name: MobileViT_XXS
class_num: 1000
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.01
no_weight_decay_name: .bias norm
one_dim_param_no_weight_decay: True
lr:
# for 8 cards
name: Cosine
learning_rate: 0.002
eta_min: 0.0002
warmup_epoch: 5
warmup_start_lr: 0.0002
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 256
interpolation: bilinear
backend: pil
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: True
loader:
num_workers: 8
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: False
channel_first: False
- ResizeImage:
resize_short: 292
interpolation: bilinear
backend: pil
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 64
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 292
- CropImage:
size: 256
- NormalizeImage:
scale: 1.0/255.0
mean: [0.0, 0.0, 0.0]
std: [1.0, 1.0, 1.0]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Train:
- TopkAcc:
topk: [1, 5]
Eval:
- TopkAcc:
topk: [1, 5]
......@@ -34,10 +34,10 @@ Loss:
# mixed precision training
AMP:
scale_loss: 128.0
use_dynamic_loss_scaling: True
# O2: pure fp16
level: O2
scale_loss: 128.0
use_dynamic_loss_scaling: True
# O2: pure fp16
level: O2
Optimizer:
name: Momentum
......
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: ./output/
device: gpu
save_interval: 1
eval_during_train: True
eval_interval: 1
epochs: 300
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 224, 224]
save_inference_dir: ./inference
# training model under @to_static
to_static: False
# model architecture
Arch:
name: VAN_tiny
class_num: 1000
drop_path_rate: 0.1
drop_rate: 0.0
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: AdamW
beta1: 0.9
beta2: 0.999
epsilon: 1e-8
weight_decay: 0.05
one_dim_param_no_weight_decay: True
lr:
name: Cosine
learning_rate: 1e-3
eta_min: 1e-6
warmup_epoch: 5
warmup_start_lr: 1e-6
# data loader for train and eval
DataLoader:
Train:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/train_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- RandCropImage:
size: 224
interpolation: random
backend: pil
- RandFlipImage:
flip_code: 1
- TimmAutoAugment:
config_str: rand-m9-mstd0.5-inc1
interpolation: random
img_size: 224
mean: [0.5, 0.5, 0.5]
- NormalizeImage:
scale: 1.0/255.0
mean: [0.5, 0.5, 0.5]
std: [0.5, 0.5, 0.5]
order: ''
- RandomErasing:
EPSILON: 0.25
sl: 0.02
sh: 1.0/3.0
r1: 0.3
attempt: 10
use_log_aspect: True
mode: pixel
batch_transform_ops:
- OpSampler:
MixupOperator:
alpha: 0.8
prob: 0.5
CutmixOperator:
alpha: 1.0
prob: 0.5
sampler:
name: DistributedBatchSampler
batch_size: 256
drop_last: True
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
dataset:
name: ImageNetDataset
image_root: ./dataset/ILSVRC2012/
cls_label_path: ./dataset/ILSVRC2012/val_list.txt
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 248
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.5, 0.5, 0.5]
std: [0.5, 0.5, 0.5]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 256
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Infer:
infer_imgs: docs/images/inference_deployment/whl_demo.jpg
batch_size: 10
transforms:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
resize_short: 248
interpolation: bicubic
backend: pil
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.5, 0.5, 0.5]
std: [0.5, 0.5, 0.5]
order: ''
- ToCHWImage:
PostProcess:
name: Topk
topk: 5
class_id_map_file: ppcls/utils/imagenet1k_label_list.txt
Metric:
Eval:
- TopkAcc:
topk: [1, 5]
# global configs
Global:
checkpoints: null
pretrained_model: null
output_dir: "./output/"
device: "gpu"
save_interval: 40
eval_during_train: True
eval_interval: 10
epochs: 120
print_batch_step: 20
use_visualdl: False
warmup_by_epoch: True
eval_mode: "retrieval"
re_ranking: True
# used for static mode and model export
image_shape: [3, 256, 128]
save_inference_dir: "./inference"
# model architecture
Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "flatten"
Neck:
name: BNNeck
num_features: &feat_dim 2048
Head:
name: "FC"
embedding_size: *feat_dim
class_num: &class_num 751
weight_attr:
initializer:
name: Normal
std: 0.001
bias_attr: False
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
epsilon: 0.1
- TripletLossV3:
weight: 1.0
margin: 0.3
normalize_feature: false
- CenterLoss:
weight: 0.0005
num_classes: *class_num
feat_dim: *feat_dim
Eval:
- CELoss:
weight: 1.0
Optimizer:
- Adam:
scope: model
lr:
name: Piecewise
decay_epochs: [30, 60]
values: [0.00035, 0.000035, 0.0000035]
warmup_epoch: 10
warmup_start_lr: 0.0000035
warmup_by_epoch: True
regularizer:
name: 'L2'
coeff: 0.0005
- SGD:
scope: CenterLoss
lr:
name: Constant
learning_rate: 1000.0
# data loader for train and eval
DataLoader:
Train:
dataset:
name: "Market1501"
image_root: "./dataset/Market-1501-v15.09.15"
cls_label_path: "bounding_box_train"
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [128, 256]
- RandFlipImage:
flip_code: 1
- Pad:
padding: 10
- RandCropImage:
size: [128, 256]
scale: [ 0.8022, 0.8022 ]
ratio: [ 0.5, 0.5 ]
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.5
sl: 0.02
sh: 0.4
r1: 0.3
mean: [0.4914, 0.4822, 0.4465]
sampler:
name: DistributedRandomIdentitySampler
batch_size: 64
num_instances: 4
drop_last: True
shuffle: True
loader:
num_workers: 4
use_shared_memory: True
Eval:
Query:
dataset:
name: "Market1501"
image_root: "./dataset/Market-1501-v15.09.15"
cls_label_path: "query"
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [128, 256]
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Gallery:
dataset:
name: "Market1501"
image_root: "./dataset/Market-1501-v15.09.15"
cls_label_path: "bounding_box_test"
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [128, 256]
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 4
use_shared_memory: True
Metric:
Eval:
- Recallk:
topk: [1, 5]
- mAP: {}
# global configs
Global:
checkpoints: null
pretrained_model: null
# pretrained_model: "./pd_model_trace/ISE/ISE_M_model" # pretrained ISE model for Market1501
# pretrained_model: "./pd_model_trace/ISE/ISE_MS_model" # pretrained ISE model for MSMT17
output_dir: "./output/"
device: "gpu"
save_interval: 10
eval_during_train: True
eval_interval: 10
epochs: 120
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 128, 256]
save_inference_dir: "./inference"
eval_mode: "retrieval"
# model architecture
Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "avg_pool"
Neck:
name: "BNNeck"
num_features: 2048
Head:
name: "FC"
embedding_size: 2048
class_num: 751
# loss function config for training/eval process
Loss:
Train:
- CELoss:
weight: 1.0
- SupConLoss:
weight: 1.0
views: 2
Eval:
- CELoss:
weight: 1.0
Optimizer:
name: Momentum
momentum: 0.9
lr:
name: Cosine
learning_rate: 0.04
regularizer:
name: 'L2'
coeff: 0.0005
# data loader for train and eval
DataLoader:
Train:
dataset:
name: "Market1501" # ["Market1501", "MSMT17"]
image_root: "./dataset"
cls_label_path: "bounding_box_train"
transform_ops:
- ResizeImage:
size: [128, 256]
interpolation: 'bicubic'
backend: 'pil'
- RandFlipImage:
flip_code: 1
- Pad:
padding: 10
fill: 0
- RandomCrop:
size: [128, 256]
pad_if_needed: False
- NormalizeImage:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- RandomErasing:
EPSILON: 0.5
sl: 0.02
sh: 0.4
r1: 0.3
mean: [0.485, 0.456, 0.406]
sampler:
name: PKSampler
batch_size: 16
sample_per_id: 4
drop_last: True
shuffle: True
loader:
num_workers: 6
use_shared_memory: True
Eval:
Query:
dataset:
name: "Market1501" # ["Market1501", "MSMT17"]
image_root: "./dataset"
cls_label_path: "query"
transform_ops:
- ResizeImage:
size: [128, 256]
interpolation: 'bicubic'
backend: 'pil'
- NormalizeImage:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 6
use_shared_memory: True
Gallery:
dataset:
name: "Market1501" # ["Market1501", "MSMT17"]
image_root: "./dataset"
cls_label_path: "bounding_box_test"
transform_ops:
- ResizeImage:
size: [128, 256]
interpolation: 'bicubic'
backend: 'pil'
- NormalizeImage:
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
sampler:
name: DistributedBatchSampler
batch_size: 128
drop_last: False
shuffle: False
loader:
num_workers: 6
use_shared_memory: True
Metric:
Eval:
- Recallk:
topk: [1, 5]
- mAP: {}
......@@ -2,6 +2,8 @@
Global:
checkpoints: null
pretrained_model: null
# pretrained_model: "./pd_model_trace/ISE/ISE_M_model" # pretrained ISE model for Market1501
# pretrained_model: "./pd_model_trace/ISE/ISE_MS_model" # pretrained ISE model for MSMT17
output_dir: "./output/"
device: "gpu"
save_interval: 10
......@@ -11,7 +13,7 @@ Global:
print_batch_step: 10
use_visualdl: False
# used for static mode and model export
image_shape: [3, 256, 128]
image_shape: [3, 128, 256]
save_inference_dir: "./inference"
eval_mode: "retrieval"
......@@ -20,17 +22,16 @@ Arch:
name: "RecModel"
infer_output_key: "features"
infer_add_softmax: False
Backbone:
name: "ResNet50"
Backbone:
name: "ResNet50_last_stage_stride1"
pretrained: True
stem_act: null
BackboneStopLayer:
name: "flatten"
Head:
name: "FC"
name: "FC"
embedding_size: 2048
class_num: 751
# loss function config for training/eval process
Loss:
Train:
......@@ -46,7 +47,8 @@ Loss:
weight: 1.0
Optimizer:
name: Adam
name: Momentum
momentum: 0.9
lr:
name: Piecewise
decay_epochs: [30, 60]
......@@ -65,9 +67,6 @@ DataLoader:
image_root: "./dataset/market1501"
cls_label_path: "./dataset/market1501/bounding_box_train.txt"
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [128, 256]
- RandFlipImage:
......@@ -79,7 +78,6 @@ DataLoader:
scale: [0.8022, 0.8022]
ratio: [0.5, 0.5]
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
......@@ -99,13 +97,9 @@ DataLoader:
image_root: "./dataset/market1501"
cls_label_path: "./dataset/market1501/query.txt"
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [128, 256]
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
......@@ -124,13 +118,9 @@ DataLoader:
image_root: "./dataset/market1501"
cls_label_path: "./dataset/market1501/bounding_box_test.txt"
transform_ops:
- DecodeImage:
to_rgb: True
channel_first: False
- ResizeImage:
size: [128, 256]
- NormalizeImage:
scale: 0.00392157
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
......
......@@ -28,6 +28,7 @@ from ppcls.data.dataloader.vehicle_dataset import CompCars, VeriWild
from ppcls.data.dataloader.logo_dataset import LogoDataset
from ppcls.data.dataloader.icartoon_dataset import ICartoonDataset
from ppcls.data.dataloader.mix_dataset import MixDataset
from ppcls.data.dataloader.person_dataset import Market1501, MSMT17
# sampler
from ppcls.data.dataloader.DistributedRandomIdentitySampler import DistributedRandomIdentitySampler
......
......@@ -7,3 +7,4 @@ from ppcls.data.dataloader.icartoon_dataset import ICartoonDataset
from ppcls.data.dataloader.mix_dataset import MixDataset
from ppcls.data.dataloader.mix_sampler import MixSampler
from ppcls.data.dataloader.pk_sampler import PKSampler
from ppcls.data.dataloader.person_dataset import Market1501, MSMT17
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle
from paddle.io import Dataset
import os
import cv2
from ppcls.data import preprocess
from ppcls.data.preprocess import transform
from ppcls.utils import logger
from .common_dataset import create_operators
import os.path as osp
import glob
import re
from PIL import Image
class Market1501(Dataset):
"""
Market1501
Reference:
Zheng et al. Scalable Person Re-identification: A Benchmark. ICCV 2015.
URL: http://www.liangzheng.org/Project/project_reid.html
Dataset statistics:
# identities: 1501 (+1 for background)
# images: 12936 (train) + 3368 (query) + 15913 (gallery)
"""
_dataset_dir = 'market1501/Market-1501-v15.09.15'
def __init__(self, image_root, cls_label_path, transform_ops=None):
self._img_root = image_root
        self._cls_path = cls_label_path  # the subfolder in the dataset
self._dataset_dir = osp.join(image_root, self._dataset_dir,
self._cls_path)
self._check_before_run()
if transform_ops:
self._transform_ops = create_operators(transform_ops)
self._dtype = paddle.get_default_dtype()
self._load_anno(relabel=True if 'train' in self._cls_path else False)
def _check_before_run(self):
"""Check if the file is available before going deeper"""
if not osp.exists(self._dataset_dir):
raise RuntimeError("'{}' is not available".format(
self._dataset_dir))
def _load_anno(self, relabel=False):
img_paths = glob.glob(osp.join(self._dataset_dir, '*.jpg'))
pattern = re.compile(r'([-\d]+)_c(\d)')
self.images = []
self.labels = []
self.cameras = []
pid_container = set()
for img_path in sorted(img_paths):
pid, _ = map(int, pattern.search(img_path).groups())
if pid == -1: continue # junk images are just ignored
pid_container.add(pid)
pid2label = {pid: label for label, pid in enumerate(pid_container)}
for img_path in sorted(img_paths):
pid, camid = map(int, pattern.search(img_path).groups())
if pid == -1: continue # junk images are just ignored
assert 0 <= pid <= 1501 # pid == 0 means background
assert 1 <= camid <= 6
camid -= 1 # index starts from 0
if relabel: pid = pid2label[pid]
self.images.append(img_path)
self.labels.append(pid)
self.cameras.append(camid)
self.num_pids, self.num_imgs, self.num_cams = get_imagedata_info(
self.images, self.labels, self.cameras, subfolder=self._cls_path)
def __getitem__(self, idx):
try:
img = Image.open(self.images[idx]).convert('RGB')
img = np.array(img, dtype="float32").astype(np.uint8)
if self._transform_ops:
img = transform(img, self._transform_ops)
img = img.transpose((2, 0, 1))
return (img, self.labels[idx], self.cameras[idx])
except Exception as ex:
logger.error("Exception occured when parse line: {} with msg: {}".
format(self.images[idx], ex))
rnd_idx = np.random.randint(self.__len__())
return self.__getitem__(rnd_idx)
def __len__(self):
return len(self.images)
@property
def class_num(self):
return len(set(self.labels))
class MSMT17(Dataset):
"""
MSMT17
Reference:
Wei et al. Person Transfer GAN to Bridge Domain Gap for Person Re-Identification. CVPR 2018.
URL: http://www.pkuvmc.com/publications/msmt17.html
Dataset statistics:
# identities: 4101
# images: 32621 (train) + 11659 (query) + 82161 (gallery)
# cameras: 15
"""
_dataset_dir = 'msmt17/MSMT17_V1'
def __init__(self, image_root, cls_label_path, transform_ops=None):
self._img_root = image_root
        self._cls_path = cls_label_path  # the subfolder in the dataset
self._dataset_dir = osp.join(image_root, self._dataset_dir,
self._cls_path)
self._check_before_run()
if transform_ops:
self._transform_ops = create_operators(transform_ops)
self._dtype = paddle.get_default_dtype()
self._load_anno(relabel=True if 'train' in self._cls_path else False)
def _check_before_run(self):
"""Check if the file is available before going deeper"""
if not osp.exists(self._dataset_dir):
raise RuntimeError("'{}' is not available".format(
self._dataset_dir))
def _load_anno(self, relabel=False):
img_paths = glob.glob(osp.join(self._dataset_dir, '*.jpg'))
pattern = re.compile(r'([-\d]+)_c(\d+)')
self.images = []
self.labels = []
self.cameras = []
pid_container = set()
for img_path in img_paths:
pid, _ = map(int, pattern.search(img_path).groups())
if pid == -1:
continue # junk images are just ignored
pid_container.add(pid)
pid2label = {pid: label for label, pid in enumerate(pid_container)}
for img_path in img_paths:
pid, camid = map(int, pattern.search(img_path).groups())
if pid == -1:
continue # junk images are just ignored
assert 1 <= camid <= 15
camid -= 1 # index starts from 0
if relabel:
pid = pid2label[pid]
self.images.append(img_path)
self.labels.append(pid)
self.cameras.append(camid)
self.num_pids, self.num_imgs, self.num_cams = get_imagedata_info(
self.images, self.labels, self.cameras, subfolder=self._cls_path)
def __getitem__(self, idx):
try:
img = Image.open(self.images[idx]).convert('RGB')
img = np.array(img, dtype="float32").astype(np.uint8)
if self._transform_ops:
img = transform(img, self._transform_ops)
img = img.transpose((2, 0, 1))
return (img, self.labels[idx], self.cameras[idx])
except Exception as ex:
logger.error("Exception occured when parse line: {} with msg: {}".
format(self.images[idx], ex))
rnd_idx = np.random.randint(self.__len__())
return self.__getitem__(rnd_idx)
def __len__(self):
return len(self.images)
@property
def class_num(self):
return len(set(self.labels))
def get_imagedata_info(data, labels, cameras, subfolder='train'):
pids, cams = [], []
for _, pid, camid in zip(data, labels, cameras):
pids += [pid]
cams += [camid]
pids = set(pids)
cams = set(cams)
num_pids = len(pids)
num_cams = len(cams)
num_imgs = len(data)
print("Dataset statistics:")
print(" ----------------------------------------")
print(" subset | # ids | # images | # cameras")
print(" ----------------------------------------")
print(" {} | {:5d} | {:8d} | {:9d}".format(subfolder, num_pids,
num_imgs, num_cams))
print(" ----------------------------------------")
return num_pids, num_imgs, num_cams
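# Illustrative usage (paths and transforms are assumptions): image_root must
# contain market1501/Market-1501-v15.09.15/, matching the ISE config in this
# commit; the dataset yields (CHW image, person id, camera id) tuples and
# relabels person ids for the training split.
def _market1501_example():
    train_set = Market1501(
        image_root="./dataset",
        cls_label_path="bounding_box_train",
        transform_ops=[{"ResizeImage": {"size": [128, 256]}}])
    img, pid, camid = train_set[0]
    return len(train_set), train_set.class_num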
......@@ -39,7 +39,8 @@ class UnifiedResize(object):
'bilinear': cv2.INTER_LINEAR,
'area': cv2.INTER_AREA,
'bicubic': cv2.INTER_CUBIC,
'lanczos': cv2.INTER_LANCZOS4
'lanczos': cv2.INTER_LANCZOS4,
'random': (cv2.INTER_LINEAR, cv2.INTER_CUBIC)
}
_pil_interp_from_str = {
'nearest': Image.NEAREST,
......@@ -47,10 +48,18 @@ class UnifiedResize(object):
'bicubic': Image.BICUBIC,
'box': Image.BOX,
'lanczos': Image.LANCZOS,
'hamming': Image.HAMMING
'hamming': Image.HAMMING,
'random': (Image.BILINEAR, Image.BICUBIC)
}
def _cv2_resize(src, size, resample):
if isinstance(resample, tuple):
resample = random.choice(resample)
return cv2.resize(src, size, interpolation=resample)
def _pil_resize(src, size, resample):
if isinstance(resample, tuple):
resample = random.choice(resample)
pil_img = Image.fromarray(src)
pil_img = pil_img.resize(size, resample)
return np.asarray(pil_img)
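# Illustrative note (not part of the original file): when interpolation is set
# to "random", the mapping tables above store a tuple and the two resize
# helpers draw one member per call, e.g. for the PIL backend:
def _random_resample_example():
    import random
    resample = (Image.BILINEAR, Image.BICUBIC)  # what "random" maps to for PIL
    return random.choice(resample)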
......@@ -61,7 +70,7 @@ class UnifiedResize(object):
# compatible with opencv < version 4.4.0
elif interpolation is None:
interpolation = cv2.INTER_LINEAR
self.resize_func = partial(cv2.resize, interpolation=interpolation)
self.resize_func = partial(_cv2_resize, resample=interpolation)
elif backend.lower() == "pil":
if isinstance(interpolation, str):
interpolation = _pil_interp_from_str[interpolation.lower()]
......@@ -93,14 +102,15 @@ class DecodeImage(object):
self.channel_first = channel_first # only enabled when to_np is True
def __call__(self, img):
if six.PY2:
assert type(img) is str and len(
img) > 0, "invalid input 'img' in DecodeImage"
else:
assert type(img) is bytes and len(
img) > 0, "invalid input 'img' in DecodeImage"
data = np.frombuffer(img, dtype='uint8')
img = cv2.imdecode(data, 1)
if not isinstance(img, np.ndarray):
if six.PY2:
assert type(img) is str and len(
img) > 0, "invalid input 'img' in DecodeImage"
else:
assert type(img) is bytes and len(
img) > 0, "invalid input 'img' in DecodeImage"
data = np.frombuffer(img, dtype='uint8')
img = cv2.imdecode(data, 1)
if self.to_rgb:
assert img.shape[2] == 3, 'invalid shape of image[%s]' % (
img.shape)
......
......@@ -214,21 +214,19 @@ class Engine(object):
if self.config["Global"]["pretrained_model"] is not None:
if self.config["Global"]["pretrained_model"].startswith("http"):
load_dygraph_pretrain_from_url(
self.model, self.config["Global"]["pretrained_model"])
[self.model, getattr(self, 'train_loss_func', None)],
self.config["Global"]["pretrained_model"])
else:
load_dygraph_pretrain(
self.model, self.config["Global"]["pretrained_model"])
[self.model, getattr(self, 'train_loss_func', None)],
self.config["Global"]["pretrained_model"])
# build optimizer
if self.mode == 'train':
self.optimizer, self.lr_sch = build_optimizer(
self.config["Optimizer"], self.config["Global"]["epochs"],
len(self.train_dataloader), [
self.model, * [
m for m in self.train_loss_func.loss_func
if len(m.parameters()) > 0
]
])
self.config, self.config["Global"]["epochs"],
len(self.train_dataloader),
[self.model, self.train_loss_func])
# for amp training
if self.amp:
......@@ -246,6 +244,11 @@ class Engine(object):
optimizers=self.optimizer,
level=amp_level,
save_dtype='float32')
if len(self.train_loss_func.parameters()) > 0:
self.train_loss_func = paddle.amp.decorate(
models=self.train_loss_func,
level=amp_level,
save_dtype='float32')
# for distributed
world_size = dist.get_world_size()
......@@ -256,12 +259,10 @@ class Engine(object):
if self.config["Global"]["distributed"]:
dist.init_parallel_env()
self.model = paddle.DataParallel(self.model)
# NOTE: parallelize loss which has parameters, such as CenterLoss
for i in range(len(self.train_loss_func.loss_func)):
if len(self.train_loss_func.loss_func[i].parameters()) > 0:
self.train_loss_func.loss_func[i] = paddle.DataParallel(
self.train_loss_func.loss_func[i])
if self.mode == 'train' and len(self.train_loss_func.parameters(
)) > 0:
self.train_loss_func = paddle.DataParallel(
self.train_loss_func)
# build postprocess for infer
if self.mode == 'infer':
self.preprocess_func = create_operators(self.config["Infer"][
......@@ -289,20 +290,32 @@ class Engine(object):
# global iter counter
self.global_step = 0
if self.config["Global"]["checkpoints"] is not None:
metric_info = init_model(self.config["Global"], self.model,
self.optimizer)
if self.config.Global.checkpoints is not None:
metric_info = init_model(self.config.Global, self.model,
self.optimizer, self.train_loss_func)
if metric_info is not None:
best_metric.update(metric_info)
self.max_iter = len(self.train_dataloader) - 1 if platform.system(
) == "Windows" else len(self.train_dataloader)
if self.config["Global"].get("warmup_by_epoch", False):
for i in range(len(self.lr_sch)):
self.lr_sch[i].step()
logger.info(
"lr_sch step once before first epoch, when Global.warmup_by_epoch=True"
)
for epoch_id in range(best_metric["epoch"] + 1,
self.config["Global"]["epochs"] + 1):
acc = 0.0
# for one epoch train
self.train_epoch_func(self, epoch_id, print_batch_step)
if self.config["Global"].get("warmup_by_epoch", False):
for i in range(len(self.lr_sch)):
self.lr_sch[i].step()
if self.use_dali:
self.train_dataloader.reset()
metric_msg = ", ".join([
......@@ -327,7 +340,8 @@ class Engine(object):
best_metric,
self.output_dir,
model_name=self.config["Arch"]["name"],
prefix="best_model")
prefix="best_model",
loss=self.train_loss_func)
logger.info("[Eval][Epoch {}][best metric: {}]".format(
epoch_id, best_metric["metric"]))
logger.scaler(
......@@ -346,7 +360,8 @@ class Engine(object):
"epoch": epoch_id},
self.output_dir,
model_name=self.config["Arch"]["name"],
prefix="epoch_{}".format(epoch_id))
prefix="epoch_{}".format(epoch_id),
loss=self.train_loss_func)
# save the latest model
save_load.save_model(
self.model,
......@@ -354,7 +369,8 @@ class Engine(object):
"epoch": epoch_id},
self.output_dir,
model_name=self.config["Arch"]["name"],
prefix="latest")
prefix="latest",
loss=self.train_loss_func)
if self.vdl_writer is not None:
self.vdl_writer.close()
......
......@@ -53,13 +53,20 @@ def classification_eval(engine, epoch_id=0):
]
time_info["reader_cost"].update(time.time() - tic)
batch_size = batch[0].shape[0]
batch[0] = paddle.to_tensor(batch[0]).astype("float32")
batch[0] = paddle.to_tensor(batch[0])
if not engine.config["Global"].get("use_multilabel", False):
batch[1] = batch[1].reshape([-1, 1]).astype("int64")
# image input
if engine.amp:
if engine.amp and (
engine.config['AMP'].get("level", "O1").upper() == "O2" or
engine.config["AMP"].get("use_fp16_test", False)):
amp_level = engine.config['AMP'].get("level", "O1").upper()
if amp_level == "O2":
msg = "Only support FP16 evaluation when AMP O2 is enabled."
logger.warning(msg)
with paddle.amp.auto_cast(
custom_black_list={
"flatten_contiguous_range", "greater_than"
......
......@@ -16,6 +16,8 @@ from __future__ import division
from __future__ import print_function
import platform
import numpy as np
import paddle
from ppcls.utils import logger
......@@ -49,34 +51,51 @@ def retrieval_eval(engine, epoch_id=0):
metric_dict = {metric_key: 0.}
else:
metric_dict = dict()
for block_idx, block_fea in enumerate(fea_blocks):
similarity_matrix = paddle.matmul(
block_fea, gallery_feas, transpose_y=True)
if query_query_id is not None:
query_id_block = query_id_blocks[block_idx]
query_id_mask = (query_id_block != gallery_unique_id.t())
image_id_block = image_id_blocks[block_idx]
image_id_mask = (image_id_block != gallery_img_id.t())
keep_mask = paddle.logical_or(query_id_mask, image_id_mask)
similarity_matrix = similarity_matrix * keep_mask.astype(
"float32")
else:
keep_mask = None
metric_tmp = engine.eval_metric_func(similarity_matrix,
image_id_blocks[block_idx],
gallery_img_id, keep_mask)
reranking_flag = engine.config['Global'].get('re_ranking', False)
logger.info(f"re_ranking={reranking_flag}")
if not reranking_flag:
for block_idx, block_fea in enumerate(fea_blocks):
similarity_matrix = paddle.matmul(
block_fea, gallery_feas, transpose_y=True)
if query_query_id is not None:
query_id_block = query_id_blocks[block_idx]
query_id_mask = (query_id_block != gallery_unique_id.t())
for key in metric_tmp:
if key not in metric_dict:
metric_dict[key] = metric_tmp[key] * block_fea.shape[
0] / len(query_feas)
image_id_block = image_id_blocks[block_idx]
image_id_mask = (image_id_block != gallery_img_id.t())
keep_mask = paddle.logical_or(query_id_mask, image_id_mask)
similarity_matrix = similarity_matrix * keep_mask.astype(
"float32")
else:
metric_dict[key] += metric_tmp[key] * block_fea.shape[
0] / len(query_feas)
keep_mask = None
metric_tmp = engine.eval_metric_func(
similarity_matrix, image_id_blocks[block_idx],
gallery_img_id, keep_mask)
for key in metric_tmp:
if key not in metric_dict:
metric_dict[key] = metric_tmp[key] * block_fea.shape[
0] / len(query_feas)
else:
metric_dict[key] += metric_tmp[key] * block_fea.shape[
0] / len(query_feas)
else:
distmat = re_ranking(
query_feas, gallery_feas, k1=20, k2=6, lambda_value=0.3)
cmc, mAP = eval_func(distmat,
np.squeeze(query_img_id.numpy()),
np.squeeze(gallery_img_id.numpy()),
np.squeeze(query_query_id.numpy()),
np.squeeze(gallery_unique_id.numpy()))
metric_dict["recall1(RK)"] = cmc[0]
metric_dict["recall5(RK)"] = cmc[4]
metric_dict["mAP(RK)"] = mAP
for key in metric_tmp:
metric_dict[key] = metric_tmp[key] * block_fea.shape[0] / len(
query_feas)
metric_info_list = []
for key in metric_dict:
if metric_key is None:
......@@ -88,6 +107,159 @@ def retrieval_eval(engine, epoch_id=0):
return metric_dict[metric_key]
def re_ranking(queFea,
galFea,
k1=20,
k2=6,
lambda_value=0.5,
local_distmat=None,
only_local=False):
    # if the feature vectors are numpy arrays, convert them to paddle tensors first (e.g. with paddle.to_tensor)
query_num = queFea.shape[0]
all_num = query_num + galFea.shape[0]
if only_local:
original_dist = local_distmat
else:
feat = paddle.concat([queFea, galFea])
logger.info('using GPU to compute original distance')
# L2 distance
distmat = paddle.pow(feat, 2).sum(axis=1, keepdim=True).expand([all_num, all_num]) + \
paddle.pow(feat, 2).sum(axis=1, keepdim=True).expand([all_num, all_num]).t()
distmat = distmat.addmm(x=feat, y=feat.t(), alpha=-2.0, beta=1.0)
# Cosine distance
# distmat = paddle.matmul(queFea, galFea, transpose_y=True)
# if query_query_id is not None:
# query_id_mask = (queCid != galCid.t())
# image_id_mask = (queId != galId.t())
# keep_mask = paddle.logical_or(query_id_mask, image_id_mask)
# distmat = distmat * keep_mask.astype("float32")
original_dist = distmat.cpu().numpy()
del feat
if local_distmat is not None:
original_dist = original_dist + local_distmat
gallery_num = original_dist.shape[0]
original_dist = np.transpose(original_dist / np.max(original_dist, axis=0))
V = np.zeros_like(original_dist).astype(np.float16)
initial_rank = np.argsort(original_dist).astype(np.int32)
logger.info('starting re_ranking')
for i in range(all_num):
# k-reciprocal neighbors
forward_k_neigh_index = initial_rank[i, :k1 + 1]
backward_k_neigh_index = initial_rank[forward_k_neigh_index, :k1 + 1]
fi = np.where(backward_k_neigh_index == i)[0]
k_reciprocal_index = forward_k_neigh_index[fi]
k_reciprocal_expansion_index = k_reciprocal_index
for j in range(len(k_reciprocal_index)):
candidate = k_reciprocal_index[j]
candidate_forward_k_neigh_index = initial_rank[candidate, :int(
np.around(k1 / 2)) + 1]
candidate_backward_k_neigh_index = initial_rank[
candidate_forward_k_neigh_index, :int(np.around(k1 / 2)) + 1]
fi_candidate = np.where(
candidate_backward_k_neigh_index == candidate)[0]
candidate_k_reciprocal_index = candidate_forward_k_neigh_index[
fi_candidate]
if len(
np.intersect1d(candidate_k_reciprocal_index,
k_reciprocal_index)) > 2 / 3 * len(
candidate_k_reciprocal_index):
k_reciprocal_expansion_index = np.append(
k_reciprocal_expansion_index, candidate_k_reciprocal_index)
k_reciprocal_expansion_index = np.unique(k_reciprocal_expansion_index)
weight = np.exp(-original_dist[i, k_reciprocal_expansion_index])
V[i, k_reciprocal_expansion_index] = weight / np.sum(weight)
original_dist = original_dist[:query_num, ]
if k2 != 1:
V_qe = np.zeros_like(V, dtype=np.float16)
for i in range(all_num):
V_qe[i, :] = np.mean(V[initial_rank[i, :k2], :], axis=0)
V = V_qe
del V_qe
del initial_rank
invIndex = []
for i in range(gallery_num):
invIndex.append(np.where(V[:, i] != 0)[0])
jaccard_dist = np.zeros_like(original_dist, dtype=np.float16)
for i in range(query_num):
temp_min = np.zeros(shape=[1, gallery_num], dtype=np.float16)
indNonZero = np.where(V[i, :] != 0)[0]
indImages = [invIndex[ind] for ind in indNonZero]
for j in range(len(indNonZero)):
temp_min[0, indImages[j]] = temp_min[0, indImages[j]] + np.minimum(
V[i, indNonZero[j]], V[indImages[j], indNonZero[j]])
jaccard_dist[i] = 1 - temp_min / (2 - temp_min)
final_dist = jaccard_dist * (1 - lambda_value
) + original_dist * lambda_value
del original_dist
del V
del jaccard_dist
final_dist = final_dist[:query_num, query_num:]
return final_dist
def eval_func(distmat, q_pids, g_pids, q_camids, g_camids, max_rank=50):
"""Evaluation with market1501 metric
Key: for each query identity, its gallery images from the same camera view are discarded.
"""
num_q, num_g = distmat.shape
if num_g < max_rank:
max_rank = num_g
print("Note: number of gallery samples is quite small, got {}".format(
num_g))
indices = np.argsort(distmat, axis=1)
matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)
# compute cmc curve for each query
all_cmc = []
all_AP = []
num_valid_q = 0. # number of valid query
for q_idx in range(num_q):
# get query pid and camid
q_pid = q_pids[q_idx]
q_camid = q_camids[q_idx]
# remove gallery samples that have the same pid and camid with query
order = indices[q_idx]
remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
keep = np.invert(remove)
# compute cmc curve
# binary vector, positions with value 1 are correct matches
orig_cmc = matches[q_idx][keep]
if not np.any(orig_cmc):
# this condition is true when query identity does not appear in gallery
continue
cmc = orig_cmc.cumsum()
cmc[cmc > 1] = 1
all_cmc.append(cmc[:max_rank])
num_valid_q += 1.
# compute average precision
# reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
num_rel = orig_cmc.sum()
tmp_cmc = orig_cmc.cumsum()
tmp_cmc = [x / (i + 1.) for i, x in enumerate(tmp_cmc)]
tmp_cmc = np.asarray(tmp_cmc) * orig_cmc
AP = tmp_cmc.sum() / num_rel
all_AP.append(AP)
assert num_valid_q > 0, "Error: all query identities do not appear in gallery"
all_cmc = np.asarray(all_cmc).astype(np.float32)
all_cmc = all_cmc.sum(0) / num_valid_q
mAP = np.mean(all_AP)
return all_cmc, mAP
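# Illustrative sketch (argument names and shapes are assumptions): how the
# re-ranking branch above combines re_ranking and eval_func. Query/gallery
# features are paddle tensors; ids and camera ids are 1-D numpy arrays.
def _rerank_eval_example(query_feas, gallery_feas, q_pids, g_pids, q_camids,
                         g_camids):
    distmat = re_ranking(
        query_feas, gallery_feas, k1=20, k2=6, lambda_value=0.3)
    cmc, mAP = eval_func(distmat, q_pids, g_pids, q_camids, g_camids)
    return {"recall1(RK)": cmc[0], "recall5(RK)": cmc[4], "mAP(RK)": mAP}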
def cal_feature(engine, name='gallery'):
all_feas = None
all_image_id = None
......
......@@ -21,7 +21,6 @@ from ppcls.utils import profiler
def train_epoch(engine, epoch_id, print_batch_step):
tic = time.time()
v_current = [int(i) for i in paddle.__version__.split(".")]
for iter_id, batch in enumerate(engine.train_dataloader):
if iter_id >= engine.max_iter:
break
......@@ -58,10 +57,16 @@ def train_epoch(engine, epoch_id, print_batch_step):
if engine.amp:
scaled = engine.scaler.scale(loss_dict["loss"])
scaled.backward()
# set BNneck.bias grad to zero
engine.model.neck.feat_bn.bias.grad.set_value(
paddle.zeros_like(engine.model.neck.feat_bn.bias.grad))
for i in range(len(engine.optimizer)):
engine.scaler.minimize(engine.optimizer[i], scaled)
else:
loss_dict["loss"].backward()
# set BNneck.bias grad to zero
engine.model.neck.feat_bn.bias.grad.set_value(
paddle.zeros_like(engine.model.neck.feat_bn.bias.grad))
for i in range(len(engine.optimizer)):
engine.optimizer[i].step()
......@@ -70,8 +75,9 @@ def train_epoch(engine, epoch_id, print_batch_step):
engine.optimizer[i].clear_grad()
# step lr
for i in range(len(engine.lr_sch)):
engine.lr_sch[i].step()
if engine.config["Global"].get("warmup_by_epoch", False) is False:
for i in range(len(engine.lr_sch)):
engine.lr_sch[i].step()
# below code just for logging
# update metric_for_logger
......
......@@ -38,12 +38,10 @@ def update_loss(trainer, loss_dict, batch_size):
def log_info(trainer, batch_size, epoch_id, iter_id):
if len(trainer.lr_sch) <= 1:
lr_msg = "lr: {:.8f}".format(trainer.lr_sch[0].get_lr())
else:
lr_msg = "lr_model: {:.8f}".format(trainer.lr_sch[0].get_lr())
lr_msg += ", lr_loss: {:.8f}".format(trainer.lr_sch[1].get_lr())
lr_msg = ", ".join([
"lr_{}: {:.8f}".format(i + 1, lr.get_lr())
for i, lr in enumerate(trainer.lr_sch)
])
metric_msg = ", ".join([
"{}: {:.5f}".format(key, trainer.output_info[key].avg)
for key in trainer.output_info
......@@ -63,21 +61,11 @@ def log_info(trainer, batch_size, epoch_id, iter_id):
epoch_id, trainer.config["Global"]["epochs"], iter_id,
len(trainer.train_dataloader), lr_msg, metric_msg, time_msg, ips_msg,
eta_msg))
if len(trainer.lr_sch) <= 1:
logger.scaler(
name="lr",
value=trainer.lr_sch[0].get_lr(),
step=trainer.global_step,
writer=trainer.vdl_writer)
else:
logger.scaler(
name="lr_model",
value=trainer.lr_sch[0].get_lr(),
step=trainer.global_step,
writer=trainer.vdl_writer)
for i, lr in enumerate(trainer.lr_sch):
logger.scaler(
name="lr_loss",
value=trainer.lr_sch[1].get_lr(),
name="lr_{}".format(i + 1),
value=lr.get_lr(),
step=trainer.global_step,
writer=trainer.vdl_writer)
for key in trainer.output_info:
......
......@@ -11,7 +11,7 @@ from .emlloss import EmlLoss
from .msmloss import MSMLoss
from .npairsloss import NpairsLoss
from .trihardloss import TriHardLoss
from .triplet import TripletLoss, TripletLossV2
from .triplet import TripletLoss, TripletLossV2, TripletLossV3
from .supconloss import SupConLoss
from .pairwisecosface import PairwiseCosface
from .dmlloss import DMLLoss
......@@ -47,6 +47,7 @@ class CombinedLoss(nn.Layer):
param.keys())
self.loss_weight.append(param.pop("weight"))
self.loss_func.append(eval(name)(**param))
self.loss_func = nn.LayerList(self.loss_func)
def __call__(self, input, batch):
loss_dict = {}
......
......@@ -20,7 +20,6 @@ from typing import Dict
import paddle
import paddle.nn as nn
from paddle import Tensor
class CenterLoss(nn.Layer):
......@@ -42,16 +41,16 @@ class CenterLoss(nn.Layer):
default_initializer=nn.initializer.Assign(random_init_centers))
self.add_parameter("centers", self.centers)
def __call__(self, input: Dict[str, Tensor],
target: Tensor) -> Dict[str, Tensor]:
def __call__(self, input: Dict[str, paddle.Tensor],
target: paddle.Tensor) -> Dict[str, paddle.Tensor]:
"""compute center loss.
Args:
input (Dict[str, Tensor]): {'features': (batch_size, feature_dim), ...}.
target (Tensor): ground truth label with shape (batch_size, ).
input (Dict[str, paddle.Tensor]): {'features': (batch_size, feature_dim), ...}.
target (paddle.Tensor): ground truth label with shape (batch_size, ).
Returns:
Dict[str, Tensor]: {'CenterLoss': loss}.
Dict[str, paddle.Tensor]: {'CenterLoss': loss}.
"""
feats = input['backbone']
labels = target
......@@ -61,31 +60,15 @@ class CenterLoss(nn.Layer):
labels = paddle.squeeze(labels, axis=[-1])
batch_size = feats.shape[0]
# calc feat * feat
dist1 = paddle.sum(paddle.square(feats), axis=1, keepdim=True)
dist1 = paddle.expand(dist1, [batch_size, self.num_classes])
distmat = paddle.pow(feats, 2).sum(axis=1, keepdim=True).expand([batch_size, self.num_classes]) + \
paddle.pow(self.centers, 2).sum(axis=1, keepdim=True).expand([self.num_classes, batch_size]).t()
distmat = distmat.addmm(x=feats, y=self.centers.t(), beta=1, alpha=-2)
# dist2 of centers
dist2 = paddle.sum(paddle.square(self.centers), axis=1,
keepdim=True) # num_classes
dist2 = paddle.expand(dist2, [self.num_classes, batch_size])
dist2 = paddle.transpose(dist2, [1, 0])
classes = paddle.arange(self.num_classes).astype(labels.dtype)
labels = labels.unsqueeze(1).expand([batch_size, self.num_classes])
mask = labels.equal(classes.expand([batch_size, self.num_classes]))
# first x * x + y * y
distmat = paddle.add(dist1, dist2)
tmp = paddle.matmul(feats, paddle.transpose(self.centers, [1, 0]))
distmat = distmat - 2.0 * tmp
# generate the mask
classes = paddle.arange(self.num_classes)
labels = paddle.expand(
paddle.unsqueeze(labels, 1), (batch_size, self.num_classes))
mask = paddle.equal(
paddle.expand(classes, [batch_size, self.num_classes]),
labels).astype("float32") # get mask
dist = paddle.multiply(distmat, mask)
loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size
dist = distmat * mask.astype(feats.dtype)
loss = dist.clip(min=1e-12, max=1e+12).sum() / batch_size
# return loss
return {'CenterLoss': loss}
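# Illustrative check (not part of the original file): the expand/addmm lines in
# CenterLoss compute pairwise squared distances via
#   ||f_i - c_j||^2 = ||f_i||^2 + ||c_j||^2 - 2 * f_i . c_j
# A direct broadcasted reference gives the same [batch_size, num_classes] matrix.
def _pairwise_sqdist_reference(feats, centers):
    diff = feats.unsqueeze(1) - centers.unsqueeze(0)  # [B, C, D]
    return (diff * diff).sum(axis=-1)  # [B, C]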
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from typing import Tuple
import paddle
import paddle.nn as nn
......@@ -135,3 +136,122 @@ class TripletLoss(nn.Layer):
y = paddle.ones_like(dist_an)
loss = self.ranking_loss(dist_an, dist_ap, y)
return {"TripletLoss": loss}
class TripletLossV3(nn.Layer):
"""Modified from Tong Xiao's open-reid (https://github.com/Cysu/open-reid).
Related Triplet Loss theory can be found in paper 'In Defense of the Triplet
Loss for Person Re-Identification'."""
def __init__(self, margin=None, normalize_feature=False):
super(TripletLossV3, self).__init__()
self.normalize_feature = normalize_feature
self.margin = margin
if margin is not None:
self.ranking_loss = nn.MarginRankingLoss(margin=margin)
else:
self.ranking_loss = nn.SoftMarginLoss()
def forward(self, input, target):
global_feat = input["backbone"]
if self.normalize_feature:
global_feat = self._normalize(global_feat, axis=-1)
dist_mat = self._euclidean_dist(global_feat, global_feat)
dist_ap, dist_an = self._hard_example_mining(dist_mat, target)
y = paddle.ones_like(dist_an)
if self.margin is not None:
loss = self.ranking_loss(dist_an, dist_ap, y)
return {"TripletLossV3": loss}
def _normalize(self, x: paddle.Tensor, axis: int=-1) -> paddle.Tensor:
"""Normalizing to unit length along the specified dimension.
Args:
x (paddle.Tensor): (batch_size, feature_dim)
axis (int, optional): normalization dim. Defaults to -1.
Returns:
paddle.Tensor: (batch_size, feature_dim)
"""
x = 1. * x / (paddle.norm(
x, 2, axis, keepdim=True).expand_as(x) + 1e-12)
return x
def _euclidean_dist(self, x: paddle.Tensor,
y: paddle.Tensor) -> paddle.Tensor:
"""compute euclidean distance between two batched vectors
Args:
x (paddle.Tensor): (N, feature_dim)
y (paddle.Tensor): (M, feature_dim)
Returns:
paddle.Tensor: (N, M)
"""
m, n = x.shape[0], y.shape[0]
d = x.shape[1]
xx = paddle.pow(x, 2).sum(1, keepdim=True).expand([m, n])
yy = paddle.pow(y, 2).sum(1, keepdim=True).expand([n, m]).t()
dist = xx + yy
dist = dist.addmm(x, y.t(), alpha=-2, beta=1)
# dist = dist - 2*(x@y.t())
dist = dist.clip(min=1e-12).sqrt() # for numerical stability
return dist
def _hard_example_mining(
self,
dist_mat: paddle.Tensor,
labels: paddle.Tensor,
return_inds: bool=False) -> Tuple[paddle.Tensor, paddle.Tensor]:
"""For each anchor, find the hardest positive and negative sample.
Args:
dist_mat (paddle.Tensor): pair wise distance between samples, [N, N]
labels (paddle.Tensor): labels, [N, ]
return_inds (bool, optional): whether to return the indices . Defaults to False.
Returns:
Tuple[paddle.Tensor, paddle.Tensor]: [(N, ), (N, )]
        NOTE: Only the case in which all labels have the same number of samples
        is considered, so all anchors can be processed in parallel.
"""
assert len(dist_mat.shape) == 2
assert dist_mat.shape[0] == dist_mat.shape[1]
N = dist_mat.shape[0]
# shape [N, N]
is_pos = labels.expand([N, N]).equal(labels.expand([N, N]).t())
is_neg = labels.expand([N, N]).not_equal(labels.expand([N, N]).t())
# `dist_ap` means distance(anchor, positive)
# both `dist_ap` and `relative_p_inds` with shape [N, 1]
dist_ap = paddle.max(dist_mat[is_pos].reshape([N, -1]),
1,
keepdim=True)
# `dist_an` means distance(anchor, negative)
# both `dist_an` and `relative_n_inds` with shape [N, 1]
dist_an = paddle.min(dist_mat[is_neg].reshape([N, -1]),
1,
keepdim=True)
# shape [N]
dist_ap = dist_ap.squeeze(1)
dist_an = dist_an.squeeze(1)
        if return_inds:
            # relative indices of the hardest positive/negative within each row
            relative_p_inds = paddle.argmax(
                dist_mat[is_pos].reshape([N, -1]), axis=1, keepdim=True)
            relative_n_inds = paddle.argmin(
                dist_mat[is_neg].reshape([N, -1]), axis=1, keepdim=True)
            # shape [N, N], each row is [0, 1, ..., N - 1]
            ind = paddle.arange(0, N).unsqueeze(0).expand([N, N])
            # shape [N, 1]
            p_inds = paddle.index_sample(
                ind[is_pos].reshape([N, -1]), relative_p_inds)
            n_inds = paddle.index_sample(
                ind[is_neg].reshape([N, -1]), relative_n_inds)
            # shape [N]
            p_inds = p_inds.squeeze(1)
            n_inds = n_inds.squeeze(1)
            return dist_ap, dist_an, p_inds, n_inds
return dist_ap, dist_an
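# Illustrative usage (batch layout is an assumption): with a PKSampler-style
# batch of 4 identities x 4 samples, TripletLossV3 mines the hardest positive
# and hardest negative for every anchor from the backbone features.
def _triplet_loss_v3_example():
    loss_fn = TripletLossV3(margin=0.3, normalize_feature=False)
    feats = paddle.randn([16, 2048])
    labels = paddle.to_tensor([i // 4 for i in range(16)])  # 4 ids x 4 samples
    return loss_fn({"backbone": feats}, labels)  # {"TripletLossV3": loss}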
......@@ -18,6 +18,7 @@ from __future__ import print_function
import copy
import paddle
from typing import Dict, List
from ppcls.utils import logger
......@@ -44,97 +45,86 @@ def build_lr_scheduler(lr_config, epochs, step_each_epoch):
# model_list is None in static graph
def build_optimizer(config, epochs, step_each_epoch, model_list=None):
config = copy.deepcopy(config)
if 'name' in config:
# NOTE: build optimizer and lr for model only.
optim_config = config["Optimizer"]
if isinstance(optim_config, dict):
# convert {'name': xxx, **optim_cfg} to [{'name': {'scope': xxx, **optim_cfg}}]
optim_name = optim_config.pop("name")
optim_config: List[Dict[str, Dict]] = [{
optim_name: {
'scope': "all",
**
optim_config
}
}]
optim_list = []
lr_list = []
"""NOTE:
Currently only support optim objets below.
1. single optimizer config.
2. next level uner Arch, such as Arch.backbone, Arch.neck, Arch.head.
3. loss which has parameters, such as CenterLoss.
"""
for optim_item in optim_config:
# optim_cfg = {optim_name: {'scope': xxx, **optim_cfg}}
# step1 build lr
lr = build_lr_scheduler(config.pop('lr'), epochs, step_each_epoch)
logger.debug("build model's lr ({}) success..".format(lr))
optim_name = list(optim_item.keys())[0] # get optim_name
optim_scope = optim_item[optim_name].pop('scope') # get optim_scope
optim_cfg = optim_item[optim_name] # get optim_cfg
lr = build_lr_scheduler(optim_cfg.pop('lr'), epochs, step_each_epoch)
logger.info("build lr ({}) for scope ({}) success..".format(
lr.__class__.__name__, optim_scope))
# step2 build regularization
if 'regularizer' in config and config['regularizer'] is not None:
if 'weight_decay' in config:
if 'regularizer' in optim_cfg and optim_cfg['regularizer'] is not None:
if 'weight_decay' in optim_cfg:
logger.warning(
"ConfigError: Only one of regularizer and weight_decay can be set in Optimizer Config. \"weight_decay\" has been ignored."
)
reg_config = config.pop('regularizer')
reg_config = optim_cfg.pop('regularizer')
reg_name = reg_config.pop('name') + 'Decay'
reg = getattr(paddle.regularizer, reg_name)(**reg_config)
config["weight_decay"] = reg
logger.debug("build model's regularizer ({}) success..".format(
reg))
optim_cfg["weight_decay"] = reg
logger.info("build regularizer ({}) for scope ({}) success..".
format(reg.__class__.__name__, optim_scope))
# step3 build optimizer
optim_name = config.pop('name')
if 'clip_norm' in config:
clip_norm = config.pop('clip_norm')
if 'clip_norm' in optim_cfg:
clip_norm = optim_cfg.pop('clip_norm')
grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
else:
grad_clip = None
optim_model = []
for i in range(len(model_list)):
if len(model_list[i].parameters()) == 0:
continue
if optim_scope == "all":
# optimizer for all
optim_model.append(model_list[i])
else:
if "Loss" in optim_scope:
# optimizer for loss
if hasattr(model_list[i], 'loss_func'):
for j in range(len(model_list[i].loss_func)):
if model_list[i].loss_func[
j].__class__.__name__ == optim_scope:
optim_model.append(model_list[i].loss_func[j])
elif optim_scope == "model":
                    # optimizer for the entire model
if not model_list[i].__class__.__name__.lower().endswith(
"loss"):
optim_model.append(model_list[i])
else:
                    # optimizer for a module in the model, such as backbone, neck, head...
if hasattr(model_list[i], optim_scope):
optim_model.append(getattr(model_list[i], optim_scope))
assert len(optim_model) == 1, \
"Invalid optim model for optim scope({}), number of optim_model={}".\
format(optim_scope, [m.__class__.__name__ for m in optim_model])
optim = getattr(optimizer, optim_name)(
learning_rate=lr, grad_clip=grad_clip,
**config)(model_list=model_list[0:1])
optim = [optim, ]
lr = [lr, ]
logger.debug("build model's optimizer ({}) success..".format(optim))
else:
# NOTE: build optimizer and lr for model and loss.
config_model = config['model']
config_loss = config['loss']
# step1 build lr
lr_model = build_lr_scheduler(
config_model.pop('lr'), epochs, step_each_epoch)
logger.debug("build model's lr ({}) success..".format(lr_model))
# step2 build regularization
if 'regularizer' in config_model and config_model[
'regularizer'] is not None:
if 'weight_decay' in config_model:
logger.warning(
"ConfigError: Only one of regularizer and weight_decay can be set in Optimizer Config. \"weight_decay\" has been ignored."
)
reg_config = config_model.pop('regularizer')
reg_name = reg_config.pop('name') + 'Decay'
reg_model = getattr(paddle.regularizer, reg_name)(**reg_config)
config_model["weight_decay"] = reg_model
logger.debug("build model's regularizer ({}) success..".format(
reg_model))
# step3 build optimizer
optim_name = config_model.pop('name')
if 'clip_norm' in config_model:
clip_norm = config_model.pop('clip_norm')
grad_clip_model = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
else:
grad_clip_model = None
optim_model = getattr(optimizer, optim_name)(
learning_rate=lr_model, grad_clip=grad_clip_model,
**config_model)(model_list=model_list[0:1])
# step4 build lr for loss
lr_loss = build_lr_scheduler(
config_loss.pop('lr'), epochs, step_each_epoch)
logger.debug("build loss's lr ({}) success..".format(lr_loss))
# step5 build regularization for loss
if 'regularizer' in config_loss and config_loss[
'regularizer'] is not None:
if 'weight_decay' in config_loss:
logger.warning(
"ConfigError: Only one of regularizer and weight_decay can be set in Optimizer Config. \"weight_decay\" has been ignored."
)
reg_config = config_loss.pop('regularizer')
reg_name = reg_config.pop('name') + 'Decay'
reg_loss = getattr(paddle.regularizer, reg_name)(**reg_config)
config_loss["weight_decay"] = reg_loss
logger.debug("build loss's regularizer ({}) success..".format(
reg_loss))
# step6 build optimizer for loss
optim_name = config_loss.pop('name')
if 'clip_norm' in config_loss:
clip_norm = config_loss.pop('clip_norm')
grad_clip_loss = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
else:
grad_clip_loss = None
optim_loss = getattr(optimizer, optim_name)(
learning_rate=lr_loss, grad_clip=grad_clip_loss,
**config_loss)(model_list=model_list[1:2])
optim = [optim_model, optim_loss]
lr = [lr_model, lr_loss]
logger.debug("build loss's optimizer ({}) success..".format(optim))
return optim, lr
**optim_cfg)(model_list=optim_model)
logger.info("build optimizer ({}) for scope ({}) success..".format(
optim.__class__.__name__, optim_scope))
optim_list.append(optim)
lr_list.append(lr)
return optim_list, lr_list
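The loop above builds one optimizer per entry of a per-scope configuration list: each entry carries its own lr, an optional regularizer, an optional clip_norm and a scope ("all", "model", a sub-module name such as "backbone", or a loss class name). A sketch of such a list, written as Python data for illustration (the real configs are YAML; the schema and all concrete values here are assumptions, only the scope/lr/regularizer/clip_norm field names come from the code above):

# Illustrative per-scope optimizer configuration (values are assumptions).
optimizer_config = [
    {"Momentum": {
        "scope": "model",                  # every parameter except the loss layers
        "momentum": 0.9,
        "clip_norm": 10.0,
        "lr": {"name": "Piecewise",
               "decay_epochs": [30, 60, 90],
               "values": [0.1, 0.01, 0.001, 0.0001]},
        "regularizer": {"name": "L2", "coeff": 0.0001},
    }},
    {"SGD": {
        "scope": "CenterLoss",             # only the parameters of this loss layer
        "lr": {"name": "Constant", "learning_rate": 0.5},
    }},
]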
......@@ -215,6 +215,7 @@ class Piecewise(object):
epochs,
warmup_epoch=0,
warmup_start_lr=0.0,
warmup_by_epoch=False,
last_epoch=-1,
**kwargs):
super().__init__()
......@@ -222,27 +223,61 @@ class Piecewise(object):
msg = f"When using warm up, the value of \"Global.epochs\" must be greater than value of \"Optimizer.lr.warmup_epoch\". The value of \"Optimizer.lr.warmup_epoch\" has been set to {epochs}."
logger.warning(msg)
warmup_epoch = epochs
self.boundaries = [step_each_epoch * e for e in decay_epochs]
self.boundaries_steps = [step_each_epoch * e for e in decay_epochs]
self.boundaries_epoch = decay_epochs
self.values = values
self.last_epoch = last_epoch
self.warmup_steps = round(warmup_epoch * step_each_epoch)
self.warmup_epoch = warmup_epoch
self.warmup_start_lr = warmup_start_lr
self.warmup_by_epoch = warmup_by_epoch
def __call__(self):
learning_rate = lr.PiecewiseDecay(
boundaries=self.boundaries,
values=self.values,
last_epoch=self.last_epoch)
if self.warmup_steps > 0:
learning_rate = lr.LinearWarmup(
learning_rate=learning_rate,
warmup_steps=self.warmup_steps,
start_lr=self.warmup_start_lr,
end_lr=self.values[0],
if self.warmup_by_epoch is False:
learning_rate = lr.PiecewiseDecay(
boundaries=self.boundaries_steps,
values=self.values,
last_epoch=self.last_epoch)
if self.warmup_steps > 0:
learning_rate = lr.LinearWarmup(
learning_rate=learning_rate,
warmup_steps=self.warmup_steps,
start_lr=self.warmup_start_lr,
end_lr=self.values[0],
last_epoch=self.last_epoch)
else:
learning_rate = lr.PiecewiseDecay(
boundaries=self.boundaries_epoch,
values=self.values,
last_epoch=self.last_epoch)
if self.warmup_epoch > 0:
learning_rate = lr.LinearWarmup(
learning_rate=learning_rate,
warmup_steps=self.warmup_epoch,
start_lr=self.warmup_start_lr,
end_lr=self.values[0],
last_epoch=self.last_epoch)
return learning_rate
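With the default warmup_by_epoch=False, the decay boundaries and the warmup length are converted to iteration counts (decay_epochs multiplied by step_each_epoch) and the returned scheduler is meant to be stepped once per batch; with warmup_by_epoch=True both stay in epoch units, so it should be stepped once per epoch. A minimal usage sketch, assuming the import path below and purely illustrative numbers:

from ppcls.optimizer.learning_rate import Piecewise  # assumed module path

# Assumed setup: 100 iterations per epoch, decay at epochs 30/60/90,
# 5 warmup epochs; __call__() returns a paddle LRScheduler instance.
scheduler = Piecewise(
    step_each_epoch=100,
    decay_epochs=[30, 60, 90],
    values=[0.1, 0.01, 0.001, 0.0001],
    epochs=120,
    warmup_epoch=5,
    warmup_by_epoch=True)()

# Because warmup_by_epoch=True, boundaries and warmup are in epochs,
# so the scheduler is stepped once per epoch rather than once per batch.
for epoch in range(120):
    scheduler.step()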
class Constant(LRScheduler):
"""
Constant learning rate
Args:
        learning_rate (float): The constant learning rate. It is a python float number.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
        by_epoch (bool, optional): Whether the scheduler is stepped once per epoch instead of once per iteration. Default: False.
"""
def __init__(self, learning_rate, last_epoch=-1, by_epoch=False, **kwargs):
self.learning_rate = learning_rate
self.last_epoch = last_epoch
self.by_epoch = by_epoch
super().__init__()
def get_lr(self):
return self.learning_rate
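Constant simply reports the same value from get_lr() forever; the new by_epoch flag only records whether the caller steps it per epoch or per iteration. A tiny usage sketch (assuming the class is importable from the same learning-rate module):

from ppcls.optimizer.learning_rate import Constant  # assumed module path

const_lr = Constant(learning_rate=0.001)
print(const_lr.get_lr())  # 0.001
const_lr.step()
print(const_lr.get_lr())  # still 0.001 - the value never changes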
class MultiStepDecay(LRScheduler):
"""
Update the learning rate by ``gamma`` once ``epoch`` reaches one of the milestones.
......
......@@ -161,12 +161,22 @@ def main(args):
# load pretrained models or checkpoints
init_model(global_config, train_prog, exe)
if 'AMP' in config and config.AMP.get("level", "O1") == "O2":
if 'AMP' in config:
if config["AMP"].get("level", "O1").upper() == "O2":
use_fp16_test = True
msg = "Only support FP16 evaluation when AMP O2 is enabled."
logger.warning(msg)
elif "use_fp16_test" in config["AMP"]:
use_fp16_test = config["AMP"].get["use_fp16_test"]
else:
use_fp16_test = False
optimizer.amp_init(
device,
scope=paddle.static.global_scope(),
test_program=eval_prog
if global_config["eval_during_train"] else None)
if global_config["eval_during_train"] else None,
use_fp16_test=use_fp16_test)
if not global_config.get("is_distributed", True):
compiled_train_prog = program.compile(
......@@ -182,7 +192,7 @@ def main(args):
program.run(train_dataloader, exe, compiled_train_prog, train_feeds,
train_fetchs, epoch_id, 'train', config, vdl_writer,
lr_scheduler, args.profiler_options)
# 2. evaate with eval dataset
# 2. evaluate with eval dataset
if global_config["eval_during_train"] and epoch_id % global_config[
"eval_interval"] == 0:
top1_acc = program.run(eval_dataloader, exe, compiled_eval_prog,
......
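The FP16-evaluation switch added above is driven entirely by the AMP section of the config: level O2 forces FP16 evaluation, any other level falls back to the optional use_fp16_test flag, and a missing AMP section means FP32. A self-contained sketch of that decision (config fragments are illustrative Python dicts standing in for the YAML):

def resolve_fp16_eval(config):
    # Mirrors the branch added above: AMP O2 always evaluates in FP16,
    # other AMP levels consult the optional use_fp16_test flag,
    # and configs without an AMP section evaluate in FP32.
    if "AMP" not in config:
        return False
    if config["AMP"].get("level", "O1").upper() == "O2":
        return True
    return bool(config["AMP"].get("use_fp16_test", False))

assert resolve_fp16_eval({"AMP": {"level": "O2", "scale_loss": 128.0}}) is True
assert resolve_fp16_eval({"AMP": {"level": "O1"}}) is False
assert resolve_fp16_eval({"Global": {"epochs": 120}}) is False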
......@@ -137,9 +137,12 @@ def override(dl, ks, v):
if len(ks) == 1:
# assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl))
if not ks[0] in dl:
print('A new filed ({}) detected!'.format(ks[0], dl))
print('A new field ({}) detected!'.format(ks[0], dl))
dl[ks[0]] = str2num(v)
else:
if ks[0] not in dl.keys():
dl[ks[0]] = {}
print("A new Series field ({}) detected!".format(ks[0], dl))
override(dl[ks[0]], ks[1:], v)
......
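With the extra branch above, override() now creates missing intermediate dicts instead of failing, so a dotted -o option can introduce an entirely new branch of the config. An illustrative sketch (assuming the module path below; keys and values are made up):

from ppcls.utils.config import override  # assumed module path

cfg = {"Global": {"epochs": 120}}

# Roughly what `-o AMP.level=O2` does: "AMP" is not in cfg yet, so the
# patched override() inserts an empty dict for it before recursing.
override(cfg, ["AMP", "level"], "O2")
print(cfg)  # {'Global': {'epochs': 120}, 'AMP': {'level': 'O2'}}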
......@@ -18,9 +18,6 @@ from __future__ import print_function
import errno
import os
import re
import shutil
import tempfile
import paddle
from ppcls.utils import logger
......@@ -47,10 +44,15 @@ def _mkdir_if_not_exist(path):
def load_dygraph_pretrain(model, path=None):
if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
raise ValueError("Model pretrain path {} does not "
"exists.".format(path + '.pdparams'))
raise ValueError("Model pretrain path {}.pdparams does not "
"exists.".format(path))
param_state_dict = paddle.load(path + ".pdparams")
model.set_dict(param_state_dict)
if isinstance(model, list):
for m in model:
if hasattr(m, 'set_dict'):
m.set_dict(param_state_dict)
else:
model.set_dict(param_state_dict)
return
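load_dygraph_pretrain now also accepts a list of sub-networks and pushes the same .pdparams file into every element that exposes set_dict, which lets a network and a learnable loss share one pretrained file. A minimal usage sketch (the import path, the stand-in layers and the weight path are all illustrative assumptions):

import paddle
from ppcls.utils.save_load import load_dygraph_pretrain  # assumed module path

# Stand-ins for a real backbone and a learnable loss layer.
backbone = paddle.nn.Linear(8, 8)
metric_loss = paddle.nn.Linear(8, 2)

# Loads ./pretrained/model.pdparams (assumed path) into both layers.
load_dygraph_pretrain([backbone, metric_loss], path="./pretrained/model")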
......@@ -85,7 +87,7 @@ def load_distillation_model(model, pretrained_model):
pretrained_model))
def init_model(config, net, optimizer=None):
def init_model(config, net, optimizer=None, loss: paddle.nn.Layer=None):
"""
load model from checkpoint or pretrained_model
"""
......@@ -95,10 +97,13 @@ def init_model(config, net, optimizer=None):
"Given dir {}.pdparams not exist.".format(checkpoints)
assert os.path.exists(checkpoints + ".pdopt"), \
"Given dir {}.pdopt not exist.".format(checkpoints)
para_dict = paddle.load(checkpoints + ".pdparams")
# load state dict
opti_dict = paddle.load(checkpoints + ".pdopt")
para_dict = paddle.load(checkpoints + ".pdparams")
metric_dict = paddle.load(checkpoints + ".pdstates")
net.set_dict(para_dict)
# set state dict
net.set_state_dict(para_dict)
    if loss is not None:
        loss.set_state_dict(para_dict)
for i in range(len(optimizer)):
optimizer[i].set_state_dict(opti_dict)
logger.info("Finish load checkpoints from {}".format(checkpoints))
......@@ -121,7 +126,8 @@ def save_model(net,
metric_info,
model_path,
model_name="",
prefix='ppcls'):
prefix='ppcls',
loss: paddle.nn.Layer=None):
"""
save model to the target path
"""
......@@ -131,7 +137,14 @@ def save_model(net,
_mkdir_if_not_exist(model_path)
model_path = os.path.join(model_path, prefix)
paddle.save(net.state_dict(), model_path + ".pdparams")
    params_state_dict = net.state_dict()
    if loss is not None:
        loss_state_dict = loss.state_dict()
        keys_inter = set(params_state_dict.keys()) & set(loss_state_dict.keys())
        assert len(keys_inter) == 0, \
            f"keys in model and loss state_dict must be unique, but got intersection {keys_inter}"
        params_state_dict.update(loss_state_dict)
    paddle.save(params_state_dict, model_path + ".pdparams")
paddle.save([opt.state_dict() for opt in optimizer], model_path + ".pdopt")
paddle.save(metric_info, model_path + ".pdstates")
logger.info("Already save model in {}".format(model_path))
......@@ -16,6 +16,14 @@ function func_parser_value(){
echo ${tmp}
}
function func_parser_value_lite(){
strs=$1
IFS=$2
array=(${strs})
tmp=${array[1]}
echo ${tmp}
}
function func_set_params(){
key=$1
value=$2
......
===========================train_params===========================
model_name:AlexNet
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/AlexNet/AlexNet.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/AlexNet/AlexNet.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/AlexNet/AlexNet.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
===========================train_params===========================
model_name:CSPDarkNet53
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/CSPNet/CSPDarkNet53.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/CSPDarkNet53_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.transform_ops.0.ResizeImage.resize_short=288 -o PreProcess.transform_ops.1.CropImage.size=256
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
......@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.tran
-o Global.benchmark:True
null:null
null:null
===========================train_benchmark_params==========================
batch_size:32
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
......@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.tran
-o Global.benchmark:True
null:null
null:null
===========================train_benchmark_params==========================
batch_size:32
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
......@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.tran
-o Global.benchmark:True
null:null
null:null
===========================train_benchmark_params==========================
batch_size:32
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
......@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.tran
-o Global.benchmark:True
null:null
null:null
===========================train_benchmark_params==========================
batch_size:8
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
......@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.tran
-o Global.benchmark:True
null:null
null:null
===========================train_benchmark_params==========================
batch_size:64
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
......@@ -50,3 +50,9 @@ inference:python/predict_cls.py -c configs/inference_cls.yaml -o PreProcess.tran
-o Global.benchmark:True
null:null
null:null
===========================train_benchmark_params==========================
batch_size:128
fp_items:fp32
epoch:1
--profiler_options:batch_range=[10,20];state=GPU;tracer_option=Default;profile_path=model.profile
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
===========================train_params===========================
model_name:DLA102
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/DLA/DLA102.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/DLA/DLA102.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/DLA/DLA102.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
===========================train_params===========================
model_name:DLA102x2
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/DLA/DLA102x2.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/DLA/DLA102x2.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/DLA/DLA102x2.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x2_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
===========================train_params===========================
model_name:DLA102x
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/DLA/DLA102x.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/DLA/DLA102x.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/DLA/DLA102x.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
===========================train_params===========================
model_name:DLA169
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/DLA/DLA169.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/DLA/DLA169.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/DLA/DLA169.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
===========================train_params===========================
model_name:DLA34
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/DLA/DLA34.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/DLA/DLA34.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/DLA/DLA34.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
===========================train_params===========================
model_name:DLA46_c
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/DLA/DLA46_c.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/DLA/DLA46_c.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/DLA/DLA46_c.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46_c_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
===========================train_params===========================
model_name:DLA46x_c
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/DLA/DLA46x_c.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/DLA/DLA46x_c.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/DLA/DLA46x_c.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46x_c_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null
===========================train_params===========================
model_name:DLA60
python:python3.7
gpu_list:0|0,1
-o Global.device:gpu
-o Global.auto_cast:null
-o Global.epochs:lite_train_lite_infer=2|whole_train_whole_infer=120
-o Global.output_dir:./output/
-o DataLoader.Train.sampler.batch_size:8
-o Global.pretrained_model:null
train_model_name:latest
train_infer_img_dir:./dataset/ILSVRC2012/val
null:null
##
trainer:amp_train
amp_train:tools/train.py -c ppcls/configs/ImageNet/DLA/DLA60.yaml -o Global.seed=1234 -o DataLoader.Train.sampler.shuffle=False -o DataLoader.Train.loader.num_workers=0 -o DataLoader.Train.loader.use_shared_memory=False -o AMP.scale_loss=128 -o AMP.use_dynamic_loss_scaling=True -o AMP.level=O2
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:tools/eval.py -c ppcls/configs/ImageNet/DLA/DLA60.yaml
null:null
##
===========================infer_params==========================
-o Global.save_inference_dir:./inference
-o Global.pretrained_model:
norm_export:tools/export_model.py -c ppcls/configs/ImageNet/DLA/DLA60.yaml
quant_export:null
fpgm_export:null
distill_export:null
kl_quant:null
export2:null
pretrained_model_url:https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60_pretrained.pdparams
infer_model:../inference/
infer_export:True
infer_quant:False
inference:python/predict_cls.py -c configs/inference_cls.yaml
-o Global.use_gpu:True|False
-o Global.enable_mkldnn:True|False
-o Global.cpu_num_threads:1|6
-o Global.batch_size:1|16
-o Global.use_tensorrt:True|False
-o Global.use_fp16:True|False
-o Global.inference_model_dir:../inference
-o Global.infer_imgs:../dataset/ILSVRC2012/val
-o Global.save_log_path:null
-o Global.benchmark:True
null:null
null:null