feat: add swin-transformer (#5727)

* feat: add swin-transformer * fix repo * refine docs * fix: update resize method and image, add inference codes in docs.

feat: add swin-transformer (#5727)
* feat: add swin-transformer * fix repo * refine docs * fix: update resize method and image, add inference codes in docs.
c7e73758 · qizhaoaoe · GitHub · cc2f20c4 · c7e73758 · c7e73758
12 changed files
--- a/modelcenter/PLSC-SwinTransformer/APP/__init__.py
+++ b/modelcenter/PLSC-SwinTransformer/APP/__init__.py
--- a/modelcenter/PLSC-SwinTransformer/APP/app.py
+++ b/modelcenter/PLSC-SwinTransformer/APP/app.py
+import numpy as np
+import gradio as gr
+
+from download import get_model_path, get_data_path
+
+from plsc.data.preprocess import Resize
+from plsc.engine.inference import Predictor
+
+predictor = None
+
+
+def model_inference(image):
+    global predictor
+
+    if predictor is None:
+
+        model_path = "paddlecv://models/swin/v2.5/swin_base_patch4_window7_224_infer.pdmodel"
+        params_path = "paddlecv://models/swin/v2.5/swin_base_patch4_window7_224_infer.pdiparams"
+        label_path = "paddlecv://dataset/imagenet2012_labels.txt"
+        infer_model = get_model_path(model_path)
+        infer_params = get_model_path(params_path)
+
+        def parse_labels(label_path):
+            labels = []
+            with open(label_path, 'r') as f:
+                for line in f:
+                    if len(line) < 2:
+                        continue
+                    label = line.strip().split(',')[1]
+                    labels.append(label)
+            return np.array(labels)
+
+        labels = parse_labels(get_data_path(label_path))
+
+        def preprocess(img):
+            resize = Resize(size=224, interpolation="bicubic", backend="pil")
+            img = np.array(resize(img))
+            scale = 1.0 / 255.0
+            mean = np.array([0.485, 0.456, 0.406])
+            std = np.array([0.229, 0.224, 0.225])
+            img = (img * scale - mean) / std
+            img = img[np.newaxis, :, :, :]
+            img = img.transpose((0, 3, 1, 2))
+            return {'x': img.astype('float32')}
+
+        def postprocess(logits):
+            def softmax(x, epsilon=1e-6):
+                exp_x = np.exp(x)
+                sfm = (exp_x + epsilon) / (np.sum(exp_x) + epsilon)
+                return sfm
+
+            pred = np.array(logits).squeeze()
+            pred = softmax(pred)
+            class_idx = pred.argsort()[::-1]
+            class_idx_top5 = class_idx[:5]
+            return class_idx_top5, pred[class_idx_top5], labels[class_idx_top5]
+
+        predictor = Predictor(
+            model_file=infer_model,
+            params_file=infer_params,
+            preprocess_fn=preprocess,
+            postprocess_fn=postprocess)
+
+    class_ids, scores, classes = predictor.predict(image)
+    json_out = {
+        "class_ids": class_ids.tolist(),
+        "scores": scores.tolist(),
+        "labels": classes.tolist()
+    }
+    return image, json_out
+
+
+def clear_all():
+    return None, None, None
+
+
+with gr.Blocks() as demo:
+    gr.Markdown("Classification based on SwinTransformer")
+
+    with gr.Column(scale=1, min_width=100):
+        img_in = gr.Image(
+            value="https://plsc.bj.bcebos.com/dataset/test_images/zebra.png",
+            label="Input").style(height=200)
+
+        with gr.Row():
+            btn1 = gr.Button("Clear")
+            btn2 = gr.Button("Submit")
+
+        img_out = gr.Image(label="Output").style(height=200)
+        json_out = gr.JSON(label="jsonOutput")
+
+    btn2.click(fn=model_inference, inputs=img_in, outputs=[img_out, json_out])
+    btn1.click(fn=clear_all, inputs=None, outputs=[img_in, img_out, json_out])
+    gr.Button.style(1)
+
+demo.launch()
--- a/modelcenter/PLSC-SwinTransformer/APP/app.yaml
+++ b/modelcenter/PLSC-SwinTransformer/APP/app.yaml
+【PLSC-SwinTransformer-App-YAML】
+
+APP_Info:
+    title: PLSC-SwinTransformer-App
+    colorFrom: blue
+    colorTo: yellow
+    sdk: gradio
+    sdk_version: 3.9.1
+    app_file: app.py
+    license: apache-2.0
+    device: cpu
\ No newline at end of file
--- a/modelcenter/PLSC-SwinTransformer/APP/download.py
+++ b/modelcenter/PLSC-SwinTransformer/APP/download.py
+#   Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import os.path as osp
+import sys
+import yaml
+import time
+import shutil
+import requests
+import tqdm
+import hashlib
+import base64
+import binascii
+import tarfile
+import zipfile
+
+__all__ = [
+    'get_model_path',
+    'get_config_path',
+    'get_dict_path',
+    'get_data_path',
+]
+
+WEIGHTS_HOME = osp.expanduser("~/.cache/paddlecv/models/plsc")
+CONFIGS_HOME = osp.expanduser("~/.cache/paddlecv/configs/plsc")
+DICTS_HOME = osp.expanduser("~/.cache/paddlecv/dicts/plsc/")
+DATA_HOME = osp.expanduser("~/.cache/paddlecv/dataset/plsc")
+# dict of {dataset_name: (download_info, sub_dirs)}
+# download info: [(url, md5sum)]
+
+DOWNLOAD_RETRY_LIMIT = 3
+
+PMP_DOWNLOAD_URL_PREFIX = 'https://plsc.bj.bcebos.com/'
+
+
+def is_url(path):
+    """
+    Whether path is URL.
+    Args:
+        path (string): URL string or not.
+    """
+    return path.startswith('http://') \
+            or path.startswith('https://') \
+            or path.startswith('paddlecv://')
+
+
+def parse_url(url):
+    url = url.replace("paddlecv://", PMP_DOWNLOAD_URL_PREFIX)
+    return url
+
+
+def get_model_path(path):
+    """Get model path from WEIGHTS_HOME, if not exists,
+    download it from url.
+    """
+    if not is_url(path):
+        return path
+    url = parse_url(path)
+    path, _ = get_path(url, WEIGHTS_HOME, path_depth=3)
+    return path
+
+
+def get_data_path(path):
+    """Get model path from DATA_HOME, if not exists,
+    download it from url.
+    """
+    if not is_url(path):
+        return path
+    url = parse_url(path)
+    path, _ = get_path(url, DATA_HOME, path_depth=1)
+    return path
+
+
+def get_config_path(path):
+    """Get config path from CONFIGS_HOME, if not exists,
+    download it from url.
+    """
+    if not is_url(path):
+        return path
+    url = parse_url(path)
+    path, _ = get_path(url, CONFIGS_HOME)
+    return path
+
+
+def get_dict_path(path):
+    """Get config path from CONFIGS_HOME, if not exists,
+    download it from url.
+    """
+    if not is_url(path):
+        return path
+    url = parse_url(path)
+    path, _ = get_path(url, DICTS_HOME)
+    return path
+
+
+def map_path(url, root_dir, path_depth=1):
+    # parse path after download to decompress under root_dir
+    assert path_depth > 0, "path_depth should be a positive integer"
+    dirname = url
+    for _ in range(path_depth):
+        dirname = osp.dirname(dirname)
+    fpath = osp.relpath(url, dirname)
+    path = osp.join(root_dir, fpath)
+    dirname = osp.dirname(path)
+    return path, dirname
+
+
+def get_path(url, root_dir, md5sum=None, check_exist=True, path_depth=1):
+    """ Download from given url to root_dir.
+    if file or directory specified by url is exists under
+    root_dir, return the path directly, otherwise download
+    from url, return the path.
+    url (str): download url
+    root_dir (str): root dir for downloading, it should be
+                    WEIGHTS_HOME
+    md5sum (str): md5 sum of download package
+    """
+    # parse path after download to decompress under root_dir
+    fullpath, dirname = map_path(url, root_dir, path_depth)
+
+    if osp.exists(fullpath) and check_exist:
+        if not osp.isfile(fullpath) or \
+                _check_exist_file_md5(fullpath, md5sum, url):
+            return fullpath, True
+        else:
+            os.remove(fullpath)
+
+    fullname = _download(url, dirname, md5sum)
+    return fullpath, False
+
+
+def _download(url, path, md5sum=None):
+    """
+    Download from url, save to path.
+    url (str): download url
+    path (str): download to given path
+    """
+    if not osp.exists(path):
+        os.makedirs(path)
+
+    fname = osp.split(url)[-1]
+    fullname = osp.join(path, fname)
+    retry_cnt = 0
+
+    while not (osp.exists(fullname) and _check_exist_file_md5(fullname, md5sum,
+                                                              url)):
+        if retry_cnt < DOWNLOAD_RETRY_LIMIT:
+            retry_cnt += 1
+        else:
+            raise RuntimeError("Download from {} failed. "
+                               "Retry limit reached".format(url))
+
+        # NOTE: windows path join may incur \, which is invalid in url
+        if sys.platform == "win32":
+            url = url.replace('\\', '/')
+
+        req = requests.get(url, stream=True)
+        if req.status_code != 200:
+            raise RuntimeError("Downloading from {} failed with code "
+                               "{}!".format(url, req.status_code))
+
+        # For protecting download interupted, download to
+        # tmp_fullname firstly, move tmp_fullname to fullname
+        # after download finished
+        tmp_fullname = fullname + "_tmp"
+        total_size = req.headers.get('content-length')
+        with open(tmp_fullname, 'wb') as f:
+            if total_size:
+                for chunk in tqdm.tqdm(
+                        req.iter_content(chunk_size=1024),
+                        total=(int(total_size) + 1023) // 1024,
+                        unit='KB'):
+                    f.write(chunk)
+            else:
+                for chunk in req.iter_content(chunk_size=1024):
+                    if chunk:
+                        f.write(chunk)
+        shutil.move(tmp_fullname, fullname)
+    return fullname
+
+
+def _check_exist_file_md5(filename, md5sum, url):
+    # if md5sum is None, and file to check is model file, 
+    # read md5um from url and check, else check md5sum directly
+    return _md5check_from_url(filename, url) if md5sum is None \
+            and filename.endswith('pdparams') \
+            else _md5check(filename, md5sum)
+
+
+def _md5check_from_url(filename, url):
+    # For model in bcebos URLs, MD5 value is contained
+    # in request header as 'content_md5'
+    req = requests.get(url, stream=True)
+    content_md5 = req.headers.get('content-md5')
+    req.close()
+    if not content_md5 or _md5check(
+            filename,
+            binascii.hexlify(base64.b64decode(content_md5.strip('"'))).decode(
+            )):
+        return True
+    else:
+        return False
+
+
+def _md5check(fullname, md5sum=None):
+    if md5sum is None:
+        return True
+
+    md5 = hashlib.md5()
+    with open(fullname, 'rb') as f:
+        for chunk in iter(lambda: f.read(4096), b""):
+            md5.update(chunk)
+    calc_md5sum = md5.hexdigest()
+
+    if calc_md5sum != md5sum:
+        return False
+    return True
--- a/modelcenter/PLSC-SwinTransformer/APP/requirements.txt
+++ b/modelcenter/PLSC-SwinTransformer/APP/requirements.txt
+plsc==2.4
+gradio
+opencv-python
+paddlepaddle
+PyYAML
+shapely
+scipy
+Cython
+numpy
+setuptools
+pillow
+tqdm
\ No newline at end of file
--- a/modelcenter/PLSC-SwinTransformer/benchmark_cn.md
+++ b/modelcenter/PLSC-SwinTransformer/benchmark_cn.md
+# 1. Benchmark
+
+## 1.1 软硬件环境
+
+* 单机 8卡 A100(40G)
+* CUDA 11.2
+* CUDNN 8.1
+
+## 1.2 数据集
+- 测试使用的数据集为ImageNet.
+
+## 1.3 指标
+
+
+| Model |DType | Phase | Dataset | gpu | img/sec | Top1 Acc | Official |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| Swin-B |FP16 O1|pretrain  |ImageNet2012  |A100*N1C8  |  2155| 0.83362 | 0.835 |
+| Swin-B |FP16 O2|pretrain  | ImageNet2012 | A100*N1C8 | 3006 | 0.83223     | 0.835 |
+
+# 2. 相关使用说明
+
+https://github.com/PaddlePaddle/PLSC/blob/master/task/classification/swin/README.md
--- a/modelcenter/PLSC-SwinTransformer/benchmark_en.md
+++ b/modelcenter/PLSC-SwinTransformer/benchmark_en.md
+# 1. Benchmark
+
+## 1.1 Environment
+
+* 8 A100(40G) on single Node
+* CUDA 11.2
+* CUDNN 8.1
+
+## 1.2 DataSet
+- We train the Swin Transformer on ImageNet.
+
+## 1.3 Benchmark
+
+
+| Model |DType | Phase | Dataset | gpu | img/sec | Top1 Acc | Official |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| Swin-B |FP16 O1|pretrain  |ImageNet2012  |A100*N1C8  |  2155| 0.83362 | 0.835 |
+| Swin-B |FP16 O2|pretrain  | ImageNet2012 | A100*N1C8 | 3006 | 0.83223     | 0.835 |
+
+# 2. Reference
+
+https://github.com/PaddlePaddle/PLSC/blob/master/task/classification/swin/README.md
--- a/modelcenter/PLSC-SwinTransformer/download_cn.md
+++ b/modelcenter/PLSC-SwinTransformer/download_cn.md
+# 模型列表
+
+|模型名称|模型简介|模型配置|预训练checkpoint下载地址|
+| --- | --- | --- | --- |
+| Swin-B |输入size为224，patch=4, FP16-O1|[config](https://github.com/PaddlePaddle/PLSC/blob/master/task/classification/swin/configs/swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o1.yaml) |[download](https://plsc.bj.bcebos.com/models/swin/v2.5/swin_base_patch4_window7_224_fp16o1.pdparams) |  
+| Swin-B |输入size为224，patch=4, FP16-O2|[config](https://github.com/PaddlePaddle/PLSC/blob/master/task/classification/swin/configs/swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o2.yaml)| [download](https://plsc.bj.bcebos.com/models/swin/v2.5/swin_base_patch4_window7_224_fp16o2.pdparams) |
--- a/modelcenter/PLSC-SwinTransformer/download_en.md
+++ b/modelcenter/PLSC-SwinTransformer/download_en.md
+# Model List
+
+|Model Name| Introduction                    |Config|Pretrained checkpoint Download|
+| --- |---------------------------------| --- | --- |
+| Swin-B | input size=224，patch=4, FP16-O1 |[config](https://github.com/PaddlePaddle/PLSC/blob/master/task/classification/swin/configs/swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o1.yaml) |[download](https://plsc.bj.bcebos.com/models/swin/v2.5/swin_base_patch4_window7_224_fp16o1.pdparams) |  
+| Swin-B | input size=224，patch=4, FP16-O2 |[config](https://github.com/PaddlePaddle/PLSC/blob/master/task/classification/swin/configs/swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o2.yaml)| [download](https://plsc.bj.bcebos.com/models/swin/v2.5/swin_base_patch4_window7_224_fp16o2.pdparams) |
--- a/modelcenter/PLSC-SwinTransformer/info.yaml
+++ b/modelcenter/PLSC-SwinTransformer/info.yaml
+---
+Model_Info:
+   name: "PLSC-SwinTransformer"
+   description:  "PaddlePaddle 重新实现 Microsoft 官方 Repo 中的 Swin Transformer 算法 《Swin Transformer: Hierarchical Vision Transformer using Shifted Windows》"
+   description_en: "PaddlePaddle reimplementation of Microsoft's repository for the Swin Transformer model that was released with the paper Swin Transformer: Hierarchical Vision Transformer using Shifted Windows."
+   update_time:
+   icon: "https://plsc.bj.bcebos.com/assets/modelcenter-icon.png"
+   from_repo: "PLSC"
+Task:
+- tag: 计算机视觉
+  tag_en: Computer Vision
+  sub_tag: 图像分类
+  sub_tag_en: Image Classification
+Example:
+- tag:
+  tag_en:
+  sub_tag:
+  sub_tag_en:
+  title:
+  title_en:
+  url:
+  url_en:
+Datasets: ImageNet 1K
+Publisher: Baidu
+License: Apache 2.0
+Paper:
+- title: "Swin Transformer: Hierarchical Vision Transformer using Shifted Windows"
+  url: https://arxiv.org/pdf/2103.14030.pdf
+IfTraining: 1
+IfOnlineDemo: 1
\ No newline at end of file
--- a/modelcenter/PLSC-SwinTransformer/introduction_cn.ipynb
+++ b/modelcenter/PLSC-SwinTransformer/introduction_cn.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ae69ce68",
+   "metadata": {},
+   "source": [
+    "## 1. PLSC-SwinTransformer模型简介\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "35485bc6",
+   "metadata": {},
+   "source": [
+    "PLSC-SwinTransformer实现了基于[Swin Transformer](https://github.com/microsoft/Swin-Transformer)的视觉分类模型。Swin Transformer是一个层级结构的Vision Transformer(ViT)，Swin代表的是滑动窗口。与ViT不同，Swin基于非重叠的局部窗口计算自注意力，并且跨窗口进行连接保证窗口间信息共享，因此Swin Transformer相比于基于全局的ViT更高效。Swin Transformer可以作为CV领域的一个通用的backbone。模型结构如下，\n",
+    "\n",
+    "![Figure 1 from paper](https://github.com/microsoft/Swin-Transformer/blob/main/figures/teaser.png?raw=true)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "97e174e6",
+   "metadata": {},
+   "source": [
+    "## 2. 模型效果 "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "78137a72",
+   "metadata": {},
+   "source": [
+    "| Model |DType | Phase | Dataset | gpu | img/sec | Top1 Acc | Official |\n",
+    "| --- | --- | --- | --- | --- | --- | --- | --- |\n",
+    "| Swin-B |FP16 O1|pretrain  |ImageNet2012  |A100*N1C8  |  2155| 0.83362 | 0.835 |\n",
+    "| Swin-B |FP16 O2|pretrain  | ImageNet2012 | A100*N1C8 | 3006 | 0.83223\t | 0.835 |\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ace3c48d",
+   "metadata": {},
+   "source": [
+    "## 3. 模型如何使用"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a97a5f56",
+   "metadata": {},
+   "source": [
+    "### 3.1 安装PLSC"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "492fa769-2fe0-4220-b6d9-bbc32f8cca10",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "git clone https://github.com/PaddlePaddle/PLSC.git\n",
+    "cd /path/to/PLSC/\n",
+    "# [optional] pip install -r requirements.txt\n",
+    "python setup.py develop\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6b22824d",
+   "metadata": {},
+   "source": [
+    "### 3.2 模型训练"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d68ca5fb",
+   "metadata": {},
+   "source": [
+    "1. 进入任务目录\n",
+    "\n",
+    "```\n",
+    "cd task/classification/swin\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9048df01",
+   "metadata": {},
+   "source": [
+    "2. 准备数据\n",
+    "\n",
+    "将数据整理成以下格式：\n",
+    "```text\n",
+    "dataset/\n",
+    "└── ILSVRC2012\n",
+    "    ├── train\n",
+    "    ├── val\n",
+    "    ├── train_list.txt\n",
+    "    └── val_list.txt\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bea743ea",
+   "metadata": {},
+   "source": [
+    "3. 执行训练命令\n",
+    "\n",
+    "```shell\n",
+    "export PADDLE_NNODES=1\n",
+    "export PADDLE_MASTER=\"127.0.0.1:12538\"\n",
+    "export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7\n",
+    "\n",
+    "python -m paddle.distributed.launch \\\n",
+    "    --nnodes=$PADDLE_NNODES \\\n",
+    "    --master=$PADDLE_MASTER \\\n",
+    "    --devices=$CUDA_VISIBLE_DEVICES \\\n",
+    "    plsc-train \\\n",
+    "    -c ./configs/swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o1.yaml\n",
+    "```\n",
+    "\n",
+    "更多模型的训练教程可参考文档：[Swin训练文档](https://github.com/PaddlePaddle/PLSC/blob/master/task/classification/swin/README.md)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "186a0c17",
+   "metadata": {},
+   "source": [
+    "### 3.3 模型推理"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e97c527c",
+   "metadata": {},
+   "source": [
+    "1. 下载预训练模型和图片\n",
+    "\n",
+    "```shell\n",
+    "# download pretrained model\n",
+    "mkdir -p pretrained/swin/Swin_base/\n",
+    "wget -O ./pretrained/swin/Swin_base/swin_base_patch4_window7_224_fp16o1.pdparams \\\n",
+    "    https://plsc.bj.bcebos.com/models/swin/v2.5/swin_base_patch4_window7_224_fp16o1.pdparams\n",
+    "\n",
+    "# download image\n",
+    "mkdir -p images/\n",
+    "wget -O ./images/zebra.png https://plsc.bj.bcebos.com/dataset/test_images/zebra.png \n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a07c6549",
+   "metadata": {},
+   "source": [
+    "2. 导出推理模型\n",
+    "\n",
+    "```shell\n",
+    "plsc-export -c ./configs/swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o1.yaml -o Global.pretrained_model=./pretrained/swin/Swin_base/swin_base_patch4_window7_224_fp16o1 -o Model.data_format=NCHW -o FP16.level=O0\n",
+    "```\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ded8e73-3dba-49ce-bfb3-fcf7f3f0fc1d",
+   "metadata": {},
+   "source": [
+    "3. 图片预测"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9533d4df-acb3-474f-b591-f210639a0a02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "from plsc.data.dataset import default_loader\n",
+    "from plsc.data.preprocess import Resize\n",
+    "from plsc.engine.inference import Predictor\n",
+    "\n",
+    "\n",
+    "def preprocess(img):\n",
+    "    resize = Resize(size=224, \n",
+    "                    interpolation=\"bicubic\", \n",
+    "                    backend=\"pil\")\n",
+    "    img = np.array(resize(img))\n",
+    "    scale = 1.0 / 255.0\n",
+    "    mean = np.array([0.485, 0.456, 0.406])\n",
+    "    std = np.array([0.229, 0.224, 0.225])\n",
+    "    img = (img * scale - mean) / std\n",
+    "    img = img[np.newaxis, :, :, :]\n",
+    "    img = img.transpose((0, 3, 1, 2))\n",
+    "    return {'x': img.astype('float32')}\n",
+    "\n",
+    "\n",
+    "def postprocess(logits):\n",
+    "    \n",
+    "    def softmax(x, epsilon=1e-6):\n",
+    "        exp_x = np.exp(x)\n",
+    "        sfm = (exp_x + epsilon) / (np.sum(exp_x) + epsilon)\n",
+    "        return sfm\n",
+    "\n",
+    "    pred = np.array(logits).squeeze()\n",
+    "    pred = softmax(pred)\n",
+    "    pred_class_idx = pred.argsort()[::-1][0]\n",
+    "    return pred_class_idx, pred[pred_class_idx]\n",
+    "\n",
+    "\n",
+    "infer_model = \"./output/swin_base_patch4_window7_224/swin_base_patch4_window7_224.pdmodel\"\n",
+    "infer_params = \"./output/swin_base_patch4_window7_224/swin_base_patch4_window7_224.pdiparams\"\n",
+    "\n",
+    "predictor = Predictor(\n",
+    "    model_file=infer_model,\n",
+    "    params_file=infer_params,\n",
+    "    preprocess_fn=preprocess,\n",
+    "    postprocess_fn=postprocess)\n",
+    "\n",
+    "image = default_loader(\"./images/zebra.png\")\n",
+    "pred_class_idx, pred_score = predictor.predict(image)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d375934d",
+   "metadata": {},
+   "source": [
+    "## 4. 相关论文及引用信息\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "29f05b07-d323-45e4-b00d-0728eafb5af7",
+   "metadata": {},
+   "source": [
+    "```text\n",
+    "@inproceedings{liu2021Swin,\n",
+    "  title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows},\n",
+    "  author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},\n",
+    "  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},\n",
+    "  year={2021}\n",
+    "}\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/modelcenter/PLSC-SwinTransformer/introduction_en.ipynb
+++ b/modelcenter/PLSC-SwinTransformer/introduction_en.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ae69ce68",
+   "metadata": {},
+   "source": [
+    "## 1. PLSC-SwinTransformer Introduction\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "35485bc6",
+   "metadata": {},
+   "source": [
+    "PLSC-SwinTransformer is a reimplementation of [Microsoft's repository for the Swin-Transformer](https://github.com/microsoft/Swin-Transformer) model that was released with the paper [Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/pdf/2103.14030.pdf).\n",
+    "\n",
+    "Swin Transformer (the name Swin stands for Shifted window) capably serves as a general-purpose backbone for computer vision. It is basically a hierarchical Transformer whose representation is computed with shifted windows. The shifted windowing scheme brings greater efficiency by limiting self-attention computation to non-overlapping local windows while also allowing for cross-window connection.\n",
+    "\n",
+    "![Figure 1 from paper](https://github.com/microsoft/Swin-Transformer/blob/main/figures/teaser.png?raw=true)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "97e174e6",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## 2. Model Effects"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "78137a72",
+   "metadata": {},
+   "source": [
+    "| Model |DType | Phase | Dataset | gpu | img/sec | Top1 Acc | Official |\n",
+    "| --- | --- | --- | --- | --- | --- | --- | --- |\n",
+    "| Swin-B |FP16 O1|pretrain  |ImageNet2012  |A100*N1C8  |  2155| 0.83362 | 0.835 |\n",
+    "| Swin-B |FP16 O2|pretrain  | ImageNet2012 | A100*N1C8 | 3006 | 0.83223\t | 0.835 |\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ace3c48d",
+   "metadata": {},
+   "source": [
+    "## 3. How to use the Model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a97a5f56",
+   "metadata": {},
+   "source": [
+    "### 3.1 Install PLSC"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "492fa769-2fe0-4220-b6d9-bbc32f8cca10",
+   "metadata": {},
+   "source": [
+    "```\n",
+    "git clone https://github.com/PaddlePaddle/PLSC.git\n",
+    "cd /path/to/PLSC/\n",
+    "# [optional] pip install -r requirements.txt\n",
+    "python setup.py develop\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6b22824d",
+   "metadata": {},
+   "source": [
+    "### 3.2 Model Training"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d68ca5fb",
+   "metadata": {},
+   "source": [
+    "1. Enter the task directory\n",
+    "\n",
+    "```\n",
+    "cd task/classification/swin\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9048df01",
+   "metadata": {},
+   "source": [
+    "2. Prepare the data\n",
+    "\n",
+    "Organize the data into the following format:\n",
+    "\n",
+    "\n",
+    "```text\n",
+    "dataset/\n",
+    "└── ILSVRC2012\n",
+    "    ├── train\n",
+    "    ├── val\n",
+    "    ├── train_list.txt\n",
+    "    └── val_list.txt\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bea743ea",
+   "metadata": {},
+   "source": [
+    "3. Run the command\n",
+    "\n",
+    "```shell\n",
+    "export PADDLE_NNODES=1\n",
+    "export PADDLE_MASTER=\"127.0.0.1:12538\"\n",
+    "export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7\n",
+    "\n",
+    "python -m paddle.distributed.launch \\\n",
+    "    --nnodes=$PADDLE_NNODES \\\n",
+    "    --master=$PADDLE_MASTER \\\n",
+    "    --devices=$CUDA_VISIBLE_DEVICES \\\n",
+    "    plsc-train \\\n",
+    "    -c ./configs/swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o1.yaml\n",
+    "```\n",
+    "\n",
+    "More model training tutorials are available in the [Swin training documentation](https://github.com/PaddlePaddle/PLSC/blob/master/task/classification/swin/README.md)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "186a0c17",
+   "metadata": {},
+   "source": [
+    "### 3.3 Model Inference"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e97c527c",
+   "metadata": {},
+   "source": [
+    "1. Download pretrained model and image\n",
+    "\n",
+    "\n",
+    "```shell\n",
+    "# download pretrained model\n",
+    "mkdir -p pretrained/swin/Swin_base/\n",
+    "wget -O ./pretrained/swin/Swin_base/swin_base_patch4_window7_224_fp16o1.pdparams \\\n",
+    "    https://plsc.bj.bcebos.com/models/swin/v2.5/swin_base_patch4_window7_224_fp16o1.pdparams\n",
+    "\n",
+    "# download image\n",
+    "mkdir -p images/\n",
+    "wget -O ./images/zebra.png https://plsc.bj.bcebos.com/dataset/test_images/zebra.png\n",
+    "```\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a07c6549",
+   "metadata": {},
+   "source": [
+    "2. Export model for inference\n",
+    "\n",
+    "```shell\n",
+    "plsc-export -c ./configs/swin_base_patch4_window7_224_in1k_1n8c_dp_fp16o1.yaml -o Global.pretrained_model=./pretrained/swin/Swin_base/swin_base_patch4_window7_224_fp16o1 -o Model.data_format=NCHW -o FP16.level=O0\n",
+    "```\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e92efe35-ea6d-4aee-9a4d-a2c79f40f473",
+   "metadata": {},
+   "source": [
+    "3. Image inference"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "22f4a080-ad97-4e00-a9fa-697601f579ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "from plsc.data.dataset import default_loader\n",
+    "from plsc.data.preprocess import Resize\n",
+    "from plsc.engine.inference import Predictor\n",
+    "\n",
+    "\n",
+    "def preprocess(img):\n",
+    "    resize = Resize(size=224, \n",
+    "                    interpolation=\"bicubic\", \n",
+    "                    backend=\"pil\")\n",
+    "    img = np.array(resize(img))\n",
+    "    scale = 1.0 / 255.0\n",
+    "    mean = np.array([0.485, 0.456, 0.406])\n",
+    "    std = np.array([0.229, 0.224, 0.225])\n",
+    "    img = (img * scale - mean) / std\n",
+    "    img = img[np.newaxis, :, :, :]\n",
+    "    img = img.transpose((0, 3, 1, 2))\n",
+    "    return {'x': img.astype('float32')}\n",
+    "\n",
+    "\n",
+    "def postprocess(logits):\n",
+    "    \n",
+    "    def softmax(x, epsilon=1e-6):\n",
+    "        exp_x = np.exp(x)\n",
+    "        sfm = (exp_x + epsilon) / (np.sum(exp_x) + epsilon)\n",
+    "        return sfm\n",
+    "\n",
+    "    pred = np.array(logits).squeeze()\n",
+    "    pred = softmax(pred)\n",
+    "    pred_class_idx = pred.argsort()[::-1][0]\n",
+    "    return pred_class_idx, pred[pred_class_idx]\n",
+    "\n",
+    "\n",
+    "infer_model = \"./output/swin_base_patch4_window7_224/swin_base_patch4_window7_224.pdmodel\"\n",
+    "infer_params = \"./output/swin_base_patch4_window7_224/swin_base_patch4_window7_224.pdiparams\"\n",
+    "\n",
+    "predictor = Predictor(\n",
+    "    model_file=infer_model,\n",
+    "    params_file=infer_params,\n",
+    "    preprocess_fn=preprocess,\n",
+    "    postprocess_fn=postprocess)\n",
+    "\n",
+    "image = default_loader(\"./images/zebra.png\")\n",
+    "pred_class_idx, pred_score = predictor.predict(image)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d375934d",
+   "metadata": {},
+   "source": [
+    "## 4. Related papers and citations"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "29f05b07-d323-45e4-b00d-0728eafb5af7",
+   "metadata": {},
+   "source": [
+    "```text\n",
+    "@inproceedings{liu2021Swin,\n",
+    "  title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows},\n",
+    "  author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},\n",
+    "  booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)},\n",
+    "  year={2021}\n",
+    "}\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}