From aa768240b85ad59bf6c608eb34b2f71192a2acf1 Mon Sep 17 00:00:00 2001 From: Jiaqi Liu <709153940@qq.com> Date: Thu, 17 Nov 2022 16:22:21 +0800 Subject: [PATCH] Add ERNIE-3.0 for model center (#5577) * add ernie-3.0 * update ERNIE 3.0 --- modelcenter/ERNIE-3.0/.gitkeep | 0 modelcenter/ERNIE-3.0/benchmark_cn.md | 53 + modelcenter/ERNIE-3.0/benchmark_en.md | 51 + modelcenter/ERNIE-3.0/download_cn.md | 9 + modelcenter/ERNIE-3.0/download_en.md | 9 + modelcenter/ERNIE-3.0/info.yaml | 33 + modelcenter/ERNIE-3.0/introduction_cn.ipynb | 1447 ++++++++++ modelcenter/ERNIE-3.0/introduction_en.ipynb | 2708 +++++++++++++++++++ 8 files changed, 4310 insertions(+) delete mode 100644 modelcenter/ERNIE-3.0/.gitkeep create mode 100644 modelcenter/ERNIE-3.0/benchmark_cn.md create mode 100644 modelcenter/ERNIE-3.0/benchmark_en.md create mode 100644 modelcenter/ERNIE-3.0/download_cn.md create mode 100644 modelcenter/ERNIE-3.0/download_en.md create mode 100644 modelcenter/ERNIE-3.0/info.yaml create mode 100644 modelcenter/ERNIE-3.0/introduction_cn.ipynb create mode 100644 modelcenter/ERNIE-3.0/introduction_en.ipynb diff --git a/modelcenter/ERNIE-3.0/.gitkeep b/modelcenter/ERNIE-3.0/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/modelcenter/ERNIE-3.0/benchmark_cn.md b/modelcenter/ERNIE-3.0/benchmark_cn.md new file mode 100644 index 00000000..b6041b10 --- /dev/null +++ b/modelcenter/ERNIE-3.0/benchmark_cn.md @@ -0,0 +1,53 @@ +## 1. 推理 Benchmark + + +### 1.1 软硬件环境 + +1. 计算卡:T4、CUDA11.2、CuDNN8.2 + +2. CPU 信息:Intel(R) Xeon(R) Gold 6271C CPU + +3. PaddlePaddle 版本:2.3 + +4. PaddleNLP 版本:2.3 + +5. 性能数据单位是 QPS。QPS 测试方法:固定 batch size 为 32,测试运行时间 total_time,计算 QPS = total_samples / total_time + +6. 
精度数据单位:文本分类是 Accuracy,序列标注是 F1-Score,阅读理解是 EM (Exact Match) + +### 1.2 数据集 + +数据集:CLUE TNEWS(文本分类)、MSRA_NER(序列标注)、CLUE CMRC2018(阅读理解) + +### 1.3 指标 + +##### CPU 性能 + +测试环境及说明如上,测试 CPU 性能时,线程数设置为12。 + +| | TNEWS 性能 | TNEWS 精度 | MSRA_NER 性能 | MSRA_NER 精度 | CMRC2018 性能 | CMRC2018 精度 | +| -------------------------- | ------------ | ------------ | ------------- | ------------- | ------------- | ------------- | +| ERNIE 3.0-Medium+FP32 | 311.95(1.0x) | 57.45 | 90.91(1.0x) | 93.04 | 33.74(1.0x) | 66.95 | +| ERNIE 3.0-Medium+INT8 | 600.35(1.9x) | 56.57(-0.88) | 141.00(1.6x) | 92.64(-0.40) | 56.51(1.7x) | 66.23(-0.72) | +| ERNIE 3.0-Medium+裁剪+FP32 | 408.65(1.3x) | 57.31(-0.14) | 122.13(1.3x) | 93.27(+0.23) | 48.47(1.4x) | 65.55(-1.40) | +| ERNIE 3.0-Medium+裁剪+INT8 | 704.42(2.3x) | 56.69(-0.76) | 215.58(2.4x) | 92.39(-0.65) | 75.23(2.2x) | 63.47(-3.48) | + + +三类任务(分类、序列标注、阅读理解)经过相同压缩过程后,加速比达到 2.3 左右。 + +##### GPU 性能 + +| | TNEWS 性能 | TNEWS 精度 | MSRA_NER 性能 | MSRA_NER 精度 | CMRC2018 性能 | CMRC2018 精度 | +| -------------------------- | ------------- | ------------ | ------------- | ------------- | ------------- | ------------- | +| ERNIE 3.0-Medium+FP32 | 1123.85(1.0x) | 57.45 | 366.75(1.0x) | 93.04 | 146.84(1.0x) | 66.95 | +| ERNIE 3.0-Medium+FP16 | 2672.41(2.4x) | 57.45(0.00) | 840.11(2.3x) | 93.05(+0.01) | 303.43(2.1x) | 66.95(0.00) | +| ERNIE 3.0-Medium+INT8 | 3226.26(2.9x) | 56.99(-0.46) | 889.33(2.4x) | 92.70(-0.34) | 348.84(2.4x) | 66.32(-0.63) | +| ERNIE 3.0-Medium+裁剪+FP32 | 1424.01(1.3x) | 57.31(-0.14) | 454.27(1.2x) | 93.27(+0.23) | 183.77(1.3x) | 65.92(-1.03) | +| ERNIE 3.0-Medium+裁剪+FP16 | 3577.62(3.2x) | 57.27(-0.18) | 1138.77(3.1x) | 93.27(+0.23) | 445.71(3.0x) | 65.89(-1.06) | +| ERNIE 3.0-Medium+裁剪+INT8 | 3635.48(3.2x) | 57.26(-0.19) | 1105.26(3.0x) | 93.20(+0.16) | 444.27(3.0x) | 66.17(-0.78) | + + +三类任务(分类、序列标注、阅读理解)经过裁剪 + 量化后加速比均达到 3 倍左右,所有任务上平均精度损失可控制在 0.5 以内(0.46)。 + +## 2. 相关使用说明 +1. 
https://github.com/PaddlePaddle/PaddleNLP/blob/develop/model_zoo/ernie-3.0/README.md#%E6%80%A7%E8%83%BD%E6%B5%8B%E8%AF%95 diff --git a/modelcenter/ERNIE-3.0/benchmark_en.md b/modelcenter/ERNIE-3.0/benchmark_en.md new file mode 100644 index 00000000..88131703 --- /dev/null +++ b/modelcenter/ERNIE-3.0/benchmark_en.md @@ -0,0 +1,51 @@ +## 1. Inference Benchmark + + +### 1.1 Environment + +1. GPU: T4, CUDA 11.2, cuDNN 8.2 + +2. CPU: Intel(R) Xeon(R) Gold 6271C CPU + +3. PaddlePaddle Version: 2.3 + +4. PaddleNLP Version: 2.3 + +5. Performance is reported in QPS. QPS is measured with a fixed batch size of 32: record the total running time total_time and compute QPS = total_samples / total_time. + +6. Metrics: Accuracy for sequence classification, F1-Score for token classification, EM (Exact Match) for question answering. + +### 1.2 Datasets + +Datasets: CLUE TNEWS (sequence classification), MSRA_NER (token classification), CLUE CMRC2018 (question answering) + +### 1.3 Benchmark + +##### CPU Performance + +The test environment and instructions are as above. When testing the CPU performance, the number of threads is set to 12. + +| | TNEWS Performance | TNEWS Accuracy | MSRA_NER Performance | MSRA_NER F1 Score | CMRC2018 Performance | CMRC2018 EM | +| -------------------------- | ------------ | ------------ | ------------- | ------------- | ------------- | ------------- | +| ERNIE 3.0-Medium+FP32 | 311.95(1.0x) | 57.45 | 90.91(1.0x) | 93.04 | 33.74(1.0x) | 66.95 | +| ERNIE 3.0-Medium+INT8 | 600.35(1.9x) | 56.57(-0.88) | 141.00(1.6x) | 92.64(-0.40) | 56.51(1.7x) | 66.23(-0.72) | +| ERNIE 3.0-Medium+prune+FP32 | 408.65(1.3x) | 57.31(-0.14) | 122.13(1.3x) | 93.27(+0.23) | 48.47(1.4x) | 65.55(-1.40) | +| ERNIE 3.0-Medium+prune+INT8 | 704.42(2.3x) | 56.69(-0.76) | 215.58(2.4x) | 92.39(-0.65) | 75.23(2.2x) | 63.47(-3.48) | + +After the same compression process, the speedup ratio on all three tasks (classification, sequence labeling, reading comprehension) reaches about 2.3x. 
+ +##### GPU Performance + +| | TNEWS Performance | TNEWS Accuracy | MSRA_NER Performance | MSRA_NER F1 Score | CMRC2018 Performance | CMRC2018 EM | +| -------------------------- | ------------- | ------------ | ------------- | ------------- | ------------- | ------------- | +| ERNIE 3.0-Medium+FP32 | 1123.85(1.0x) | 57.45 | 366.75(1.0x) | 93.04 | 146.84(1.0x) | 66.95 | +| ERNIE 3.0-Medium+FP16 | 2672.41(2.4x) | 57.45(0.00) | 840.11(2.3x) | 93.05(+0.01) | 303.43(2.1x) | 66.95(0.00) | +| ERNIE 3.0-Medium+INT8 | 3226.26(2.9x) | 56.99(-0.46) | 889.33(2.4x) | 92.70(-0.34) | 348.84(2.4x) | 66.32(-0.63) | +| ERNIE 3.0-Medium+prune+FP32 | 1424.01(1.3x) | 57.31(-0.14) | 454.27(1.2x) | 93.27(+0.23) | 183.77(1.3x) | 65.92(-1.03) | +| ERNIE 3.0-Medium+prune+FP16 | 3577.62(3.2x) | 57.27(-0.18) | 1138.77(3.1x) | 93.27(+0.23) | 445.71(3.0x) | 65.89(-1.06) | +| ERNIE 3.0-Medium+prune+INT8 | 3635.48(3.2x) | 57.26(-0.19) | 1105.26(3.0x) | 93.20(+0.16) | 444.27(3.0x) | 66.17(-0.78) | + +All three tasks achieve a speedup of about 3x after pruning and quantization, and the average accuracy loss across all tasks is kept within 0.5 (0.46). + +## 2. Reference +1. 
https://github.com/PaddlePaddle/PaddleNLP/blob/develop/model_zoo/ernie-3.0/README.md#%E6%80%A7%E8%83%BD%E6%B5%8B%E8%AF%95 diff --git a/modelcenter/ERNIE-3.0/download_cn.md b/modelcenter/ERNIE-3.0/download_cn.md new file mode 100644 index 00000000..47ef2374 --- /dev/null +++ b/modelcenter/ERNIE-3.0/download_cn.md @@ -0,0 +1,9 @@ +# 下载 + +| 模型名称 | 模型结构 | 参数量 | 模型大小 | 下载地址 | +|-----------------|---------------------------------|---------|---------|---------| +|ERNIE 3.0-Base | 12-layer, 768-hidden, 12-heads | 117.9M |452.4MB|[预训练模型](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_base_zh.pdparams)| +|ERNIE 3.0-Medium | 6-layer, 768-hidden, 12-heads | 75.4M |312.5MB|[预训练模型](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_medium_zh.pdparams)| +|ERNIE 3.0-Mini | 6-layer, 384-hidden, 12-heads | 26.9M |109.0MB|[预训练模型](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_mini_zh.pdparams)| +|ERNIE 3.0-Micro | 4-layer, 384-hidden, 12-heads | 23.4M |95.48MB|[预训练模型](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_micro_zh.pdparams)| +|ERNIE 3.0-Nano | 4-layer, 312-hidden, 12-heads | 17.9M |72.4MB|[预训练模型](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_nano_zh.pdparams)| diff --git a/modelcenter/ERNIE-3.0/download_en.md b/modelcenter/ERNIE-3.0/download_en.md new file mode 100644 index 00000000..e1c5e8c6 --- /dev/null +++ b/modelcenter/ERNIE-3.0/download_en.md @@ -0,0 +1,9 @@ +# Download + +| Model | Architecture | Number of Parameters | Model Size | Download | +|-----------------|--------------------------------------|----------------------|------------------|---------------------------------------------------| +|ERNIE 3.0-Base | 12-layer, 768-hidden, 12-heads | 117.9 M | 452.4MB |[Pretrained Model](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_base_zh.pdparams)| +|ERNIE 3.0-Medium | 6-layer, 768-hidden, 12-heads | 75.4 M | 312.5MB |[Pretrained 
Model](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_medium_zh.pdparams)| +|ERNIE 3.0-Mini | 6-layer, 384-hidden, 12-heads | 26.9 M | 109.0MB |[Pretrained Model](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_mini_zh.pdparams)| +|ERNIE 3.0-Micro | 4-layer, 384-hidden, 12-heads | 23.4 M | 95.48MB |[Pretrained Model](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_micro_zh.pdparams)| +|ERNIE 3.0-Nano | 4-layer, 312-hidden, 12-heads | 17.9 M | 72.4MB |[Pretrained Model](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_nano_zh.pdparams)| diff --git a/modelcenter/ERNIE-3.0/info.yaml b/modelcenter/ERNIE-3.0/info.yaml new file mode 100644 index 00000000..44079a51 --- /dev/null +++ b/modelcenter/ERNIE-3.0/info.yaml @@ -0,0 +1,33 @@ +--- +Model_Info: + name: "ERNIE 3.0" + description: "ERNIE 3.0 轻量级模型" + description_en: "ERNIE Tiny" + icon: "@后续UE统一设计之后,会存到bos上某个位置" + from_repo: "PaddleNLP" +Task: + - tag_en: "Natural Language Processing" + tag: "自然语言处理" + sub_tag_en: "Pretrained Model" + sub_tag: "预训练模型" +Example: + - title: "【快速上手ERNIE 3.0】中文情感分析实战" + url: "https://aistudio.baidu.com/aistudio/projectdetail/3955163" + - title: "【快速上手ERNIE 3.0】法律文本多标签分类实战" + url: "https://aistudio.baidu.com/aistudio/projectdetail/3996601" + - title: "【快速上手ERNIE 3.0】中文语义匹配实战" + url: "https://aistudio.baidu.com/aistudio/projectdetail/3986803" + - title: "【快速上手ERNIE 3.0】MSRA序列标注实战" + url: "https://aistudio.baidu.com/aistudio/projectdetail/3989073" + - title: "【快速上手ERNIE 3.0】机器阅读理解实战" + url: "https://aistudio.baidu.com/aistudio/projectdetail/2017189" + - title: "【快速上手ERNIE 3.0】对话意图识别实战" + url: "https://aistudio.baidu.com/aistudio/projectdetail/2017202?contributionType=1" +Datasets: "" +Pulisher: "Baidu" +License: "apache.2.0" +Paper: + - title: "ERNIE-Tiny: A Progressive Distillation Framework for Pretrained Transformer Compression" + url: "https://arxiv.org/abs/2106.02241" +IfTraining: 0 
+IfOnlineDemo: 1 diff --git a/modelcenter/ERNIE-3.0/introduction_cn.ipynb b/modelcenter/ERNIE-3.0/introduction_cn.ipynb new file mode 100644 index 00000000..4cd575d4 --- /dev/null +++ b/modelcenter/ERNIE-3.0/introduction_cn.ipynb @@ -0,0 +1,1447 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1.ERNIE 3.0 轻量级模型简介\n", + "\n", + "PaddleNLP 开源的 [ERNIE 3.0 轻量级模型](https://github.com/paddlepaddle/PaddleNLP/tree/develop/model_zoo/ernie-3.0) 是在文心大模型 ERNIE 3.0 基础上通过在线蒸馏技术得到的轻量级模型,模型结构与 ERNIE 2.0 保持一致,相比 ERNIE 2.0 具有更强的中文效果。\n", + "\n", + "相关技术详解可参考文章[《解析全球最大中文单体模型鹏城-百度·文心技术细节》](https://www.jiqizhixin.com/articles/2021-12-08-9)\n", + "\n", + "# 2.模型效果\n", + "\n", + "ERNIE 3.0 轻量级模型开源 **ERNIE 3.0 _Base_** 、**ERNIE 3.0 _Medium_** 、 **ERNIE 3.0 _Mini_** 、 **ERNIE 3.0 _Micro_** 、 **ERNIE 3.0 _Nano_** 五个模型:\n", + "\n", + "- [**ERNIE 3.0-_Base_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_base_zh.pdparams) (_12-layer, 768-hidden, 12-heads_)\n", + "- [**ERNIE 3.0-_Medium_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_medium_zh.pdparams) (_6-layer, 768-hidden, 12-heads_)\n", + "- [**ERNIE 3.0-_Mini_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_mini_zh.pdparams) (_6-layer, 384-hidden, 12-heads_)\n", + "- [**ERNIE 3.0-_Micro_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_micro_zh.pdparams) (_4-layer, 384-hidden, 12-heads_)\n", + "- [**ERNIE 3.0-_Nano_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_nano_zh.pdparams) (_4-layer, 312-hidden, 12-heads_)\n", + "\n", + "\n", + "下面是 PaddleNLP 中轻量级中文模型的**效果-时延图**。横坐标表示在 IFLYTEK 数据集 (最大序列长度设置为 128) 上测试的延迟(latency,单位:ms),纵坐标是 CLUE 10 个任务上的平均精度(包含文本分类、文本匹配、自然语言推理、代词消歧、阅读理解等任务),其中 CMRC2018 阅读理解任务的评价指标是 Exact Match(EM),其他任务的评价指标均是 Accuracy。图中越靠**左上**的模型,精度和性能水平越高。\n", + "\n", + 
"图中模型名下方标注了模型的参数量,测试环境见[性能测试](https://github.com/paddlepaddle/PaddleNLP/tree/develop/model_zoo/ernie-3.0#%E6%80%A7%E8%83%BD%E6%B5%8B%E8%AF%95)。\n", + "\n", + "batch_size=32 时,CPU 下的效果-时延图(线程数 1 和 8):\n", + "\n", + "
\n", + " | \n", + " |
\n", + " | \n", + " |
\n", + " | \n", + " |
\n", + " Arch\n", + " | \n", + "\n", + " Model\n", + " | \n", + "\n", + " AVG\n", + " | \n", + "\n", + " AFQMC\n", + " | \n", + "\n", + " TNEWS\n", + " | \n", + "\n", + " IFLYTEK\n", + " | \n", + "\n", + " CMNLI\n", + " | \n", + "\n", + " OCNLI\n", + " | \n", + "\n", + " CLUEWSC2020\n", + " | \n", + "\n", + " CSL\n", + " | \n", + "\n", + " CMRC2018\n", + " | \n", + "\n", + " CHID\n", + " | \n", + "\n", + " C3\n", + " | \n", + "
24L1024H | \n", + "\n", + " ERNIE 1.0-Large-cw\n", + " | \n", + "\n", + " 79.03\n", + " | \n", + "\n", + " 75.97\n", + " | \n", + "\n", + " 59.65\n", + " | \n", + "\n", + " 62.91\n", + " | \n", + "\n", + " 85.09\n", + " | \n", + "\n", + " 81.73\n", + " | \n", + "\n", + " 93.09\n", + " | \n", + "\n", + " 84.53\n", + " | \n", + "\n", + " 74.22/91.88\n", + " | \n", + "\n", + " 88.57\n", + " | \n", + "\n", + " 84.54\n", + " | \n", + "
\n", + " ERNIE 2.0-Large-zh\n", + " | \n", + "\n", + " 76.90\n", + " | \n", + "\n", + " 76.23\n", + " | \n", + "\n", + " 59.33\n", + " | \n", + "\n", + " 61.91\n", + " | \n", + "\n", + " 83.85\n", + " | \n", + "\n", + " 79.93\n", + " | \n", + "\n", + " 89.82\n", + " | \n", + "\n", + " 83.23\n", + " | \n", + "\n", + " 70.95/90.31\n", + " | \n", + "\n", + " 86.78\n", + " | \n", + "\n", + " 78.12\n", + " | \n", + "|
\n", + " RoBERTa-wwm-ext-large\n", + " | \n", + "\n", + " 76.61\n", + " | \n", + "\n", + " 76.00\n", + " | \n", + "\n", + " 59.33\n", + " | \n", + "\n", + " 62.02\n", + " | \n", + "\n", + " 83.88\n", + " | \n", + "\n", + " 78.81\n", + " | \n", + "\n", + " 90.79\n", + " | \n", + "\n", + " 83.67\n", + " | \n", + "\n", + " 70.58/89.82\n", + " | \n", + "\n", + " 85.72\n", + " | \n", + "\n", + " 75.26\n", + " | \n", + "|
20L1024H | \n", + "\n", + " ERNIE 3.0-Xbase-zh\n", + " | \n", + "\n", + " 78.39\n", + " | \n", + "\n", + " 76.16\n", + " | \n", + "\n", + " 59.55\n", + " | \n", + "\n", + " 61.87\n", + " | \n", + "\n", + " 84.40\n", + " | \n", + "\n", + " 81.73\n", + " | \n", + "\n", + " 88.82\n", + " | \n", + "\n", + " 83.60\n", + " | \n", + "\n", + " 75.99/93.00\n", + " | \n", + "\n", + " 86.78\n", + " | \n", + "\n", + " 84.98\n", + " | \n", + "
12L768H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Base-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 76.05\n", + " | \n", + "\n", + " 75.93\n", + " | \n", + "\n", + " 58.26\n", + " | \n", + "\n", + " 61.56\n", + " | \n", + "\n", + " 83.02\n", + " | \n", + "\n", + " 80.10\n", + " | \n", + "\n", + " 86.18\n", + " | \n", + "\n", + " 82.63\n", + " | \n", + "\n", + " 70.71/90.41\n", + " | \n", + "\n", + " 84.26\n", + " | \n", + "\n", + " 77.88\n", + " | \n", + "
\n", + " ERNIE 1.0-Base-zh-cw\n", + " | \n", + "\n", + " 76.47\n", + " | \n", + "\n", + " 76.07\n", + " | \n", + "\n", + " 57.86\n", + " | \n", + "\n", + " 59.91\n", + " | \n", + "\n", + " 83.41\n", + " | \n", + "\n", + " 79.58\n", + " | \n", + "\n", + " 89.91\n", + " | \n", + "\n", + " 83.42\n", + " | \n", + "\n", + " 72.88/90.78\n", + " | \n", + "\n", + " 84.68\n", + " | \n", + "\n", + " 76.98\n", + " | \n", + "|
\n", + " ERNIE-Gram-zh\n", + " | \n", + "\n", + " 75.72\n", + " | \n", + "\n", + " 75.28\n", + " | \n", + "\n", + " 57.88\n", + " | \n", + "\n", + " 60.87\n", + " | \n", + "\n", + " 82.90\n", + " | \n", + "\n", + " 79.08\n", + " | \n", + "\n", + " 88.82\n", + " | \n", + "\n", + " 82.83\n", + " | \n", + "\n", + " 71.82/90.38\n", + " | \n", + "\n", + " 84.04\n", + " | \n", + "\n", + " 73.69\n", + " | \n", + "|
\n", + " Langboat/Mengzi-BERT-Base\n", + " | \n", + "\n", + " 74.69\n", + " | \n", + "\n", + " 75.35\n", + " | \n", + "\n", + " 57.76\n", + " | \n", + "\n", + " 61.64\n", + " | \n", + "\n", + " 82.41\n", + " | \n", + "\n", + " 77.93\n", + " | \n", + "\n", + " 88.16\n", + " | \n", + "\n", + " 82.20\n", + " | \n", + "\n", + " 67.04/88.35\n", + " | \n", + "\n", + " 83.74\n", + " | \n", + "\n", + " 70.70\n", + " | \n", + "|
\n", + " ERNIE 2.0-Base-zh\n", + " | \n", + "\n", + " 74.32\n", + " | \n", + "\n", + " 75.65\n", + " | \n", + "\n", + " 58.25\n", + " | \n", + "\n", + " 61.64\n", + " | \n", + "\n", + " 82.62\n", + " | \n", + "\n", + " 78.71\n", + " | \n", + "\n", + " 81.91\n", + " | \n", + "\n", + " 82.33\n", + " | \n", + "\n", + " 66.08/87.46\n", + " | \n", + "\n", + " 82.78\n", + " | \n", + "\n", + " 73.19\n", + " | \n", + "|
\n", + " ERNIE 1.0-Base-zh\n", + " | \n", + "\n", + " 74.17\n", + " | \n", + "\n", + " 74.84\n", + " | \n", + "\n", + " 58.91\n", + " | \n", + "\n", + " 62.25\n", + " | \n", + "\n", + " 81.68\n", + " | \n", + "\n", + " 76.58\n", + " | \n", + "\n", + " 85.20\n", + " | \n", + "\n", + " 82.77\n", + " | \n", + "\n", + " 67.32/87.83\n", + " | \n", + "\n", + " 82.47\n", + " | \n", + "\n", + " 69.68\n", + " | \n", + "|
\n", + " RoBERTa-wwm-ext\n", + " | \n", + "\n", + " 74.11\n", + " | \n", + "\n", + " 74.60\n", + " | \n", + "\n", + " 58.08\n", + " | \n", + "\n", + " 61.23\n", + " | \n", + "\n", + " 81.11\n", + " | \n", + "\n", + " 76.92\n", + " | \n", + "\n", + " 88.49\n", + " | \n", + "\n", + " 80.77\n", + " | \n", + "\n", + " 68.39/88.50\n", + " | \n", + "\n", + " 83.43\n", + " | \n", + "\n", + " 68.03\n", + " | \n", + "|
\n", + " BERT-Base-Chinese\n", + " | \n", + "\n", + " 72.57\n", + " | \n", + "\n", + " 74.63\n", + " | \n", + "\n", + " 57.13\n", + " | \n", + "\n", + " 61.29\n", + " | \n", + "\n", + " 80.97\n", + " | \n", + "\n", + " 75.22\n", + " | \n", + "\n", + " 81.91\n", + " | \n", + "\n", + " 81.90\n", + " | \n", + "\n", + " 65.30/86.53\n", + " | \n", + "\n", + " 82.01\n", + " | \n", + "\n", + " 65.38\n", + " | \n", + "|
\n", + " UER/Chinese-RoBERTa-Base\n", + " | \n", + "\n", + " 71.78\n", + " | \n", + "\n", + " 72.89\n", + " | \n", + "\n", + " 57.62\n", + " | \n", + "\n", + " 61.14\n", + " | \n", + "\n", + " 80.01\n", + " | \n", + "\n", + " 75.56\n", + " | \n", + "\n", + " 81.58\n", + " | \n", + "\n", + " 80.80\n", + " | \n", + "\n", + " 63.87/84.95\n", + " | \n", + "\n", + " 81.52\n", + " | \n", + "\n", + " 62.76\n", + " | \n", + "|
8L512H | \n", + "\n", + " UER/Chinese-RoBERTa-Medium\n", + " | \n", + "\n", + " 67.06\n", + " | \n", + "\n", + " 70.64\n", + " | \n", + "\n", + " 56.10\n", + " | \n", + "\n", + " 58.29\n", + " | \n", + "\n", + " 77.35\n", + " | \n", + "\n", + " 71.90\n", + " | \n", + "\n", + " 68.09\n", + " | \n", + "\n", + " 78.63\n", + " | \n", + "\n", + " 57.63/78.91\n", + " | \n", + "\n", + " 75.13\n", + " | \n", + "\n", + " 56.84\n", + " | \n", + "
6L768H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Medium-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 72.49\n", + " | \n", + "\n", + " 73.37\n", + " | \n", + "\n", + " 57.00\n", + " | \n", + "\n", + " 60.67\n", + " | \n", + "\n", + " 80.64\n", + " | \n", + "\n", + " 76.88\n", + " | \n", + "\n", + " 79.28\n", + " | \n", + "\n", + " 81.60\n", + " | \n", + "\n", + " 65.83/87.30\n", + " | \n", + "\n", + " 79.91\n", + " | \n", + "\n", + " 69.73\n", + " | \n", + "
\n", + " HLF/RBT6, Chinese\n", + " | \n", + "\n", + " 70.06\n", + " | \n", + "\n", + " 73.45\n", + " | \n", + "\n", + " 56.82\n", + " | \n", + "\n", + " 59.64\n", + " | \n", + "\n", + " 79.36\n", + " | \n", + "\n", + " 73.32\n", + " | \n", + "\n", + " 76.64\n", + " | \n", + "\n", + " 80.67\n", + " | \n", + "\n", + " 62.72/84.77\n", + " | \n", + "\n", + " 78.17\n", + " | \n", + "\n", + " 59.85\n", + " | \n", + "|
\n", + " TinyBERT6, Chinese\n", + " | \n", + "\n", + " 69.62\n", + " | \n", + "\n", + " 72.22\n", + " | \n", + "\n", + " 55.70\n", + " | \n", + "\n", + " 54.48\n", + " | \n", + "\n", + " 79.12\n", + " | \n", + "\n", + " 74.07\n", + " | \n", + "\n", + " 77.63\n", + " | \n", + "\n", + " 80.17\n", + " | \n", + "\n", + " 63.03/83.75\n", + " | \n", + "\n", + " 77.64\n", + " | \n", + "\n", + " 62.11\n", + " | \n", + "|
\n", + " RoFormerV2 Small\n", + " | \n", + "\n", + " 68.52\n", + " | \n", + "\n", + " 72.47\n", + " | \n", + "\n", + " 56.53\n", + " | \n", + "\n", + " 60.72\n", + " | \n", + "\n", + " 76.37\n", + " | \n", + "\n", + " 72.95\n", + " | \n", + "\n", + " 75.00\n", + " | \n", + "\n", + " 81.07\n", + " | \n", + "\n", + " 62.97/83.64\n", + " | \n", + "\n", + " 67.66\n", + " | \n", + "\n", + " 59.41\n", + " | \n", + "|
\n", + " UER/Chinese-RoBERTa-L6-H768\n", + " | \n", + "\n", + " 67.09\n", + " | \n", + "\n", + " 70.13\n", + " | \n", + "\n", + " 56.54\n", + " | \n", + "\n", + " 60.48\n", + " | \n", + "\n", + " 77.49\n", + " | \n", + "\n", + " 72.00\n", + " | \n", + "\n", + " 72.04\n", + " | \n", + "\n", + " 77.33\n", + " | \n", + "\n", + " 53.74/75.52\n", + " | \n", + "\n", + " 76.73\n", + " | \n", + "\n", + " 54.40\n", + " | \n", + "|
6L384H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Mini-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 66.90\n", + " | \n", + "\n", + " 71.85\n", + " | \n", + "\n", + " 55.24\n", + " | \n", + "\n", + " 54.48\n", + " | \n", + "\n", + " 77.19\n", + " | \n", + "\n", + " 73.08\n", + " | \n", + "\n", + " 71.05\n", + " | \n", + "\n", + " 79.30\n", + " | \n", + "\n", + " 58.53/81.97\n", + " | \n", + "\n", + " 69.71\n", + " | \n", + "\n", + " 58.60\n", + " | \n", + "
4L768H | \n", + "\n", + " HFL/RBT4, Chinese\n", + " | \n", + "\n", + " 67.42\n", + " | \n", + "\n", + " 72.41\n", + " | \n", + "\n", + " 56.50\n", + " | \n", + "\n", + " 58.95\n", + " | \n", + "\n", + " 77.34\n", + " | \n", + "\n", + " 70.78\n", + " | \n", + "\n", + " 71.05\n", + " | \n", + "\n", + " 78.23\n", + " | \n", + "\n", + " 59.30/81.93\n", + " | \n", + "\n", + " 73.18\n", + " | \n", + "\n", + " 56.45\n", + " | \n", + "
4L512H | \n", + "\n", + " UER/Chinese-RoBERTa-Small\n", + " | \n", + "\n", + " 63.25\n", + " | \n", + "\n", + " 69.21\n", + " | \n", + "\n", + " 55.41\n", + " | \n", + "\n", + " 57.552\n", + " | \n", + "\n", + " 73.64\n", + " | \n", + "\n", + " 69.80\n", + " | \n", + "\n", + " 66.78\n", + " | \n", + "\n", + " 74.83\n", + " | \n", + "\n", + " 46.75/69.69\n", + " | \n", + "\n", + " 67.59\n", + " | \n", + "\n", + " 50.92\n", + " | \n", + "
4L384H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Micro-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 64.21\n", + " | \n", + "\n", + " 71.15\n", + " | \n", + "\n", + " 55.05\n", + " | \n", + "\n", + " 53.83\n", + " | \n", + "\n", + " 74.81\n", + " | \n", + "\n", + " 70.41\n", + " | \n", + "\n", + " 69.08\n", + " | \n", + "\n", + " 76.50\n", + " | \n", + "\n", + " 53.77/77.82\n", + " | \n", + "\n", + " 62.26\n", + " | \n", + "\n", + " 55.53\n", + " | \n", + "
4L312H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Nano-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 62.97\n", + " | \n", + "\n", + " 70.51\n", + " | \n", + "\n", + " 54.57\n", + " | \n", + "\n", + " 48.36\n", + " | \n", + "\n", + " 74.97\n", + " | \n", + "\n", + " 70.61\n", + " | \n", + "\n", + " 68.75\n", + " | \n", + "\n", + " 75.93\n", + " | \n", + "\n", + " 52.00/76.35\n", + " | \n", + "\n", + " 58.91\n", + " | \n", + "\n", + " 55.11\n", + " | \n", + "
\n", + " TinyBERT4, Chinese\n", + " | \n", + "\n", + " 60.82\n", + " | \n", + "\n", + " 69.07\n", + " | \n", + "\n", + " 54.02\n", + " | \n", + "\n", + " 39.71\n", + " | \n", + "\n", + " 73.94\n", + " | \n", + "\n", + " 69.59\n", + " | \n", + "\n", + " 70.07\n", + " | \n", + "\n", + " 75.07\n", + " | \n", + "\n", + " 46.04/69.34\n", + " | \n", + "\n", + " 58.53\n", + " | \n", + "\n", + " 52.18\n", + " | \n", + "|
4L256H | \n", + "\n", + " UER/Chinese-RoBERTa-Mini\n", + " | \n", + "\n", + " 53.40\n", + " | \n", + "\n", + " 69.32\n", + " | \n", + "\n", + " 54.22\n", + " | \n", + "\n", + " 41.63\n", + " | \n", + "\n", + " 69.40\n", + " | \n", + "\n", + " 67.36\n", + " | \n", + "\n", + " 65.13\n", + " | \n", + "\n", + " 70.07\n", + " | \n", + "\n", + " 5.96/17.13\n", + " | \n", + "\n", + " 51.19\n", + " | \n", + "\n", + " 39.68\n", + " | \n", + "
3L1024H | \n", + "\n", + " HFL/RBTL3, Chinese\n", + " | \n", + "\n", + " 66.63\n", + " | \n", + "\n", + " 71.11\n", + " | \n", + "\n", + " 56.14\n", + " | \n", + "\n", + " 59.56\n", + " | \n", + "\n", + " 76.41\n", + " | \n", + "\n", + " 71.29\n", + " | \n", + "\n", + " 69.74\n", + " | \n", + "\n", + " 76.93\n", + " | \n", + "\n", + " 58.50/80.90\n", + " | \n", + "\n", + " 71.03\n", + " | \n", + "\n", + " 55.56\n", + " | \n", + "
3L768H | \n", + "\n", + " HFL/RBT3, Chinese\n", + " | \n", + "\n", + " 65.72\n", + " | \n", + "\n", + " 70.95\n", + " | \n", + "\n", + " 55.53\n", + " | \n", + "\n", + " 59.18\n", + " | \n", + "\n", + " 76.20\n", + " | \n", + "\n", + " 70.71\n", + " | \n", + "\n", + " 67.11\n", + " | \n", + "\n", + " 76.63\n", + " | \n", + "\n", + " 55.73/78.63\n", + " | \n", + "\n", + " 70.26\n", + " | \n", + "\n", + " 54.93\n", + " | \n", + "
2L128H | \n", + "\n", + " UER/Chinese-RoBERTa-Tiny\n", + " | \n", + "\n", + " 44.45\n", + " | \n", + "\n", + " 69.02\n", + " | \n", + "\n", + " 51.47\n", + " | \n", + "\n", + " 20.28\n", + " | \n", + "\n", + " 59.95\n", + " | \n", + "\n", + " 57.73\n", + " | \n", + "\n", + " 63.82\n", + " | \n", + "\n", + " 67.43\n", + " | \n", + "\n", + " 3.08/14.33\n", + " | \n", + "\n", + " 23.57\n", + " | \n", + "\n", + " 28.12\n", + " | \n", + "
\n", + " \n", + "
\n", + "\n", + "\n", + "# 4.原理\n", + "\n", + "### 在线蒸馏技术\n", + "\n", + "在线蒸馏技术在模型学习的过程中周期性地将知识信号传递给若干个学生模型同时训练,从而在蒸馏阶段一次性产出多种尺寸的学生模型。相对传统蒸馏技术,该技术极大节省了因大模型额外蒸馏计算以及多个学生的重复知识传递带来的算力消耗。\n", + "\n", + "这种新颖的蒸馏方式利用了文心大模型的规模优势,在蒸馏完成后保证了学生模型的效果和尺寸丰富性,方便不同性能需求的应用场景使用。此外,由于文心大模型的模型尺寸与学生模型差距巨大,模型蒸馏难度极大甚至容易失效。为此,通过引入了助教模型进行蒸馏的技术,利用助教作为知识传递的桥梁以缩短学生模型和大模型表达空间相距过大的问题,从而促进蒸馏效率的提升。\n", + "\n", + "更多技术细节可以参考论文:\n", + "- [ERNIE-Tiny: A Progressive Distillation Framework for Pretrained Transformer Compression](https://arxiv.org/abs/2106.02241)\n", + "- [ERNIE 3.0 Titan: Exploring Larger-scale Knowledge Enhanced Pre-training for Language Understanding and Generation](https://arxiv.org/abs/2112.12731)\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + "# 5.相关论文及引用信息\n", + "\n", + "\n", + "```text\n", + "@article{sun2021ernie,\n", + " title={Ernie 3.0: Large-scale knowledge enhanced pre-training for language understanding and generation},\n", + " author={Sun, Yu and Wang, Shuohuan and Feng, Shikun and Ding, Siyu and Pang, Chao and Shang, Junyuan and Liu, Jiaxiang and Chen, Xuyi and Zhao, Yanbin and Lu, Yuxiang and others},\n", + " journal={arXiv preprint arXiv:2107.02137},\n", + " year={2021}\n", + "}\n", + "\n", + "@article{su2021ernie,\n", + " title={Ernie-tiny: A progressive distillation framework for pretrained transformer compression},\n", + " author={Su, Weiyue and Chen, Xuyi and Feng, Shikun and Liu, Jiaxiang and Liu, Weixin and Sun, Yu and Tian, Hao and Wu, Hua and Wang, Haifeng},\n", + " journal={arXiv preprint arXiv:2106.02241},\n", + " year={2021}\n", + "}\n", + "\n", + "@article{wang2021ernie,\n", + " title={Ernie 3.0 titan: Exploring larger-scale knowledge enhanced pre-training for language understanding and generation},\n", + " author={Wang, Shuohuan and Sun, Yu and Xiang, Yang and Wu, Zhihua and Ding, Siyu and Gong, Weibao and Feng, Shikun and Shang, Junyuan and Zhao, Yanbin and Pang, Chao and others},\n", + " journal={arXiv preprint arXiv:2112.12731},\n", + " year={2021}\n", + "}\n", + "```\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "py35-paddle1.2.0" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/modelcenter/ERNIE-3.0/introduction_en.ipynb b/modelcenter/ERNIE-3.0/introduction_en.ipynb new file mode 100644 index 00000000..de5ce750 --- /dev/null +++ b/modelcenter/ERNIE-3.0/introduction_en.ipynb @@ -0,0 +1,2708 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1.ERNIE Tiny 3.0 Introduction\n", + "\n", + "[ERNIE Tiny Models](https://github.com/paddlepaddle/PaddleNLP/tree/develop/model_zoo/ernie-3.0) are lightweight models distilled from the Wenxin large model ERNIE 3.0 using online distillation. Their model structure is consistent with ERNIE 2.0, and they perform better than ERNIE 2.0 on Chinese tasks.\n", + "\n", + "For a detailed explanation of the related technologies, please refer to the article [_解析全球最大中文单体模型鹏城-百度·文心技术细节_](https://www.jiqizhixin.com/articles/2021-12-08-9).\n", + "\n", + "# 2.Model Effects\n", + "\n", + "ERNIE Tiny 3.0 provides five open-source models: **ERNIE 3.0 _Base_**, **ERNIE 3.0 _Medium_**, **ERNIE 3.0 _Mini_**, **ERNIE 3.0 _Micro_**, and **ERNIE 3.0 _Nano_**:\n", + "\n", + "- [**ERNIE 3.0-_Base_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_base_zh.pdparams) (_12-layer, 768-hidden, 12-heads_)\n", + "- [**ERNIE 3.0-_Medium_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_medium_zh.pdparams) (_6-layer, 768-hidden, 12-heads_)\n", + "- [**ERNIE 3.0-_Mini_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_mini_zh.pdparams) (_6-layer, 384-hidden, 12-heads_)\n", + "- [**ERNIE 3.0-_Micro_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_micro_zh.pdparams) (_4-layer, 384-hidden, 12-heads_)\n", + "- [**ERNIE 3.0-_Nano_**](https://bj.bcebos.com/paddlenlp/models/transformers/ernie_3.0/ernie_3.0_nano_zh.pdparams) (_4-layer, 312-hidden, 12-heads_)\n", + "\n", + "\n", + "Below is the **precision-latency graph** of the small Chinese models in PaddleNLP. 
The abscissa represents the latency (unit: ms) tested on the CLUE IFLYTEK dataset (with the maximum sequence length set to 128), and the ordinate is the average accuracy on 10 CLUE tasks (including text classification, text matching, natural language inference, pronoun disambiguation, machine reading comprehension and other tasks). The metric for CMRC2018 is Exact Match (EM), and the metric for the other tasks is Accuracy. The closer a model is to the top left of the figure, the higher its accuracy and performance.\n", + "\n", + "The number of parameters of each model is marked under the model name in the figure. For the test environment, see [Performance Test](https://github.com/paddlepaddle/PaddleNLP/tree/develop/model_zoo/ernie-3.0#%E6%80%A7%E8%83%BD%E6%B5%8B%E8%AF%95) for details.\n", + "\n", + "\n", + "Precision-latency graph under CPU (number of threads: 1 and 8), batch_size = 32:\n", + "\n", + "\n", + " | \n", + " |
\n", + " | \n", + " |
\n", + " | \n", + " |
\n", + " Arch\n", + " | \n", + "\n", + " Model\n", + " | \n", + "\n", + " AVG\n", + " | \n", + "\n", + " AFQMC\n", + " | \n", + "\n", + " TNEWS\n", + " | \n", + "\n", + " IFLYTEK\n", + " | \n", + "\n", + " CMNLI\n", + " | \n", + "\n", + " OCNLI\n", + " | \n", + "\n", + " CLUEWSC2020\n", + " | \n", + "\n", + " CSL\n", + " | \n", + "\n", + " CMRC2018\n", + " | \n", + "\n", + " CHID\n", + " | \n", + "\n", + " C3\n", + " | \n", + "
24L1024H | \n", + "\n", + " ERNIE 1.0-Large-cw\n", + " | \n", + "\n", + " 79.03\n", + " | \n", + "\n", + " 75.97\n", + " | \n", + "\n", + " 59.65\n", + " | \n", + "\n", + " 62.91\n", + " | \n", + "\n", + " 85.09\n", + " | \n", + "\n", + " 81.73\n", + " | \n", + "\n", + " 93.09\n", + " | \n", + "\n", + " 84.53\n", + " | \n", + "\n", + " 74.22/91.88\n", + " | \n", + "\n", + " 88.57\n", + " | \n", + "\n", + " 84.54\n", + " | \n", + "
\n", + " ERNIE 2.0-Large-zh\n", + " | \n", + "\n", + " 76.90\n", + " | \n", + "\n", + " 76.23\n", + " | \n", + "\n", + " 59.33\n", + " | \n", + "\n", + " 61.91\n", + " | \n", + "\n", + " 83.85\n", + " | \n", + "\n", + " 79.93\n", + " | \n", + "\n", + " 89.82\n", + " | \n", + "\n", + " 83.23\n", + " | \n", + "\n", + " 70.95/90.31\n", + " | \n", + "\n", + " 86.78\n", + " | \n", + "\n", + " 78.12\n", + " | \n", + "|
\n", + " RoBERTa-wwm-ext-large\n", + " | \n", + "\n", + " 76.61\n", + " | \n", + "\n", + " 76.00\n", + " | \n", + "\n", + " 59.33\n", + " | \n", + "\n", + " 62.02\n", + " | \n", + "\n", + " 83.88\n", + " | \n", + "\n", + " 78.81\n", + " | \n", + "\n", + " 90.79\n", + " | \n", + "\n", + " 83.67\n", + " | \n", + "\n", + " 70.58/89.82\n", + " | \n", + "\n", + " 85.72\n", + " | \n", + "\n", + " 75.26\n", + " | \n", + "|
20L1024H | \n", + "\n", + " ERNIE 3.0-Xbase-zh\n", + " | \n", + "\n", + " 78.39\n", + " | \n", + "\n", + " 76.16\n", + " | \n", + "\n", + " 59.55\n", + " | \n", + "\n", + " 61.87\n", + " | \n", + "\n", + " 84.40\n", + " | \n", + "\n", + " 81.73\n", + " | \n", + "\n", + " 88.82\n", + " | \n", + "\n", + " 83.60\n", + " | \n", + "\n", + " 75.99/93.00\n", + " | \n", + "\n", + " 86.78\n", + " | \n", + "\n", + " 84.98\n", + " | \n", + "
12L768H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Base-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 76.05\n", + " | \n", + "\n", + " 75.93\n", + " | \n", + "\n", + " 58.26\n", + " | \n", + "\n", + " 61.56\n", + " | \n", + "\n", + " 83.02\n", + " | \n", + "\n", + " 80.10\n", + " | \n", + "\n", + " 86.18\n", + " | \n", + "\n", + " 82.63\n", + " | \n", + "\n", + " 70.71/90.41\n", + " | \n", + "\n", + " 84.26\n", + " | \n", + "\n", + " 77.88\n", + " | \n", + "
\n", + " ERNIE 1.0-Base-zh-cw\n", + " | \n", + "\n", + " 76.47\n", + " | \n", + "\n", + " 76.07\n", + " | \n", + "\n", + " 57.86\n", + " | \n", + "\n", + " 59.91\n", + " | \n", + "\n", + " 83.41\n", + " | \n", + "\n", + " 79.58\n", + " | \n", + "\n", + " 89.91\n", + " | \n", + "\n", + " 83.42\n", + " | \n", + "\n", + " 72.88/90.78\n", + " | \n", + "\n", + " 84.68\n", + " | \n", + "\n", + " 76.98\n", + " | \n", + "|
\n", + " ERNIE-Gram-zh\n", + " | \n", + "\n", + " 75.72\n", + " | \n", + "\n", + " 75.28\n", + " | \n", + "\n", + " 57.88\n", + " | \n", + "\n", + " 60.87\n", + " | \n", + "\n", + " 82.90\n", + " | \n", + "\n", + " 79.08\n", + " | \n", + "\n", + " 88.82\n", + " | \n", + "\n", + " 82.83\n", + " | \n", + "\n", + " 71.82/90.38\n", + " | \n", + "\n", + " 84.04\n", + " | \n", + "\n", + " 73.69\n", + " | \n", + "|
\n", + " Langboat/Mengzi-BERT-Base\n", + " | \n", + "\n", + " 74.69\n", + " | \n", + "\n", + " 75.35\n", + " | \n", + "\n", + " 57.76\n", + " | \n", + "\n", + " 61.64\n", + " | \n", + "\n", + " 82.41\n", + " | \n", + "\n", + " 77.93\n", + " | \n", + "\n", + " 88.16\n", + " | \n", + "\n", + " 82.20\n", + " | \n", + "\n", + " 67.04/88.35\n", + " | \n", + "\n", + " 83.74\n", + " | \n", + "\n", + " 70.70\n", + " | \n", + "|
\n", + " ERNIE 2.0-Base-zh\n", + " | \n", + "\n", + " 74.32\n", + " | \n", + "\n", + " 75.65\n", + " | \n", + "\n", + " 58.25\n", + " | \n", + "\n", + " 61.64\n", + " | \n", + "\n", + " 82.62\n", + " | \n", + "\n", + " 78.71\n", + " | \n", + "\n", + " 81.91\n", + " | \n", + "\n", + " 82.33\n", + " | \n", + "\n", + " 66.08/87.46\n", + " | \n", + "\n", + " 82.78\n", + " | \n", + "\n", + " 73.19\n", + " | \n", + "|
\n", + " ERNIE 1.0-Base-zh\n", + " | \n", + "\n", + " 74.17\n", + " | \n", + "\n", + " 74.84\n", + " | \n", + "\n", + " 58.91\n", + " | \n", + "\n", + " 62.25\n", + " | \n", + "\n", + " 81.68\n", + " | \n", + "\n", + " 76.58\n", + " | \n", + "\n", + " 85.20\n", + " | \n", + "\n", + " 82.77\n", + " | \n", + "\n", + " 67.32/87.83\n", + " | \n", + "\n", + " 82.47\n", + " | \n", + "\n", + " 69.68\n", + " | \n", + "|
\n", + " RoBERTa-wwm-ext\n", + " | \n", + "\n", + " 74.11\n", + " | \n", + "\n", + " 74.60\n", + " | \n", + "\n", + " 58.08\n", + " | \n", + "\n", + " 61.23\n", + " | \n", + "\n", + " 81.11\n", + " | \n", + "\n", + " 76.92\n", + " | \n", + "\n", + " 88.49\n", + " | \n", + "\n", + " 80.77\n", + " | \n", + "\n", + " 68.39/88.50\n", + " | \n", + "\n", + " 83.43\n", + " | \n", + "\n", + " 68.03\n", + " | \n", + "|
\n", + " BERT-Base-Chinese\n", + " | \n", + "\n", + " 72.57\n", + " | \n", + "\n", + " 74.63\n", + " | \n", + "\n", + " 57.13\n", + " | \n", + "\n", + " 61.29\n", + " | \n", + "\n", + " 80.97\n", + " | \n", + "\n", + " 75.22\n", + " | \n", + "\n", + " 81.91\n", + " | \n", + "\n", + " 81.90\n", + " | \n", + "\n", + " 65.30/86.53\n", + " | \n", + "\n", + " 82.01\n", + " | \n", + "\n", + " 65.38\n", + " | \n", + "|
\n", + " UER/Chinese-RoBERTa-Base\n", + " | \n", + "\n", + " 71.78\n", + " | \n", + "\n", + " 72.89\n", + " | \n", + "\n", + " 57.62\n", + " | \n", + "\n", + " 61.14\n", + " | \n", + "\n", + " 80.01\n", + " | \n", + "\n", + " 75.56\n", + " | \n", + "\n", + " 81.58\n", + " | \n", + "\n", + " 80.80\n", + " | \n", + "\n", + " 63.87/84.95\n", + " | \n", + "\n", + " 81.52\n", + " | \n", + "\n", + " 62.76\n", + " | \n", + "|
8L512H | \n", + "\n", + " UER/Chinese-RoBERTa-Medium\n", + " | \n", + "\n", + " 67.06\n", + " | \n", + "\n", + " 70.64\n", + " | \n", + "\n", + " 56.10\n", + " | \n", + "\n", + " 58.29\n", + " | \n", + "\n", + " 77.35\n", + " | \n", + "\n", + " 71.90\n", + " | \n", + "\n", + " 68.09\n", + " | \n", + "\n", + " 78.63\n", + " | \n", + "\n", + " 57.63/78.91\n", + " | \n", + "\n", + " 75.13\n", + " | \n", + "\n", + " 56.84\n", + " | \n", + "
6L768H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Medium-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 72.49\n", + " | \n", + "\n", + " 73.37\n", + " | \n", + "\n", + " 57.00\n", + " | \n", + "\n", + " 60.67\n", + " | \n", + "\n", + " 80.64\n", + " | \n", + "\n", + " 76.88\n", + " | \n", + "\n", + " 79.28\n", + " | \n", + "\n", + " 81.60\n", + " | \n", + "\n", + " 65.83/87.30\n", + " | \n", + "\n", + " 79.91\n", + " | \n", + "\n", + " 69.73\n", + " | \n", + "
\n", + " HFL/RBT6, Chinese\n", + " | \n", + "\n", + " 70.06\n", + " | \n", + "\n", + " 73.45\n", + " | \n", + "\n", + " 56.82\n", + " | \n", + "\n", + " 59.64\n", + " | \n", + "\n", + " 79.36\n", + " | \n", + "\n", + " 73.32\n", + " | \n", + "\n", + " 76.64\n", + " | \n", + "\n", + " 80.67\n", + " | \n", + "\n", + " 62.72/84.77\n", + " | \n", + "\n", + " 78.17\n", + " | \n", + "\n", + " 59.85\n", + " | \n", + "|
\n", + " TinyBERT6, Chinese\n", + " | \n", + "\n", + " 69.62\n", + " | \n", + "\n", + " 72.22\n", + " | \n", + "\n", + " 55.70\n", + " | \n", + "\n", + " 54.48\n", + " | \n", + "\n", + " 79.12\n", + " | \n", + "\n", + " 74.07\n", + " | \n", + "\n", + " 77.63\n", + " | \n", + "\n", + " 80.17\n", + " | \n", + "\n", + " 63.03/83.75\n", + " | \n", + "\n", + " 77.64\n", + " | \n", + "\n", + " 62.11\n", + " | \n", + "|
\n", + " RoFormerV2 Small\n", + " | \n", + "\n", + " 68.52\n", + " | \n", + "\n", + " 72.47\n", + " | \n", + "\n", + " 56.53\n", + " | \n", + "\n", + " 60.72\n", + " | \n", + "\n", + " 76.37\n", + " | \n", + "\n", + " 72.95\n", + " | \n", + "\n", + " 75.00\n", + " | \n", + "\n", + " 81.07\n", + " | \n", + "\n", + " 62.97/83.64\n", + " | \n", + "\n", + " 67.66\n", + " | \n", + "\n", + " 59.41\n", + " | \n", + "|
\n", + " UER/Chinese-RoBERTa-L6-H768\n", + " | \n", + "\n", + " 67.09\n", + " | \n", + "\n", + " 70.13\n", + " | \n", + "\n", + " 56.54\n", + " | \n", + "\n", + " 60.48\n", + " | \n", + "\n", + " 77.49\n", + " | \n", + "\n", + " 72.00\n", + " | \n", + "\n", + " 72.04\n", + " | \n", + "\n", + " 77.33\n", + " | \n", + "\n", + " 53.74/75.52\n", + " | \n", + "\n", + " 76.73\n", + " | \n", + "\n", + " 54.40\n", + " | \n", + "|
6L384H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Mini-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 66.90\n", + " | \n", + "\n", + " 71.85\n", + " | \n", + "\n", + " 55.24\n", + " | \n", + "\n", + " 54.48\n", + " | \n", + "\n", + " 77.19\n", + " | \n", + "\n", + " 73.08\n", + " | \n", + "\n", + " 71.05\n", + " | \n", + "\n", + " 79.30\n", + " | \n", + "\n", + " 58.53/81.97\n", + " | \n", + "\n", + " 69.71\n", + " | \n", + "\n", + " 58.60\n", + " | \n", + "
4L768H | \n", + "\n", + " HFL/RBT4, Chinese\n", + " | \n", + "\n", + " 67.42\n", + " | \n", + "\n", + " 72.41\n", + " | \n", + "\n", + " 56.50\n", + " | \n", + "\n", + " 58.95\n", + " | \n", + "\n", + " 77.34\n", + " | \n", + "\n", + " 70.78\n", + " | \n", + "\n", + " 71.05\n", + " | \n", + "\n", + " 78.23\n", + " | \n", + "\n", + " 59.30/81.93\n", + " | \n", + "\n", + " 73.18\n", + " | \n", + "\n", + " 56.45\n", + " | \n", + "
4L512H | \n", + "\n", + " UER/Chinese-RoBERTa-Small\n", + " | \n", + "\n", + " 63.25\n", + " | \n", + "\n", + " 69.21\n", + " | \n", + "\n", + " 55.41\n", + " | \n", + "\n", + " 57.552\n", + " | \n", + "\n", + " 73.64\n", + " | \n", + "\n", + " 69.80\n", + " | \n", + "\n", + " 66.78\n", + " | \n", + "\n", + " 74.83\n", + " | \n", + "\n", + " 46.75/69.69\n", + " | \n", + "\n", + " 67.59\n", + " | \n", + "\n", + " 50.92\n", + " | \n", + "
4L384H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Micro-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 64.21\n", + " | \n", + "\n", + " 71.15\n", + " | \n", + "\n", + " 55.05\n", + " | \n", + "\n", + " 53.83\n", + " | \n", + "\n", + " 74.81\n", + " | \n", + "\n", + " 70.41\n", + " | \n", + "\n", + " 69.08\n", + " | \n", + "\n", + " 76.50\n", + " | \n", + "\n", + " 53.77/77.82\n", + " | \n", + "\n", + " 62.26\n", + " | \n", + "\n", + " 55.53\n", + " | \n", + "
4L312H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Nano-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 62.97\n", + " | \n", + "\n", + " 70.51\n", + " | \n", + "\n", + " 54.57\n", + " | \n", + "\n", + " 48.36\n", + " | \n", + "\n", + " 74.97\n", + " | \n", + "\n", + " 70.61\n", + " | \n", + "\n", + " 68.75\n", + " | \n", + "\n", + " 75.93\n", + " | \n", + "\n", + " 52.00/76.35\n", + " | \n", + "\n", + " 58.91\n", + " | \n", + "\n", + " 55.11\n", + " | \n", + "
\n", + " TinyBERT4, Chinese\n", + " | \n", + "\n", + " 60.82\n", + " | \n", + "\n", + " 69.07\n", + " | \n", + "\n", + " 54.02\n", + " | \n", + "\n", + " 39.71\n", + " | \n", + "\n", + " 73.94\n", + " | \n", + "\n", + " 69.59\n", + " | \n", + "\n", + " 70.07\n", + " | \n", + "\n", + " 75.07\n", + " | \n", + "\n", + " 46.04/69.34\n", + " | \n", + "\n", + " 58.53\n", + " | \n", + "\n", + " 52.18\n", + " | \n", + "|
4L256H | \n", + "\n", + " UER/Chinese-RoBERTa-Mini\n", + " | \n", + "\n", + " 53.40\n", + " | \n", + "\n", + " 69.32\n", + " | \n", + "\n", + " 54.22\n", + " | \n", + "\n", + " 41.63\n", + " | \n", + "\n", + " 69.40\n", + " | \n", + "\n", + " 67.36\n", + " | \n", + "\n", + " 65.13\n", + " | \n", + "\n", + " 70.07\n", + " | \n", + "\n", + " 5.96/17.13\n", + " | \n", + "\n", + " 51.19\n", + " | \n", + "\n", + " 39.68\n", + " | \n", + "
3L1024H | \n", + "\n", + " HFL/RBTL3, Chinese\n", + " | \n", + "\n", + " 66.63\n", + " | \n", + "\n", + " 71.11\n", + " | \n", + "\n", + " 56.14\n", + " | \n", + "\n", + " 59.56\n", + " | \n", + "\n", + " 76.41\n", + " | \n", + "\n", + " 71.29\n", + " | \n", + "\n", + " 69.74\n", + " | \n", + "\n", + " 76.93\n", + " | \n", + "\n", + " 58.50/80.90\n", + " | \n", + "\n", + " 71.03\n", + " | \n", + "\n", + " 55.56\n", + " | \n", + "
3L768H | \n", + "\n", + " HFL/RBT3, Chinese\n", + " | \n", + "\n", + " 65.72\n", + " | \n", + "\n", + " 70.95\n", + " | \n", + "\n", + " 55.53\n", + " | \n", + "\n", + " 59.18\n", + " | \n", + "\n", + " 76.20\n", + " | \n", + "\n", + " 70.71\n", + " | \n", + "\n", + " 67.11\n", + " | \n", + "\n", + " 76.63\n", + " | \n", + "\n", + " 55.73/78.63\n", + " | \n", + "\n", + " 70.26\n", + " | \n", + "\n", + " 54.93\n", + " | \n", + "
2L128H | \n", + "\n", + " UER/Chinese-RoBERTa-Tiny\n", + " | \n", + "\n", + " 44.45\n", + " | \n", + "\n", + " 69.02\n", + " | \n", + "\n", + " 51.47\n", + " | \n", + "\n", + " 20.28\n", + " | \n", + "\n", + " 59.95\n", + " | \n", + "\n", + " 57.73\n", + " | \n", + "\n", + " 63.82\n", + " | \n", + "\n", + " 67.43\n", + " | \n", + "\n", + " 3.08/14.33\n", + " | \n", + "\n", + " 23.57\n", + " | \n", + "\n", + " 28.12\n", + " | \n", + "
\n", + " | \n", + " |
\n", + " | \n", + " |
\n", + " | \n", + " |
\n", + " Arch\n", + " | \n", + "\n", + " Model\n", + " | \n", + "\n", + " AVG\n", + " | \n", + "\n", + " AFQMC\n", + " | \n", + "\n", + " TNEWS\n", + " | \n", + "\n", + " IFLYTEK\n", + " | \n", + "\n", + " CMNLI\n", + " | \n", + "\n", + " OCNLI\n", + " | \n", + "\n", + " CLUEWSC2020\n", + " | \n", + "\n", + " CSL\n", + " | \n", + "\n", + " CMRC2018\n", + " | \n", + "\n", + " CHID\n", + " | \n", + "\n", + " C3\n", + " | \n", + "
24L1024H | \n", + "\n", + " ERNIE 1.0-Large-cw\n", + " | \n", + "\n", + " 79.03\n", + " | \n", + "\n", + " 75.97\n", + " | \n", + "\n", + " 59.65\n", + " | \n", + "\n", + " 62.91\n", + " | \n", + "\n", + " 85.09\n", + " | \n", + "\n", + " 81.73\n", + " | \n", + "\n", + " 93.09\n", + " | \n", + "\n", + " 84.53\n", + " | \n", + "\n", + " 74.22/91.88\n", + " | \n", + "\n", + " 88.57\n", + " | \n", + "\n", + " 84.54\n", + " | \n", + "
\n", + " ERNIE 2.0-Large-zh\n", + " | \n", + "\n", + " 76.90\n", + " | \n", + "\n", + " 76.23\n", + " | \n", + "\n", + " 59.33\n", + " | \n", + "\n", + " 61.91\n", + " | \n", + "\n", + " 83.85\n", + " | \n", + "\n", + " 79.93\n", + " | \n", + "\n", + " 89.82\n", + " | \n", + "\n", + " 83.23\n", + " | \n", + "\n", + " 70.95/90.31\n", + " | \n", + "\n", + " 86.78\n", + " | \n", + "\n", + " 78.12\n", + " | \n", + "|
\n", + " RoBERTa-wwm-ext-large\n", + " | \n", + "\n", + " 76.61\n", + " | \n", + "\n", + " 76.00\n", + " | \n", + "\n", + " 59.33\n", + " | \n", + "\n", + " 62.02\n", + " | \n", + "\n", + " 83.88\n", + " | \n", + "\n", + " 78.81\n", + " | \n", + "\n", + " 90.79\n", + " | \n", + "\n", + " 83.67\n", + " | \n", + "\n", + " 70.58/89.82\n", + " | \n", + "\n", + " 85.72\n", + " | \n", + "\n", + " 75.26\n", + " | \n", + "|
20L1024H | \n", + "\n", + " ERNIE 3.0-Xbase-zh\n", + " | \n", + "\n", + " 78.39\n", + " | \n", + "\n", + " 76.16\n", + " | \n", + "\n", + " 59.55\n", + " | \n", + "\n", + " 61.87\n", + " | \n", + "\n", + " 84.40\n", + " | \n", + "\n", + " 81.73\n", + " | \n", + "\n", + " 88.82\n", + " | \n", + "\n", + " 83.60\n", + " | \n", + "\n", + " 75.99/93.00\n", + " | \n", + "\n", + " 86.78\n", + " | \n", + "\n", + " 84.98\n", + " | \n", + "
12L768H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Base-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 76.05\n", + " | \n", + "\n", + " 75.93\n", + " | \n", + "\n", + " 58.26\n", + " | \n", + "\n", + " 61.56\n", + " | \n", + "\n", + " 83.02\n", + " | \n", + "\n", + " 80.10\n", + " | \n", + "\n", + " 86.18\n", + " | \n", + "\n", + " 82.63\n", + " | \n", + "\n", + " 70.71/90.41\n", + " | \n", + "\n", + " 84.26\n", + " | \n", + "\n", + " 77.88\n", + " | \n", + "
\n", + " ERNIE 1.0-Base-zh-cw\n", + " | \n", + "\n", + " 76.47\n", + " | \n", + "\n", + " 76.07\n", + " | \n", + "\n", + " 57.86\n", + " | \n", + "\n", + " 59.91\n", + " | \n", + "\n", + " 83.41\n", + " | \n", + "\n", + " 79.58\n", + " | \n", + "\n", + " 89.91\n", + " | \n", + "\n", + " 83.42\n", + " | \n", + "\n", + " 72.88/90.78\n", + " | \n", + "\n", + " 84.68\n", + " | \n", + "\n", + " 76.98\n", + " | \n", + "|
\n", + " ERNIE-Gram-zh\n", + " | \n", + "\n", + " 75.72\n", + " | \n", + "\n", + " 75.28\n", + " | \n", + "\n", + " 57.88\n", + " | \n", + "\n", + " 60.87\n", + " | \n", + "\n", + " 82.90\n", + " | \n", + "\n", + " 79.08\n", + " | \n", + "\n", + " 88.82\n", + " | \n", + "\n", + " 82.83\n", + " | \n", + "\n", + " 71.82/90.38\n", + " | \n", + "\n", + " 84.04\n", + " | \n", + "\n", + " 73.69\n", + " | \n", + "|
\n", + " Langboat/Mengzi-BERT-Base\n", + " | \n", + "\n", + " 74.69\n", + " | \n", + "\n", + " 75.35\n", + " | \n", + "\n", + " 57.76\n", + " | \n", + "\n", + " 61.64\n", + " | \n", + "\n", + " 82.41\n", + " | \n", + "\n", + " 77.93\n", + " | \n", + "\n", + " 88.16\n", + " | \n", + "\n", + " 82.20\n", + " | \n", + "\n", + " 67.04/88.35\n", + " | \n", + "\n", + " 83.74\n", + " | \n", + "\n", + " 70.70\n", + " | \n", + "|
\n", + " ERNIE 2.0-Base-zh\n", + " | \n", + "\n", + " 74.32\n", + " | \n", + "\n", + " 75.65\n", + " | \n", + "\n", + " 58.25\n", + " | \n", + "\n", + " 61.64\n", + " | \n", + "\n", + " 82.62\n", + " | \n", + "\n", + " 78.71\n", + " | \n", + "\n", + " 81.91\n", + " | \n", + "\n", + " 82.33\n", + " | \n", + "\n", + " 66.08/87.46\n", + " | \n", + "\n", + " 82.78\n", + " | \n", + "\n", + " 73.19\n", + " | \n", + "|
\n", + " ERNIE 1.0-Base-zh\n", + " | \n", + "\n", + " 74.17\n", + " | \n", + "\n", + " 74.84\n", + " | \n", + "\n", + " 58.91\n", + " | \n", + "\n", + " 62.25\n", + " | \n", + "\n", + " 81.68\n", + " | \n", + "\n", + " 76.58\n", + " | \n", + "\n", + " 85.20\n", + " | \n", + "\n", + " 82.77\n", + " | \n", + "\n", + " 67.32/87.83\n", + " | \n", + "\n", + " 82.47\n", + " | \n", + "\n", + " 69.68\n", + " | \n", + "|
\n", + " RoBERTa-wwm-ext\n", + " | \n", + "\n", + " 74.11\n", + " | \n", + "\n", + " 74.60\n", + " | \n", + "\n", + " 58.08\n", + " | \n", + "\n", + " 61.23\n", + " | \n", + "\n", + " 81.11\n", + " | \n", + "\n", + " 76.92\n", + " | \n", + "\n", + " 88.49\n", + " | \n", + "\n", + " 80.77\n", + " | \n", + "\n", + " 68.39/88.50\n", + " | \n", + "\n", + " 83.43\n", + " | \n", + "\n", + " 68.03\n", + " | \n", + "|
\n", + " BERT-Base-Chinese\n", + " | \n", + "\n", + " 72.57\n", + " | \n", + "\n", + " 74.63\n", + " | \n", + "\n", + " 57.13\n", + " | \n", + "\n", + " 61.29\n", + " | \n", + "\n", + " 80.97\n", + " | \n", + "\n", + " 75.22\n", + " | \n", + "\n", + " 81.91\n", + " | \n", + "\n", + " 81.90\n", + " | \n", + "\n", + " 65.30/86.53\n", + " | \n", + "\n", + " 82.01\n", + " | \n", + "\n", + " 65.38\n", + " | \n", + "|
\n", + " UER/Chinese-RoBERTa-Base\n", + " | \n", + "\n", + " 71.78\n", + " | \n", + "\n", + " 72.89\n", + " | \n", + "\n", + " 57.62\n", + " | \n", + "\n", + " 61.14\n", + " | \n", + "\n", + " 80.01\n", + " | \n", + "\n", + " 75.56\n", + " | \n", + "\n", + " 81.58\n", + " | \n", + "\n", + " 80.80\n", + " | \n", + "\n", + " 63.87/84.95\n", + " | \n", + "\n", + " 81.52\n", + " | \n", + "\n", + " 62.76\n", + " | \n", + "|
8L512H | \n", + "\n", + " UER/Chinese-RoBERTa-Medium\n", + " | \n", + "\n", + " 67.06\n", + " | \n", + "\n", + " 70.64\n", + " | \n", + "\n", + " 56.10\n", + " | \n", + "\n", + " 58.29\n", + " | \n", + "\n", + " 77.35\n", + " | \n", + "\n", + " 71.90\n", + " | \n", + "\n", + " 68.09\n", + " | \n", + "\n", + " 78.63\n", + " | \n", + "\n", + " 57.63/78.91\n", + " | \n", + "\n", + " 75.13\n", + " | \n", + "\n", + " 56.84\n", + " | \n", + "
6L768H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Medium-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 72.49\n", + " | \n", + "\n", + " 73.37\n", + " | \n", + "\n", + " 57.00\n", + " | \n", + "\n", + " 60.67\n", + " | \n", + "\n", + " 80.64\n", + " | \n", + "\n", + " 76.88\n", + " | \n", + "\n", + " 79.28\n", + " | \n", + "\n", + " 81.60\n", + " | \n", + "\n", + " 65.83/87.30\n", + " | \n", + "\n", + " 79.91\n", + " | \n", + "\n", + " 69.73\n", + " | \n", + "
\n", + " HFL/RBT6, Chinese\n", + " | \n", + "\n", + " 70.06\n", + " | \n", + "\n", + " 73.45\n", + " | \n", + "\n", + " 56.82\n", + " | \n", + "\n", + " 59.64\n", + " | \n", + "\n", + " 79.36\n", + " | \n", + "\n", + " 73.32\n", + " | \n", + "\n", + " 76.64\n", + " | \n", + "\n", + " 80.67\n", + " | \n", + "\n", + " 62.72/84.77\n", + " | \n", + "\n", + " 78.17\n", + " | \n", + "\n", + " 59.85\n", + " | \n", + "|
\n", + " TinyBERT6, Chinese\n", + " | \n", + "\n", + " 69.62\n", + " | \n", + "\n", + " 72.22\n", + " | \n", + "\n", + " 55.70\n", + " | \n", + "\n", + " 54.48\n", + " | \n", + "\n", + " 79.12\n", + " | \n", + "\n", + " 74.07\n", + " | \n", + "\n", + " 77.63\n", + " | \n", + "\n", + " 80.17\n", + " | \n", + "\n", + " 63.03/83.75\n", + " | \n", + "\n", + " 77.64\n", + " | \n", + "\n", + " 62.11\n", + " | \n", + "|
\n", + " RoFormerV2 Small\n", + " | \n", + "\n", + " 68.52\n", + " | \n", + "\n", + " 72.47\n", + " | \n", + "\n", + " 56.53\n", + " | \n", + "\n", + " 60.72\n", + " | \n", + "\n", + " 76.37\n", + " | \n", + "\n", + " 72.95\n", + " | \n", + "\n", + " 75.00\n", + " | \n", + "\n", + " 81.07\n", + " | \n", + "\n", + " 62.97/83.64\n", + " | \n", + "\n", + " 67.66\n", + " | \n", + "\n", + " 59.41\n", + " | \n", + "|
\n", + " UER/Chinese-RoBERTa-L6-H768\n", + " | \n", + "\n", + " 67.09\n", + " | \n", + "\n", + " 70.13\n", + " | \n", + "\n", + " 56.54\n", + " | \n", + "\n", + " 60.48\n", + " | \n", + "\n", + " 77.49\n", + " | \n", + "\n", + " 72.00\n", + " | \n", + "\n", + " 72.04\n", + " | \n", + "\n", + " 77.33\n", + " | \n", + "\n", + " 53.74/75.52\n", + " | \n", + "\n", + " 76.73\n", + " | \n", + "\n", + " 54.40\n", + " | \n", + "|
6L384H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Mini-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 66.90\n", + " | \n", + "\n", + " 71.85\n", + " | \n", + "\n", + " 55.24\n", + " | \n", + "\n", + " 54.48\n", + " | \n", + "\n", + " 77.19\n", + " | \n", + "\n", + " 73.08\n", + " | \n", + "\n", + " 71.05\n", + " | \n", + "\n", + " 79.30\n", + " | \n", + "\n", + " 58.53/81.97\n", + " | \n", + "\n", + " 69.71\n", + " | \n", + "\n", + " 58.60\n", + " | \n", + "
4L768H | \n", + "\n", + " HFL/RBT4, Chinese\n", + " | \n", + "\n", + " 67.42\n", + " | \n", + "\n", + " 72.41\n", + " | \n", + "\n", + " 56.50\n", + " | \n", + "\n", + " 58.95\n", + " | \n", + "\n", + " 77.34\n", + " | \n", + "\n", + " 70.78\n", + " | \n", + "\n", + " 71.05\n", + " | \n", + "\n", + " 78.23\n", + " | \n", + "\n", + " 59.30/81.93\n", + " | \n", + "\n", + " 73.18\n", + " | \n", + "\n", + " 56.45\n", + " | \n", + "
4L512H | \n", + "\n", + " UER/Chinese-RoBERTa-Small\n", + " | \n", + "\n", + " 63.25\n", + " | \n", + "\n", + " 69.21\n", + " | \n", + "\n", + " 55.41\n", + " | \n", + "\n", + " 57.552\n", + " | \n", + "\n", + " 73.64\n", + " | \n", + "\n", + " 69.80\n", + " | \n", + "\n", + " 66.78\n", + " | \n", + "\n", + " 74.83\n", + " | \n", + "\n", + " 46.75/69.69\n", + " | \n", + "\n", + " 67.59\n", + " | \n", + "\n", + " 50.92\n", + " | \n", + "
4L384H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Micro-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 64.21\n", + " | \n", + "\n", + " 71.15\n", + " | \n", + "\n", + " 55.05\n", + " | \n", + "\n", + " 53.83\n", + " | \n", + "\n", + " 74.81\n", + " | \n", + "\n", + " 70.41\n", + " | \n", + "\n", + " 69.08\n", + " | \n", + "\n", + " 76.50\n", + " | \n", + "\n", + " 53.77/77.82\n", + " | \n", + "\n", + " 62.26\n", + " | \n", + "\n", + " 55.53\n", + " | \n", + "
4L312H | \n", + "\n", + " \n", + " \n", + " ERNIE 3.0-Nano-zh\n", + " \n", + " \n", + " | \n", + "\n", + " 62.97\n", + " | \n", + "\n", + " 70.51\n", + " | \n", + "\n", + " 54.57\n", + " | \n", + "\n", + " 48.36\n", + " | \n", + "\n", + " 74.97\n", + " | \n", + "\n", + " 70.61\n", + " | \n", + "\n", + " 68.75\n", + " | \n", + "\n", + " 75.93\n", + " | \n", + "\n", + " 52.00/76.35\n", + " | \n", + "\n", + " 58.91\n", + " | \n", + "\n", + " 55.11\n", + " | \n", + "
\n", + " TinyBERT4, Chinese\n", + " | \n", + "\n", + " 60.82\n", + " | \n", + "\n", + " 69.07\n", + " | \n", + "\n", + " 54.02\n", + " | \n", + "\n", + " 39.71\n", + " | \n", + "\n", + " 73.94\n", + " | \n", + "\n", + " 69.59\n", + " | \n", + "\n", + " 70.07\n", + " | \n", + "\n", + " 75.07\n", + " | \n", + "\n", + " 46.04/69.34\n", + " | \n", + "\n", + " 58.53\n", + " | \n", + "\n", + " 52.18\n", + " | \n", + "|
4L256H | \n", + "\n", + " UER/Chinese-RoBERTa-Mini\n", + " | \n", + "\n", + " 53.40\n", + " | \n", + "\n", + " 69.32\n", + " | \n", + "\n", + " 54.22\n", + " | \n", + "\n", + " 41.63\n", + " | \n", + "\n", + " 69.40\n", + " | \n", + "\n", + " 67.36\n", + " | \n", + "\n", + " 65.13\n", + " | \n", + "\n", + " 70.07\n", + " | \n", + "\n", + " 5.96/17.13\n", + " | \n", + "\n", + " 51.19\n", + " | \n", + "\n", + " 39.68\n", + " | \n", + "
3L1024H | \n", + "\n", + " HFL/RBTL3, Chinese\n", + " | \n", + "\n", + " 66.63\n", + " | \n", + "\n", + " 71.11\n", + " | \n", + "\n", + " 56.14\n", + " | \n", + "\n", + " 59.56\n", + " | \n", + "\n", + " 76.41\n", + " | \n", + "\n", + " 71.29\n", + " | \n", + "\n", + " 69.74\n", + " | \n", + "\n", + " 76.93\n", + " | \n", + "\n", + " 58.50/80.90\n", + " | \n", + "\n", + " 71.03\n", + " | \n", + "\n", + " 55.56\n", + " | \n", + "
3L768H | \n", + "\n", + " HFL/RBT3, Chinese\n", + " | \n", + "\n", + " 65.72\n", + " | \n", + "\n", + " 70.95\n", + " | \n", + "\n", + " 55.53\n", + " | \n", + "\n", + " 59.18\n", + " | \n", + "\n", + " 76.20\n", + " | \n", + "\n", + " 70.71\n", + " | \n", + "\n", + " 67.11\n", + " | \n", + "\n", + " 76.63\n", + " | \n", + "\n", + " 55.73/78.63\n", + " | \n", + "\n", + " 70.26\n", + " | \n", + "\n", + " 54.93\n", + " | \n", + "
2L128H | \n", + "\n", + " UER/Chinese-RoBERTa-Tiny\n", + " | \n", + "\n", + " 44.45\n", + " | \n", + "\n", + " 69.02\n", + " | \n", + "\n", + " 51.47\n", + " | \n", + "\n", + " 20.28\n", + " | \n", + "\n", + " 59.95\n", + " | \n", + "\n", + " 57.73\n", + " | \n", + "\n", + " 63.82\n", + " | \n", + "\n", + " 67.43\n", + " | \n", + "\n", + " 3.08/14.33\n", + " | \n", + "\n", + " 23.57\n", + " | \n", + "\n", + " 28.12\n", + " | \n", + "
\n", + " \n", + "
\n", + "\n", + "# 4.Model Principles\n", + "\n", + "### Online Distillation Technology\n", + "\n", + "During model learning, online distillation periodically transmits knowledge signals to several student models that are trained simultaneously, thereby producing student models of multiple sizes in a single distillation stage. Compared with traditional distillation, this technique greatly reduces the computing power consumed by the extra distillation computation of the large model and by the repeated knowledge transfer to multiple students.\n", + "\n", + "This novel distillation method leverages the scale of the Wenxin large model and ensures both the effectiveness and the variety of sizes of the student models after distillation is completed, which suits application scenarios with different performance requirements. In addition, because of the huge gap in model size between the Wenxin large model and the student models, distillation is extremely difficult and can easily fail. To address this, a teaching assistant model is introduced into distillation: the teaching assistant serves as a bridge for knowledge transfer, narrowing the overly large gap in representation space between the student models and the large model and thereby improving distillation efficiency.\n", + "\n", + "For more technical details, please refer to the papers:\n", + "- [ERNIE-Tiny: A Progressive Distillation Framework for Pretrained Transformer Compression](https://arxiv.org/abs/2106.02241)\n", + "- [ERNIE 3.0 Titan: Exploring Larger-scale Knowledge Enhanced Pre-training for Language Understanding and Generation](https://arxiv.org/abs/2112.12731)\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "\n", + "# 5.Related papers and citations\n", + "\n", + "\n", + "```text\n", + "@article{sun2021ernie,\n", + " title={Ernie 3.0: Large-scale knowledge enhanced pre-training for language understanding and generation},\n", + " author={Sun, Yu and Wang, Shuohuan and Feng, Shikun and Ding, Siyu and Pang, Chao and Shang, Junyuan and Liu, Jiaxiang and Chen, Xuyi and Zhao, Yanbin and Lu, Yuxiang and others},\n", + " journal={arXiv preprint arXiv:2107.02137},\n", + " year={2021}\n", + "}\n", + "\n", + "@article{su2021ernie,\n", + " title={Ernie-tiny: A progressive distillation framework for pretrained transformer compression},\n", + " author={Su, Weiyue and Chen, Xuyi and Feng, Shikun and Liu, Jiaxiang and Liu, Weixin and Sun, Yu and Tian, Hao and Wu, Hua and Wang, Haifeng},\n", + " journal={arXiv preprint arXiv:2106.02241},\n", + " year={2021}\n", + "}\n", + "\n", + "@article{wang2021ernie,\n", + " title={Ernie 3.0 titan: Exploring larger-scale knowledge enhanced pre-training for language understanding and generation},\n", + " author={Wang, Shuohuan and Sun, Yu and Xiang, Yang and Wu, Zhihua and Ding, Siyu and Gong, Weibao and Feng, Shikun and Shang, Junyuan and Zhao, Yanbin and Pang, Chao and others},\n", + " journal={arXiv preprint arXiv:2112.12731},\n", + " year={2021}\n", + "}\n", + "```\n", + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "py35-paddle1.2.0" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- GitLab