Unverified Commit 34f5dbdc authored by: Xiaoyao Xi, committed by: GitHub

Merge pull request #49 from wangxiao1021/api

add examples
......@@ -6,6 +6,91 @@ PaddlePALM also provides state-of-the-art general purpose architectures (BERT,ER
It then presents several success cases, benchmark results of each backbone (BERT, ERNIE, RoBERTa) on public datasets, and some successful multi-task learning examples.
<table>
<tbody>
<tr>
<th><strong>Dataset</strong></th>
<th colspan="3"><center><strong>chnsenticorp</strong></center></th>
<th colspan="3"><center><strong>Quora Question Pairs matching</strong></center></th>
<th colspan="3"><center><strong>MSRA-NER<br>(SIGHAN2006)</strong></center></th>
<th colspan="2"><center><strong>CMRC2018</strong></center></th>
</tr>
<tr>
<td rowspan="2"><strong>Metric</strong></td>
<td><center><strong>precision</strong></center></td>
<td><center><strong>recall</strong></center></td>
<td><center><strong>f1-score</strong></center></td>
<td><center><strong>precision</strong></center></td>
<td><center><strong>recall</strong></center></td>
<td><center><strong>f1-score</strong></center></td>
<td><center><strong>precision</strong></center></td>
<td><center><strong>recall</strong></center></td>
<td><center><strong>f1-score</strong></center></td>
<td><center><strong>em</strong></center></td>
<td><center><strong>f1-score</strong></center></td>
</tr>
<tr>
<td colspan="3"><center><strong>test</strong></center></td>
<td colspan="3"><center><strong>test</strong></center></td>
<td colspan="3"><center><strong>test</strong></center></td>
<td colspan="2"><center><strong>dev</strong></center></td>
</tr>
<tr>
<td><strong>ERNIE Base</strong></td>
<td>95.7</td>
<td>95.0</td>
<td>95.7</td>
<td>85.8</td>
<td>82.4</td>
<td>81.5</td>
<td>94.9</td>
<td>94.5</td>
<td>94.7</td>
<td>96.3</td>
<td>84.0</td>
</tr>
</tbody>
</table>
## Package Overview
......
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddlepalm as palm
import sys
import argparse
# create parser
parser = argparse.ArgumentParser(prog='download_models.py', usage='python %(prog)s -l | -d <model_name> [-h]\n\nFor example,\n\tpython %(prog)s -d bert-en-uncased-large ',description = 'Download pretrain models for initializing params of backbones. ')
parser1= parser.add_argument_group("required arguments")
parser1.add_argument('-l','--list', action = 'store_true', help = 'show the list of available pretrain models', default = False)
parser1.add_argument('-d','--download', action = 'store', help = 'download pretrain models. The available pretrain models can be listed by run "python download_models.py -l"')
args = parser.parse_args()
if(args.list):
palm.downloader.ls('pretrain')
elif(args.download):
print('download~~~')
print(args.download)
palm.downloader.download('pretrain', args.download)
else:
print (parser.parse_args(['-h']))
## Example 1: Classification
This task is a sentiment analysis task. The following sections detail model preparation, dataset preparation, and how to run the task.
### Step 1: Prepare Pre-trained Models & Datasets
#### Pre-trained Model
The pre-trained model used in this example is [ernie-zh-base](https://github.com/PaddlePaddle/PALM/tree/r0.3-api).
Make sure you have downloaded the required pre-trained model into the current folder.
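If it is not present yet, the PALM downloader used by `download_models.py` above can fetch it. Below is a minimal sketch, not the official procedure; note that `run.py` further below expects the files under `./pretrain/ernie-zh-base/`, so the extracted layout may need to be adjusted:

```python
# Minimal sketch: fetch the ernie-zh-base pre-trained model with the PALM downloader
# (same API as download_models.py above).
import paddlepalm as palm

palm.downloader.ls('pretrain')                         # list the available pre-trained models
palm.downloader.download('pretrain', 'ernie-zh-base')  # download ERNIE 1.0 base (Chinese)
```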
#### Dataset
This task uses the `chnsenticorp` dataset.
Download dataset:
```shell
python download.py
```
If everything goes well, a folder named `data/` will be created with all the data in it.
The data is in tsv format with two fields, `label` and `text_a`. Here are some example rows:
```
label text_a
0 当当网名不符实,订货多日不见送货,询问客服只会推托,只会要求用户再下订单。如此服务留不住顾客的。去别的网站买书服务更好。
0 XP的驱动不好找!我的17号提的货,现在就降价了100元,而且还送杀毒软件!
1 <荐书> 推荐所有喜欢<红楼>的红迷们一定要收藏这本书,要知道当年我听说这本书的时候花很长时间去图书馆找和借都没能如愿,所以这次一看到当当有,马上买了,红迷们也要记得备货哦!
```
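A quick way to sanity-check the format is to read the first few rows. A minimal sketch, assuming the `data/` folder created above:

```python
# Minimal sketch: peek at the first rows of the chnsenticorp training data.
import csv

with open('./data/train.tsv', encoding='utf-8') as f:
    rows = csv.reader(f, delimiter='\t')
    print(next(rows))                 # header: ['label', 'text_a']
    for _, row in zip(range(3), rows):
        label, text_a = row
        print(label, text_a[:30])
```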
### Step 2: Train & Predict
The code used to perform the classification task is in `run.py`. Once you have prepared the pre-trained model and the dataset required for the task, run:
```shell
python run.py
```
To select a specific GPU or use multiple GPUs for training, set **`CUDA_VISIBLE_DEVICES`**, for example:
```shell
CUDA_VISIBLE_DEVICES=0,1,2 python run.py
```
Some logs will be shown below:
```
step 1/154 (epoch 0), loss: 5.512, speed: 0.51 steps/s
step 2/154 (epoch 0), loss: 2.595, speed: 3.36 steps/s
step 3/154 (epoch 0), loss: 1.798, speed: 3.48 steps/s
```
After the run, you can view the saved models in the `outputs/` folder and the predictions in the `outputs/predict` folder. Here are some examples of predictions:
```
{"index": 0, "logits": [-0.2014336884021759, 0.6799028515815735], "probs": [0.29290086030960083, 0.7070990800857544], "label": 1}
{"index": 1, "logits": [0.8593899011611938, -0.29743513464927673], "probs": [0.7607553601264954, 0.23924466967582703], "label": 0}
{"index": 2, "logits": [0.7462944388389587, -0.7083730101585388], "probs": [0.8107157349586487, 0.18928426504135132], "label": 0}
```
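Each line of `predictions.json` is a standalone JSON record: `probs` is the softmax of `logits` and `label` is the arg-max class (consistent with the `Classify` head changes shown at the end of this diff). A minimal sketch for loading and checking them:

```python
# Minimal sketch: parse outputs/predict/predictions.json and check that
# probs == softmax(logits) and label == argmax(logits).
import json
import numpy as np

with open('./outputs/predict/predictions.json') as f:
    for line in f:
        rec = json.loads(line)
        logits = np.array(rec['logits'])
        probs = np.exp(logits) / np.exp(logits).sum()
        assert np.allclose(probs, rec['probs'], atol=1e-4)
        assert rec['label'] == int(np.argmax(logits))
```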
### Step 3: Evaluate
Once you have the prediction, you can run the evaluation script to evaluate the model:
```shell
python evaluate.py
```
The evaluation results are as follows:
```
data num: 1200
precision: 0.956666666667, recall: 0.949013157895, f1: 0.95688225039
```
# -*- coding: utf-8 -*-
import os
import requests
import tarfile
import shutil
from tqdm import tqdm
def download(src, url):
file_size = int(requests.head(url).headers['Content-Length'])
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar = tqdm(total=file_size)
resp = requests.get(url, headers=header, stream=True)
with open(src, 'ab') as f:
for chunk in resp.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(1024)
pbar.close()
return file_size
abs_path = os.path.abspath(__file__)
download_url = "https://ernie.bj.bcebos.com/task_data_zh.tgz"
downlaod_path = os.path.join(os.path.dirname(abs_path), "task_data_zh.tgz")
target_dir = os.path.dirname(abs_path)
download(downlaod_path, download_url)
tar = tarfile.open(downlaod_path)
tar.extractall(target_dir)
os.remove(downlaod_path)
abs_path = os.path.abspath(__file__)
dst_dir = os.path.join(os.path.dirname(abs_path), "data")
if not os.path.exists(dst_dir) or not os.path.isdir(dst_dir):
os.makedirs(dst_dir)
for file in os.listdir(os.path.join(target_dir, 'task_data', 'chnsenticorp')):
shutil.move(os.path.join(target_dir, 'task_data', 'chnsenticorp', file), dst_dir)
shutil.rmtree(os.path.join(target_dir, 'task_data'))
# -*- coding: utf-8 -*-
import json
import numpy as np
def accuracy(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
return (preds == labels).mean()
def f1(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
tp = np.sum((labels == '1') & (preds == '1'))
tn = np.sum((labels == '0') & (preds == '0'))
fp = np.sum((labels == '0') & (preds == '1'))
fn = np.sum((labels == '1') & (preds == '0'))
p = tp * 1.0 / (tp + fp)
r = tp * 1.0 / (tp + fn) * 1.0
f1 = (2 * p * r) / (p + r + 1e-8)
return f1
def recall(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
# recall=TP/(TP+FN)
tp = np.sum((labels == '1') & (preds == '1'))
fn = np.sum((labels == '1') & (preds == '0'))
re = tp * 1.0 / (tp + fn)
return re
def res_evaluate(res_dir="./outputs/predict/predictions.json", eval_phase='test'):
if eval_phase == 'test':
data_dir="./data/test.tsv"
elif eval_phase == 'dev':
data_dir="./data/dev.tsv"
else:
assert eval_phase in ['dev', 'test'], 'eval_phase should be dev or test'
labels = []
with open(data_dir, "r") as file:
first_flag = True
for line in file:
line = line.split("\t")
label = line[0]
if label=='label':
continue
labels.append(str(label))
file.close()
preds = []
with open(res_dir, "r") as file:
for line in file.readlines():
line = json.loads(line)
pred = line['label']
preds.append(str(pred))
file.close()
assert len(labels) == len(preds), "prediction result doesn't match to labels"
print('data num: {}'.format(len(labels)))
print("precision: {}, recall: {}, f1: {}".format(accuracy(preds, labels), recall(preds, labels), f1(preds, labels)))
res_evaluate()
# coding=utf-8
import paddlepalm as palm
import json
from paddlepalm.distribute import gpu_dev_count
if __name__ == '__main__':
# configs
max_seqlen = 256
batch_size = 8
num_epochs = 10
lr = 5e-5
weight_decay = 0.01
vocab_path = './pretrain/ernie-zh-base/vocab.txt'
train_file = './data/train.tsv'
predict_file = './data/test.tsv'
config = json.load(open('./pretrain/ernie-zh-base/ernie_config.json'))
input_dim = config['hidden_size']
num_classes = 2
dropout_prob = 0.1
random_seed = 1
task_name = 'chnsenticorp'
save_path = './outputs/'
pred_output = './outputs/predict/'
save_type = 'ckpt'
print_steps = 20
pre_params = './pretrain/ernie-zh-base/params'
# ----------------------- for training -----------------------
# step 1-1: create readers for training
cls_reader = palm.reader.ClassifyReader(vocab_path, max_seqlen, seed=random_seed)
# step 1-2: load the training data
cls_reader.load_data(train_file, batch_size, num_epochs=num_epochs)
# step 2: create a backbone of the model to extract text features
ernie = palm.backbone.ERNIE.from_config(config)
# step 3: register the backbone in reader
cls_reader.register_with(ernie)
# step 4: create the task output head
cls_head = palm.head.Classify(num_classes, input_dim, dropout_prob)
# step 5-1: create a task trainer
trainer = palm.Trainer(task_name)
# step 5-2: build forward graph with backbone and task head
loss_var = trainer.build_forward(ernie, cls_head)
# step 6-1*: use warmup
n_steps = cls_reader.num_examples * num_epochs // batch_size
warmup_steps = int(0.1 * n_steps)
sched = palm.lr_sched.TriangularSchedualer(warmup_steps, n_steps)
# step 6-2: create a optimizer
adam = palm.optimizer.Adam(loss_var, lr, sched)
# step 6-3: build backward
trainer.build_backward(optimizer=adam, weight_decay=weight_decay)
# step 7: fit prepared reader and data
trainer.fit_reader(cls_reader)
# step 8-1*: load pretrained parameters
trainer.load_pretrain(pre_params)
# step 8-2*: set saver to save model
# save_steps = n_steps // gpu_dev_count - batch_size
save_steps = 2396
trainer.set_saver(save_steps=save_steps, save_path=save_path, save_type=save_type)
# step 8-3: start training
trainer.train(print_steps=print_steps)
# ----------------------- for prediction -----------------------
# step 1-1: create readers for prediction
print('prepare to predict...')
predict_cls_reader = palm.reader.ClassifyReader(vocab_path, max_seqlen, seed=random_seed, phase='predict')
# step 1-2: load the training data
predict_cls_reader.load_data(predict_file, batch_size)
# step 2: create a backbone of the model to extract text features
pred_ernie = palm.backbone.ERNIE.from_config(config, phase='predict')
# step 3: register the backbone in reader
predict_cls_reader.register_with(pred_ernie)
# step 4: create the task output head
cls_pred_head = palm.head.Classify(num_classes, input_dim, phase='predict')
# step 5: build forward graph with backbone and task head
trainer.build_predict_forward(pred_ernie, cls_pred_head)
# step 6: load pretrained model
# model_path = './outputs/ckpt.step'+str(save_steps)
model_path = './outputs/ckpt.step'+str(11980)
pred_ckpt = trainer.load_ckpt(model_path)
# step 7: fit prepared reader and data
trainer.fit_reader(predict_cls_reader, phase='predict')
# step 8: predict
print('predicting..')
trainer.predict(print_steps=print_steps, output_dir=pred_output)
## Example 2: Matching
This task is a sentence pair matching task. The following sections detail model preparation, dataset preparation, and how to run the task.
### Step 1: Prepare Pre-trained Models & Datasets
#### Pre-trained Model
The pre-trained model used in this example is [ernie-en-base](https://github.com/PaddlePaddle/PALM/tree/r0.3-api).
Make sure you have downloaded the required pre-trained model into the current folder.
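As in Example 1, the PALM downloader can fetch it; a minimal sketch:

```python
# Minimal sketch: fetch the ernie-en-base pre-trained model (ERNIE 2.0 base, English).
import paddlepalm as palm

palm.downloader.download('pretrain', 'ernie-en-base')
```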
#### Dataset
This task uses the `Quora Question Pairs matching` dataset.
Download dataset:
```shell
python download.py
```
After the dataset is downloaded, you should convert the data format for training:
```shell
python process.py quora_duplicate_questions.tsv train.tsv test.tsv
```
If everything goes well, a folder named `data/` will be created with all the converted data in it.
The data is in tsv format with three fields, `text_a`, `text_b` and `label`. Here are some example rows:
```
text_a text_b label
How can the arrangement of corynebacterium xerosis be described? How would you describe waves? 0
How do you fix a Google Play Store account that isn't working? What can cause the Google Play store to not open? How are such probelms fixed? 1
Which is the best earphone under 1000? What are the best earphones under 1k? 1
What are the differences between the Dell Inspiron 3000, 5000, and 7000 series laptops? "Should I buy an Apple MacBook Pro 15"" or a Dell Inspiron 17 5000 series?" 0
```
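For reference, `process.py` (listed further below) keeps columns 3, 4 and 5 of the raw Quora file, which in the standard release correspond to `question1`, `question2` and `is_duplicate`, and splits the rows into a test portion and a train portion. A minimal sketch of the core of that conversion (unlike `process.py`, it simply skips questions that contain line breaks):

```python
# Minimal sketch of the column mapping used by process.py below
# (assumed raw layout: id, qid1, qid2, question1, question2, is_duplicate).
with open('quora_duplicate_questions.tsv', encoding='utf-8') as raw, \
        open('sample.tsv', 'w', encoding='utf-8') as out:   # sample.tsv is a hypothetical output name
    out.write('text_a\ttext_b\tlabel\n')
    next(raw)                                   # skip the original header
    for line in raw:
        fields = line.rstrip('\n').split('\t')
        if len(fields) < 6:                     # question spans multiple lines; process.py re-joins these
            continue
        out.write('{}\t{}\t{}\n'.format(fields[3], fields[4], fields[5]))
```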
### Step 2: Train & Predict
The code used to perform the matching task is in `run.py`. Once you have prepared the pre-trained model and the dataset required for the task, run:
```shell
python run.py
```
To select a specific GPU or use multiple GPUs for training, set **`CUDA_VISIBLE_DEVICES`**, for example:
```shell
CUDA_VISIBLE_DEVICES=0,1,2 python run.py
```
Some logs will be shown below:
```
step 20/49087 (epoch 0), loss: 1.079, speed: 3.48 steps/s
step 40/49087 (epoch 0), loss: 1.251, speed: 5.18 steps/s
step 60/49087 (epoch 0), loss: 1.193, speed: 5.04 steps/s
```
After the run, you can view the saved models in the `outputs/` folder and the predictions in the `outputs/predict` folder. Here are some examples of predictions:
```
{"index": 0, "logits": [-0.32688724994659424, -0.8568955063819885], "probs": [0.629485011100769, 0.3705149292945862], "label": 0}
{"index": 1, "logits": [-0.2735646963119507, -0.7983021140098572], "probs": [0.6282548904418945, 0.37174513936042786], "label": 0}
{"index": 2, "logits": [-0.3381381630897522, -0.8614270091056824], "probs": [0.6279165148735046, 0.37208351492881775], "label": 0}
```
### Step 3: Evaluate
Once you have the prediction, you can run the evaluation script to evaluate the model:
```shell
python evaluate.py
```
The evaluation results are as follows:
```
data_num: 4300
precision: 0.857906976744, recall: 0.824249846908, f1: 0.81501664653
```
# -*- coding: utf-8 -*-
import os
import requests
from tqdm import tqdm
def download(src, url):
file_size = int(requests.head(url).headers['Content-Length'])
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar = tqdm(total=file_size)
resp = requests.get(url, headers=header, stream=True)
with open(src, 'ab') as f:
for chunk in resp.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(1024)
pbar.close()
return file_size
abs_path = os.path.abspath(__file__)
data_dir = os.path.join(os.path.dirname(abs_path), "data")
if not os.path.exists(data_dir) or not os.path.isdir(data_dir):
os.makedirs(data_dir)
download_url = "http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv"
downlaod_path = os.path.join(data_dir, "quora_duplicate_questions.tsv")
download(downlaod_path, download_url)
# -*- coding: utf-8 -*-
import json
import numpy as np
def accuracy(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
return (preds == labels).mean()
def f1(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
tp = np.sum((labels == '1') & (preds == '1'))
tn = np.sum((labels == '0') & (preds == '0'))
fp = np.sum((labels == '0') & (preds == '1'))
fn = np.sum((labels == '1') & (preds == '0'))
p = tp * 1.0 / (tp + fp)
r = tp * 1.0 / (tp + fn) * 1.0
f1 = (2 * p * r) / (p + r + 1e-8)
return f1
def recall(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
# recall=TP/(TP+FN)
tp = np.sum((labels == '1') & (preds == '1'))
fn = np.sum((labels == '1') & (preds == '0'))
re = tp * 1.0 / (tp + fn)
return re
def res_evaluate(res_dir="./outputs/predict/predictions.json", eval_phase='test'):
if eval_phase == 'test':
data_dir="./data/test.tsv"
elif eval_phase == 'dev':
data_dir="./data/dev.tsv"
else:
assert eval_phase in ['dev', 'test'], 'eval_phase should be dev or test'
labels = []
with open(data_dir, "r") as file:
first_flag = True
for line in file:
line = line.split("\t")
label = line[2][:-1]
if label=='label':
continue
labels.append(str(label))
file.close()
preds = []
with open(res_dir, "r") as file:
for line in file.readlines():
line = json.loads(line)
pred = line['label']
preds.append(str(pred))
file.close()
assert len(labels) == len(preds), "prediction result({}) doesn't match to labels({})".format(len(preds),len(labels))
print('data num: {}'.format(len(labels)))
print("precision: {}, recall: {}, f1: {}".format(accuracy(preds, labels), recall(preds, labels), f1(preds, labels)))
res_evaluate()
# -*- coding: utf-8 -*-
import sys
import os
if len(sys.argv) != 4:
exit(0)
data_dir = sys.argv[1]
if not os.path.exists(data_dir):
print("%s not exists" % data_dir)
exit(0)
train_dir = sys.argv[2]
train_file = open(train_dir, "w")
train_file.write("text_a\ttext_b\tlabel\n")
test_dir = sys.argv[3]
test_file = open(test_dir, "w")
test_file.write("text_a\ttext_b\tlabel\n")
with open(data_dir, "r") as file:
before = ""
cnt = 0
for line in file:
line = line.strip("\n")
line_t = line.split("\t")
flag = 0
if len(line_t) < 6:
if flag:
flag = 0
out_line = "{}{}\n".format(out_line, line)
else:
flag = 1
out_line = "{}".format(line)
continue
else:
out_line = "{}\t{}\t{}\n".format(line_t[3], line_t[4], line_t[5])
cnt += 1
if 2 <= cnt <= 4301:
test_file.write(out_line)
if 4301 <= cnt <= 104301:
train_file.write(out_line)
train_file.close()
test_file.close()
# coding=utf-8
import paddlepalm as palm
import json
from paddlepalm.distribute import gpu_dev_count
if __name__ == '__main__':
# configs
max_seqlen = 128
batch_size = 16
num_epochs = 3
lr = 3e-5
weight_decay = 0.0
num_classes = 2
random_seed = 1
dropout_prob = 0.1
save_path = './outputs/'
save_type = 'ckpt'
pred_model_path = './outputs/ckpt.step'+str(18732)
print_steps = 50
pred_output = './outputs/predict/'
pre_params = './pretrain/ernie-en-base/params'
task_name = 'Quora Question Pairs matching'
vocab_path = './pretrain/ernie-en-base/vocab.txt'
train_file = './data/train.tsv'
predict_file = './data/test.tsv'
config = json.load(open('./pretrain/ernie-en-base/ernie_config.json'))
input_dim = config['hidden_size']
# ----------------------- for training -----------------------
# step 1-1: create readers for training
match_reader = palm.reader.MatchReader(vocab_path, max_seqlen, seed=random_seed)
# step 1-2: load the training data
match_reader.load_data(train_file, file_format='tsv', num_epochs=num_epochs, batch_size=batch_size)
# step 2: create a backbone of the model to extract text features
ernie = palm.backbone.ERNIE.from_config(config)
# step 3: register the backbone in reader
match_reader.register_with(ernie)
# step 4: create the task output head
match_head = palm.head.Match(num_classes, input_dim, dropout_prob)
# step 5-1: create a task trainer
trainer = palm.Trainer(task_name)
# step 5-2: build forward graph with backbone and task head
loss_var = trainer.build_forward(ernie, match_head)
# step 6-1*: use warmup
n_steps = match_reader.num_examples * num_epochs // batch_size
warmup_steps = int(0.1 * n_steps)
print('total_steps: {}'.format(n_steps))
print('warmup_steps: {}'.format(warmup_steps))
sched = palm.lr_sched.TriangularSchedualer(warmup_steps, n_steps)
# step 6-2: create a optimizer
adam = palm.optimizer.Adam(loss_var, lr, sched)
# step 6-3: build backward
trainer.build_backward(optimizer=adam, weight_decay=weight_decay)
# step 7: fit prepared reader and data
trainer.fit_reader(match_reader)
# step 8-1*: load pretrained parameters
trainer.load_pretrain(pre_params, False)
# step 8-2*: set saver to save model
# save_steps = (n_steps-16) // gpu_dev_count
save_steps = 6244
trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type)
# step 8-3: start training
trainer.train(print_steps=print_steps)
# ----------------------- for prediction -----------------------
# step 1-1: create readers for prediction
print('prepare to predict...')
predict_match_reader = palm.reader.MatchReader(vocab_path, max_seqlen, seed=random_seed, phase='predict')
# step 1-2: load the training data
predict_match_reader.load_data(predict_file, batch_size)
# step 2: create a backbone of the model to extract text features
pred_ernie = palm.backbone.ERNIE.from_config(config, phase='predict')
# step 3: register the backbone in reader
predict_match_reader.register_with(pred_ernie)
# step 4: create the task output head
match_pred_head = palm.head.Match(num_classes, input_dim, phase='predict')
# step 5: build forward graph with backbone and task head
trainer.build_predict_forward(pred_ernie, match_pred_head)
# step 6: load pretrained model
pred_ckpt = trainer.load_ckpt(pred_model_path)
# step 7: fit prepared reader and data
trainer.fit_reader(predict_match_reader, phase='predict')
# step 8: predict
print('predicting..')
trainer.predict(print_steps=print_steps, output_dir=pred_output)
## Example 4: Machine Reading Comprehension
This task is a machine reading comprehension task. The following sections detail model preparation, dataset preparation, and how to run the task.
### Step 1: Prepare Pre-trained Models & Datasets
#### Pre-trained Model
The pre-trained model used in this example is [ernie-zh-base](https://github.com/PaddlePaddle/PALM/tree/r0.3-api).
Make sure you have downloaded the required pre-trained model into the current folder.
#### Dataset
This task uses the `CMRC2018` dataset. `CMRC2018` is a shared task organized by the Chinese Information Processing Society of China; it is a span-extraction machine reading comprehension task.
Download dataset:
```shell
python download.py
```
If everything goes well, a folder named `data/` will be created with all the data in it.
Here are some example records:
```json
"paragraphs": [
{
"id": "TRAIN_36",
"context": "NGC 6231是一个位于天蝎座的疏散星团,天球座标为赤经16时54分,赤纬-41度48分,视觉观测大小约45角分,亮度约2.6视星等,距地球5900光年。NGC 6231年龄约为三百二十万年,是一个非常年轻的星团,星团内的最亮星是5等的天蝎座 ζ1星。用双筒望远镜或小型望远镜就能看到个别的行星。NGC 6231在1654年被意大利天文学家乔瓦尼·巴蒂斯特·霍迪尔纳(Giovanni Battista Hodierna)以Luminosae的名字首次纪录在星表中,但是未见记载于夏尔·梅西耶的天体列表和威廉·赫歇尔的深空天体目录。这个天体在1678年被爱德蒙·哈雷(I.7)、1745年被夏西亚科斯(Jean-Phillippe Loys de Cheseaux)(9)、1751年被尼可拉·路易·拉卡伊(II.13)分别再次独立发现。",
"qas": [
{
"question": "NGC 6231的经纬度是多少?",
"id": "TRAIN_36_QUERY_0",
"answers": [
{
"text": "赤经16时54分,赤纬-41度48分",
"answer_start": 27
}
]
}
```
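The file follows the SQuAD-style nesting `data → paragraphs → qas → answers`, which is also how the evaluation script below walks the ground truth. A minimal sketch for iterating it:

```python
# Minimal sketch: iterate the CMRC2018 json (data -> paragraphs -> qas -> answers).
import json

with open('./data/train.json', encoding='utf-8') as f:
    dataset = json.load(f)

for article in dataset['data']:
    for paragraph in article['paragraphs']:
        for qa in paragraph['qas']:
            answer = qa['answers'][0]
            print(qa['id'], qa['question'], answer['text'], answer['answer_start'])
```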
### Step 2: Train & Predict
The code used to perform the reading comprehension task is in `run.py`. Once you have prepared the pre-trained model and the dataset required for the task, run:
```shell
python run.py
```
To select a specific GPU or use multiple GPUs for training, set **`CUDA_VISIBLE_DEVICES`**, for example:
```shell
CUDA_VISIBLE_DEVICES=0,1,2 python run.py
```
Some logs will be shown below:
```
step 1/1515 (epoch 0), loss: 6.251, speed: 0.31 steps/s
step 2/1515 (epoch 0), loss: 6.206, speed: 0.80 steps/s
step 3/1515 (epoch 0), loss: 6.172, speed: 0.86 steps/s
```
After the run, you can view the saved models in the `outputs/` folder and the predictions in the `outputs/predict` folder. Here are some examples of predictions:
```json
{
"DEV_0_QUERY_0": "光 荣 和 ω-force 开 发",
"DEV_0_QUERY_1": "任 天 堂 游 戏 谜 之 村 雨 城",
"DEV_0_QUERY_2": "战 史 演 武 」&「 争 霸 演 武 」。",
"DEV_1_QUERY_0": "大 陆 传 统 器 乐 及 戏 曲 里 面 常 用 的 打 击 乐 记 谱 方 法 , 以 中 文 字 的 声 音 模 拟 敲 击 乐 的 声 音 , 纪 录 打 击 乐 的 各 种 不 同 的 演 奏 方 法 。",
"DEV_1_QUERY_1": "「 锣 鼓 点",
"DEV_1_QUERY_2": "锣 鼓 的 运 用 有 约 定 俗 成 的 程 式 , 依 照 角 色 行 当 的 身 份 、 性 格 、 情 绪 以 及 环 境 , 配 合 相 应 的 锣 鼓 点",
"DEV_1_QUERY_3": "鼓 、 锣 、 钹 和 板 四 类 型",
"DEV_2_QUERY_0": "364.6 公 里",
}
```
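The answers are written with a space between every Chinese character (an artifact of character-level tokenization). If you want compact strings for inspection, a minimal sketch; note that this also removes spaces inside multi-word English spans, and the prediction file path depends on the `output_dir` passed to `trainer.predict` in `run.py`:

```python
# Minimal sketch: strip the inter-character spaces from the predicted answers.
import json

with open('./outputs/predictions.json', encoding='utf-8') as f:   # path set by output_dir in run.py
    predictions = json.load(f)

compact = {qid: ans.replace(' ', '') for qid, ans in predictions.items()}
print(compact['DEV_0_QUERY_0'])   # -> 光荣和ω-force开发
```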
### Step 3: Evaluate
Once you have the prediction, you can run the evaluation script to evaluate the model:
```shell
python evaluate.py
```
The evaluation results are as follows:
```
data_num: 3219
em_score: 0.963031997515, f1: 83.9865402973
```
# -*- coding: utf-8 -*-
import os
import requests
import tarfile
import shutil
from tqdm import tqdm
def download(src, url):
file_size = int(requests.head(url).headers['Content-Length'])
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar = tqdm(total=file_size)
resp = requests.get(url, headers=header, stream=True)
with open(src, 'ab') as f:
for chunk in resp.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(1024)
pbar.close()
return file_size
abs_path = os.path.abspath(__file__)
download_url = "https://ernie.bj.bcebos.com/task_data_zh.tgz"
downlaod_path = os.path.join(os.path.dirname(abs_path), "task_data_zh.tgz")
target_dir = os.path.dirname(abs_path)
download(downlaod_path, download_url)
tar = tarfile.open(downlaod_path)
tar.extractall(target_dir)
os.remove(downlaod_path)
abs_path = os.path.abspath(__file__)
dst_dir = os.path.join(os.path.dirname(abs_path), "data")
if not os.path.exists(dst_dir) or not os.path.isdir(dst_dir):
os.makedirs(dst_dir)
for file in os.listdir(os.path.join(target_dir, 'task_data', 'cmrc2018')):
shutil.move(os.path.join(target_dir, 'task_data', 'cmrc2018', file), dst_dir)
shutil.rmtree(os.path.join(target_dir, 'task_data'))
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Evaluation script for CMRC 2018
version: v5
Note:
v5 formatted output, add usage description
v4 fixed segmentation issues
'''
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from collections import Counter, OrderedDict
import string
import re
import argparse
import json
import sys
import nltk
import pdb
# split Chinese with English
def mixed_segmentation(in_str, rm_punc=False):
in_str = in_str.lower().strip()
segs_out = []
temp_str = ""
sp_char = [
'-', ':', '_', '*', '^', '/', '\\', '~', '`', '+', '=', ',', '。', ':',
'?', '!', '“', '”', ';', '’', '《', '》', '……', '·', '、', '「', '」', '(',
')', '-', '~', '『', '』'
]
for char in in_str:
if rm_punc and char in sp_char:
continue
if re.search(r'[\u4e00-\u9fa5]', char) or char in sp_char:
if temp_str != "":
ss = nltk.word_tokenize(temp_str)
segs_out.extend(ss)
temp_str = ""
segs_out.append(char)
else:
temp_str += char
#handling last part
if temp_str != "":
ss = nltk.word_tokenize(temp_str)
segs_out.extend(ss)
return segs_out
# remove punctuation
def remove_punctuation(in_str):
in_str = in_str.lower().strip()
sp_char = [
'-', ':', '_', '*', '^', '/', '\\', '~', '`', '+', '=', ',', '。', ':',
'?', '!', '“', '”', ';', '’', '《', '》', '……', '·', '、', '「', '」', '(',
')', '-', '~', '『', '』'
]
out_segs = []
for char in in_str:
if char in sp_char:
continue
else:
out_segs.append(char)
return ''.join(out_segs)
# find longest common string
def find_lcs(s1, s2):
m = [[0 for i in range(len(s2) + 1)] for j in range(len(s1) + 1)]
mmax = 0
p = 0
for i in range(len(s1)):
for j in range(len(s2)):
if s1[i] == s2[j]:
m[i + 1][j + 1] = m[i][j] + 1
if m[i + 1][j + 1] > mmax:
mmax = m[i + 1][j + 1]
p = i + 1
return s1[p - mmax:p], mmax
#
def evaluate(ground_truth_file, prediction_file):
f1 = 0
em = 0
total_count = 0
skip_count = 0
for instances in ground_truth_file["data"]:
for instance in instances["paragraphs"]:
context_text = instance['context'].strip()
for qas in instance['qas']:
total_count += 1
query_id = qas['id'].strip()
query_text = qas['question'].strip()
answers = [ans["text"] for ans in qas["answers"]]
if query_id not in prediction_file:
print('Unanswered question: {}\n'.format(
query_id))
skip_count += 1
continue
prediction = prediction_file[query_id]
f1 += calc_f1_score(answers, prediction)
em += calc_em_score(answers, prediction)
f1_score = 100.0 * f1 / total_count
em_score = 100.0 * em / total_count
return f1_score, em_score, total_count, skip_count
def calc_f1_score(answers, prediction):
f1_scores = []
for ans in answers:
ans_segs = mixed_segmentation(ans, rm_punc=True)
prediction_segs = mixed_segmentation(prediction, rm_punc=True)
lcs, lcs_len = find_lcs(ans_segs, prediction_segs)
if lcs_len == 0:
f1_scores.append(0)
continue
precision = 1.0 * lcs_len / len(prediction_segs)
recall = 1.0 * lcs_len / len(ans_segs)
f1 = (2 * precision * recall) / (precision + recall)
f1_scores.append(f1)
return max(f1_scores)
def calc_em_score(answers, prediction):
em = 0
for ans in answers:
ans_ = remove_punctuation(ans)
prediction_ = remove_punctuation(prediction)
if ans_ == prediction_:
em = 1
break
return em
def eval_file(dataset_file, prediction_file):
ground_truth_file = json.load(open(dataset_file, 'r'))
prediction_file = json.load(open(prediction_file, 'r'))
F1, EM, TOTAL, SKIP = evaluate(ground_truth_file, prediction_file)
AVG = (EM + F1) * 0.5
return EM, F1, AVG, TOTAL
if __name__ == '__main__':
EM, F1, AVG, TOTAL = eval_file("task_data/cmrc2018/dev.json", "predictions.json")
print(EM)
print(F1)
print(TOTAL)
\ No newline at end of file
# coding=utf-8
import paddlepalm as palm
import json
from paddlepalm.distribute import gpu_dev_count
if __name__ == '__main__':
# configs
max_seqlen = 512
batch_size = 8
num_epochs = 8
lr = 3e-5
doc_stride = 128
max_query_len = 64
max_ans_len = 128
weight_decay = 0.01
print_steps = 20
vocab_path = './pretrain/ernie-zh-base/vocab.txt'
do_lower_case = True
train_file = './data/train.json'
predict_file = './data/dev.json'
save_path = './outputs/'
pred_output = './outputs/predict/'
save_type = 'ckpt'
task_name = 'cmrc2018'
pre_params = './pretrain/ernie-zh-base/params'
config = json.load(open('./pretrain/ernie-zh-base/ernie_config.json'))
# ----------------------- for training -----------------------
# step 1-1: create readers for training
mrc_reader = palm.reader.MRCReader(vocab_path, max_seqlen, max_query_len, doc_stride, do_lower_case=do_lower_case)
# step 1-2: load the training data
mrc_reader.load_data(train_file, file_format='json', num_epochs=num_epochs, batch_size=batch_size)
# step 2: create a backbone of the model to extract text features
ernie = palm.backbone.ERNIE.from_config(config)
# step 3: register the backbone in reader
mrc_reader.register_with(ernie)
# step 4: create the task output head
mrc_head = palm.head.MRC(max_query_len, config['hidden_size'], do_lower_case=do_lower_case, max_ans_len=max_ans_len)
# step 5-1: create a task trainer
trainer = palm.Trainer(task_name)
# step 5-2: build forward graph with backbone and task head
loss_var = trainer.build_forward(ernie, mrc_head)
# step 6-1*: use warmup
n_steps = mrc_reader.num_examples * num_epochs // batch_size
warmup_steps = int(0.1 * n_steps)
sched = palm.lr_sched.TriangularSchedualer(warmup_steps, n_steps)
# step 6-2: create a optimizer
adam = palm.optimizer.Adam(loss_var, lr, sched)
# step 6-3: build backward
trainer.build_backward(optimizer=adam, weight_decay=weight_decay)
# step 7: fit prepared reader and data
trainer.fit_reader(mrc_reader)
# step 8-1*: load pretrained parameters
trainer.load_pretrain(pre_params)
# step 8-2*: set saver to save model
# save_steps = (n_steps-8) // gpu_dev_count // 4
save_steps = 1520
trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type)
# step 8-3: start training
trainer.train(print_steps=print_steps)
# ----------------------- for prediction -----------------------
# step 1-1: create readers for prediction
predict_mrc_reader = palm.reader.MRCReader(vocab_path, max_seqlen, max_query_len, doc_stride, do_lower_case=do_lower_case, phase='predict')
# step 1-2: load the training data
predict_mrc_reader.load_data(predict_file, batch_size)
# step 2: create a backbone of the model to extract text features
pred_ernie = palm.backbone.ERNIE.from_config(config, phase='predict')
# step 3: register the backbone in reader
predict_mrc_reader.register_with(pred_ernie)
# step 4: create the task output head
mrc_pred_head = palm.head.MRC(max_query_len, config['hidden_size'], do_lower_case=do_lower_case, max_ans_len=max_ans_len, phase='predict')
# step 5: build forward graph with backbone and task head
trainer.build_predict_forward(pred_ernie, mrc_pred_head)
# step 6: load pretrained model
pred_model_path = './outputs/ckpt.step'+str(12160)
pred_ckpt = trainer.load_ckpt(pred_model_path)
# step 7: fit prepared reader and data
trainer.fit_reader(predict_mrc_reader, phase='predict')
# step 8: predict
print('predicting..')
trainer.predict(print_steps=print_steps, output_dir="outputs/")
## Example 5: Prediction (Classification)
This task is a sentiment analysis task. The following sections detail model preparation, dataset preparation, and how to run the task.
### Step 1: Prepare Pre-trained Models & Datasets
#### Pre-trained Model
The pre-trained model used in this example is [ernie-zh-base](https://github.com/PaddlePaddle/PALM/tree/r0.3-api).
Make sure you have downloaded the required pre-trained model into the current folder.
#### Dataset
This task uses the `chnsenticorp` dataset.
Download dataset:
```shell
python download.py
```
If everything goes well, a folder named `data/` will be created with all the data in it.
The data is in tsv format with two fields, `label` and `text_a`. Here are some example rows:
```
label text_a
0 当当网名不符实,订货多日不见送货,询问客服只会推托,只会要求用户再下订单。如此服务留不住顾客的。去别的网站买书服务更好。
0 XP的驱动不好找!我的17号提的货,现在就降价了100元,而且还送杀毒软件!
1 <荐书> 推荐所有喜欢<红楼>的红迷们一定要收藏这本书,要知道当年我听说这本书的时候花很长时间去图书馆找和借都没能如愿,所以这次一看到当当有,马上买了,红迷们也要记得备货哦!
```
### Step 2: Predict
The code used to perform prediction is in `run.py`. Once you have prepared the pre-trained model and the dataset required for the task, run:
```shell
python run.py
```
To select a specific GPU or use multiple GPUs for prediction, set **`CUDA_VISIBLE_DEVICES`**, for example:
```shell
CUDA_VISIBLE_DEVICES=0,1,2 python run.py
```
Some logs will be shown below:
```
step 1/154, speed: 0.51 steps/s
step 2/154, speed: 3.36 steps/s
step 3/154, speed: 3.48 steps/s
```
After the run, you can view the predictions in the `outputs/predict` folder. Here are some examples of predictions:
```
{"index": 0, "logits": [-0.2014336884021759, 0.6799028515815735], "probs": [0.29290086030960083, 0.7070990800857544], "label": 1}
{"index": 1, "logits": [0.8593899011611938, -0.29743513464927673], "probs": [0.7607553601264954, 0.23924466967582703], "label": 0}
{"index": 2, "logits": [0.7462944388389587, -0.7083730101585388], "probs": [0.8107157349586487, 0.18928426504135132], "label": 0}
```
### Step 3: Evaluate
Once you have the prediction, you can run the evaluation script to evaluate the model:
```shell
python evaluate.py
```
The evaluation results are as follows (note that this example runs prediction directly with the pre-trained parameters, without any fine-tuning, so the scores are low):
```
data num: 1200
precision: 0.494166666667, recall: 0.0444078947368, f1: 0.0816944009455
```
# -*- coding: utf-8 -*-
import os
import requests
import tarfile
import shutil
from tqdm import tqdm
def download(src, url):
file_size = int(requests.head(url).headers['Content-Length'])
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar = tqdm(total=file_size)
resp = requests.get(url, headers=header, stream=True)
with open(src, 'ab') as f:
for chunk in resp.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(1024)
pbar.close()
return file_size
abs_path = os.path.abspath(__file__)
download_url = "https://ernie.bj.bcebos.com/task_data_zh.tgz"
downlaod_path = os.path.join(os.path.dirname(abs_path), "task_data_zh.tgz")
target_dir = os.path.dirname(abs_path)
download(downlaod_path, download_url)
tar = tarfile.open(downlaod_path)
tar.extractall(target_dir)
os.remove(downlaod_path)
abs_path = os.path.abspath(__file__)
dst_dir = os.path.join(os.path.dirname(abs_path), "data")
if not os.path.exists(dst_dir) or not os.path.isdir(dst_dir):
os.makedirs(dst_dir)
for file in os.listdir(os.path.join(target_dir, 'task_data', 'chnsenticorp')):
shutil.move(os.path.join(target_dir, 'task_data', 'chnsenticorp', file), dst_dir)
shutil.rmtree(os.path.join(target_dir, 'task_data'))
# -*- coding: utf-8 -*-
import json
import numpy as np
def accuracy(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
return (preds == labels).mean()
def f1(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
tp = np.sum((labels == '1') & (preds == '1'))
tn = np.sum((labels == '0') & (preds == '0'))
fp = np.sum((labels == '0') & (preds == '1'))
fn = np.sum((labels == '1') & (preds == '0'))
p = tp * 1.0 / (tp + fp)
r = tp * 1.0 / (tp + fn) * 1.0
f1 = (2 * p * r) / (p + r + 1e-8)
return f1
def recall(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
# recall=TP/(TP+FN)
tp = np.sum((labels == '1') & (preds == '1'))
fn = np.sum((labels == '1') & (preds == '0'))
re = tp * 1.0 / (tp + fn)
return re
def res_evaluate(res_dir="./outputs/predict/predictions.json", eval_phase='test'):
if eval_phase == 'test':
data_dir="./data/test.tsv"
elif eval_phase == 'dev':
data_dir="./data/dev.tsv"
else:
assert eval_phase in ['dev', 'test'], 'eval_phase should be dev or test'
labels = []
with open(data_dir, "r") as file:
first_flag = True
for line in file:
line = line.split("\t")
label = line[0]
if label=='label':
continue
labels.append(str(label))
file.close()
preds = []
with open(res_dir, "r") as file:
for line in file.readlines():
line = json.loads(line)
pred = line['label']
preds.append(str(pred))
file.close()
assert len(labels) == len(preds), "prediction result doesn't match to labels"
print('data num: {}'.format(len(labels)))
print("precision: {}, recall: {}, f1: {}".format(accuracy(preds, labels), recall(preds, labels), f1(preds, labels)))
res_evaluate()
# coding=utf-8
import paddlepalm as palm
import json
from paddlepalm.distribute import gpu_dev_count
if __name__ == '__main__':
# configs
max_seqlen = 256
batch_size = 8
vocab_path = './pretrain/ernie-zh-base/vocab.txt'
predict_file = './data/test.tsv'
random_seed = 1
config = json.load(open('./pretrain/ernie-zh-base/ernie_config.json'))
input_dim = config['hidden_size']
num_classes = 2
task_name = 'chnsenticorp'
pred_output = './outputs/predict/'
print_steps = 20
pre_params = './pretrain/ernie-zh-base/params'
# ----------------------- for prediction -----------------------
# step 1-1: create readers for prediction
print('prepare to predict...')
predict_cls_reader = palm.reader.ClassifyReader(vocab_path, max_seqlen, seed=random_seed, phase='predict')
# step 1-2: load the training data
predict_cls_reader.load_data(predict_file, batch_size)
# step 2: create a backbone of the model to extract text features
pred_ernie = palm.backbone.ERNIE.from_config(config, phase='predict')
# step 3: register the backbone in reader
predict_cls_reader.register_with(pred_ernie)
# step 4: create the task output head
cls_pred_head = palm.head.Classify(num_classes, input_dim, phase='predict')
# step 5-1: create a task trainer
trainer = palm.Trainer(task_name)
# step 5-2: build forward graph with backbone and task head
trainer.build_predict_forward(pred_ernie, cls_pred_head)
# step 6: load pretrained model
pred_model = trainer.load_predict_model(pre_params)
# step 7: fit prepared reader and data
trainer.fit_reader(predict_cls_reader, phase='predict')
# step 8: predict
print('predicting..')
trainer.predict(print_steps=print_steps, output_dir=pred_output)
## Example 3: Tagging
This task is a named entity recognition task. The following sections detail model preparation, dataset preparation, and how to run the task.
### Step 1: Prepare Pre-trained Models & Datasets
#### Pre-trained Model
The pre-trained model used in this example is [ernie-zh-base](https://github.com/PaddlePaddle/PALM/tree/r0.3-api).
Make sure you have downloaded the required pre-trained model into the current folder.
#### Dataset
This task uses the `MSRA-NER(SIGHAN2006)` dataset.
Download dataset:
```shell
python download.py
```
If everything goes well, a folder named `data/` will be created with all the data in it.
The data is in tsv format with two fields, `text_a` and `label`. Here are some example rows:
```
text_a label
在 这 里 恕 弟 不 恭 之 罪 , 敢 在 尊 前 一 诤 : 前 人 论 书 , 每 曰 “ 字 字 有 来 历 , 笔 笔 有 出 处 ” , 细 读 公 字 , 何 尝 跳 出 前 人 藩 篱 , 自 隶 变 而 后 , 直 至 明 季 , 兄 有 何 新 出 ? O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
相 比 之 下 , 青 岛 海 牛 队 和 广 州 松 日 队 的 雨 中 之 战 虽 然 也 是 0 ∶ 0 , 但 乏 善 可 陈 。 O O O O O B-ORG I-ORG I-ORG I-ORG I-ORG O B-ORG I-ORG I-ORG I-ORG I-ORG O O O O O O O O O O O O O O O O O O O
理 由 多 多 , 最 无 奈 的 却 是 : 5 月 恰 逢 双 重 考 试 , 她 攻 读 的 博 士 学 位 论 文 要 通 考 ; 她 任 教 的 两 所 学 校 , 也 要 在 这 段 时 日 大 考 。 O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
```
### Step 2: Train & Predict
The code used to perform the tagging task is in `run.py`. Once you have prepared the pre-trained model and the dataset required for the task, run:
```shell
python run.py
```
To select a specific GPU or use multiple GPUs for training, set **`CUDA_VISIBLE_DEVICES`**, for example:
```shell
CUDA_VISIBLE_DEVICES=0,1,2 python run.py
```
Some logs will be shown below:
```
step 1/652 (epoch 0), loss: 216.002, speed: 0.32 steps/s
step 2/652 (epoch 0), loss: 202.567, speed: 1.28 steps/s
step 3/652 (epoch 0), loss: 170.677, speed: 1.05 steps/s
```
After the run, you can view the saved models in the `outputs/` folder and the predictions in the `outputs/predict` folder. Here are some examples of predictions:
```
[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 4, 4, 6, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]
[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]
[6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6]
```
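Each line is a list of per-token label ids. `data/label_map.json` maps tag names to ids (the same map `evaluate.py` below applies to the gold labels), so inverting it turns the predictions back into tags. A minimal sketch, assuming integer ids:

```python
# Minimal sketch: map predicted label ids back to tag names via data/label_map.json
# (label_map maps tag -> integer id, as used in evaluate.py below).
import json

with open('./data/label_map.json') as f:
    label_map = json.load(f)
id_to_tag = {v: k for k, v in label_map.items()}

with open('./outputs/predict/predictions.json') as f:
    for line in f:
        ids = json.loads(line)
        print([id_to_tag[i] for i in ids])
```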
### Step 3: Evaluate
Once you have the prediction, you can run the evaluation script to evaluate the model:
```shell
python evaluate.py
```
The evaluation results are as follows:
```
precision: 0.948718989809, recall: 0.944806113784, f1: 0.946758508914
```
# -*- coding: utf-8 -*-
import os
import requests
import tarfile
import shutil
from tqdm import tqdm
def download(src, url):
file_size = int(requests.head(url).headers['Content-Length'])
header = {
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
'70.0.3538.67 Safari/537.36'
}
pbar = tqdm(total=file_size)
resp = requests.get(url, headers=header, stream=True)
with open(src, 'ab') as f:
for chunk in resp.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
pbar.update(1024)
pbar.close()
return file_size
abs_path = os.path.abspath(__file__)
download_url = "https://ernie.bj.bcebos.com/task_data_zh.tgz"
downlaod_path = os.path.join(os.path.dirname(abs_path), "task_data_zh.tgz")
target_dir = os.path.dirname(abs_path)
download(downlaod_path, download_url)
tar = tarfile.open(downlaod_path)
tar.extractall(target_dir)
os.remove(downlaod_path)
abs_path = os.path.abspath(__file__)
dst_dir = os.path.join(os.path.dirname(abs_path), "data")
if not os.path.exists(dst_dir) or not os.path.isdir(dst_dir):
os.makedirs(dst_dir)
for file in os.listdir(os.path.join(target_dir, 'task_data', 'msra_ner')):
shutil.move(os.path.join(target_dir, 'task_data', 'msra_ner', file), dst_dir)
shutil.rmtree(os.path.join(target_dir, 'task_data'))
# -*- coding: utf-8 -*-
import json
def load_label_map(map_dir="./data/label_map.json"):
"""
:param map_dir: dict indictuing chunk type
:return:
"""
return json.load(open(map_dir, "r"))
def cal_chunk(total_res, total_label):
assert len(total_label) == len(total_res), 'prediction result doesn\'t match to labels'
num_labels = 0
num_corr = 0
num_infers = 0
for res, label in zip(total_res, total_label):
assert len(res) == len(label), "prediction result doesn\'t match to labels"
num_labels += sum([0 if i == 6 else 1 for i in label])
num_corr += sum([1 if label[i] == res[i] and label[i] != 6 else 0 for i in range(len(label))])
num_infers += sum([0 if i == 6 else 1 for i in res])
precision = num_corr * 1.0 / num_infers if num_infers > 0 else 0.0
recall = num_corr * 1.0 / num_labels if num_labels > 0 else 0.0
f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
return precision, recall, f1
def res_evaluate(res_dir="./outputs/predict/predictions.json", data_dir="./data/test.tsv"):
label_map = load_label_map()
total_label = []
with open(data_dir, "r") as file:
first_flag = True
for line in file:
if first_flag:
first_flag = False
continue
line = line.strip("\n")
if len(line) == 0:
continue
line = line.split("\t")
if len(line) < 2:
continue
labels = line[1].split("\x02")
total_label.append(labels)
total_label = [[label_map[j] for j in i] for i in total_label]
total_res = []
with open(res_dir, "r") as file:
cnt = 0
for line in file:
line = line.strip("\n")
if len(line) == 0:
continue
try:
res_arr = json.loads(line)
if len(total_label[cnt]) < len(res_arr):
total_res.append(res_arr[1: 1 + len(total_label[cnt])])
elif len(total_label[cnt]) == len(res_arr):
total_res.append(res_arr)
else:
total_res.append(res_arr)
total_label[cnt] = total_label[cnt][: len(res_arr)]
except:
print("json format error: {}".format(cnt))
print(line)
cnt += 1
precision, recall, f1 = cal_chunk(total_res, total_label)
print("precision: {}, recall: {}, f1: {}".format(precision, recall, f1))
res_evaluate()
# coding=utf-8
import paddlepalm as palm
import json
from paddlepalm.distribute import gpu_dev_count
if __name__ == '__main__':
# configs
max_seqlen = 256
batch_size = 16
num_epochs = 6
lr = 5e-5
num_classes = 7
weight_decay = 0.01
dropout_prob = 0.1
vocab_path = './pretrain/ernie-zh-base/vocab.txt'
label_map = './data/label_map.json'
random_seed = 1
train_file = './data/train.tsv'
predict_file = './data/test.tsv'
save_path='./outputs/'
save_type='ckpt'
pre_params = './pretrain/ernie-zh-base/params'
config = json.load(open('./pretrain/ernie-zh-base/ernie_config.json'))
input_dim = config['hidden_size']
task_name = 'msra_ner'
pred_output = './outputs/predict/'
train_print_steps = 10
pred_print_steps = 20
# ----------------------- for training -----------------------
# step 1-1: create readers for training
ner_reader = palm.reader.SequenceLabelReader(vocab_path, max_seqlen, label_map, seed=random_seed)
# step 1-2: load the training data
ner_reader.load_data(train_file, file_format='tsv', num_epochs=num_epochs, batch_size=batch_size)
# step 2: create a backbone of the model to extract text features
ernie = palm.backbone.ERNIE.from_config(config)
# step 3: register the backbone in reader
ner_reader.register_with(ernie)
# step 4: create the task output head
ner_head = palm.head.SequenceLabel(num_classes, input_dim, dropout_prob)
# step 5-1: create a task trainer
trainer = palm.Trainer(task_name)
# step 5-2: build forward graph with backbone and task head
loss_var = trainer.build_forward(ernie, ner_head)
# step 6-1*: use warmup
n_steps = ner_reader.num_examples * num_epochs // batch_size
warmup_steps = int(0.1 * n_steps)
print('total_steps: {}'.format(n_steps))
print('warmup_steps: {}'.format(warmup_steps))
sched = palm.lr_sched.TriangularSchedualer(warmup_steps, n_steps)
# step 6-2: create a optimizer
adam = palm.optimizer.Adam(loss_var, lr, sched)
# step 6-3: build backward
trainer.build_backward(optimizer=adam, weight_decay=weight_decay)
# step 7: fit prepared reader and data
trainer.fit_reader(ner_reader)
# step 8-1*: load pretrained parameters
trainer.load_pretrain(pre_params)
# step 8-2*: set saver to save model
save_steps = (n_steps-20)// gpu_dev_count
print('save_steps: {}'.format(save_steps))
trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type)
# step 8-3: start training
trainer.train(print_steps=train_print_steps)
# ----------------------- for prediction -----------------------
# step 1-1: create readers for prediction
print('prepare to predict...')
predict_ner_reader = palm.reader.SequenceLabelReader(vocab_path, max_seqlen, label_map, phase='predict')
# step 1-2: load the training data
predict_ner_reader.load_data(predict_file, batch_size)
# step 2: create a backbone of the model to extract text features
pred_ernie = palm.backbone.ERNIE.from_config(config, phase='predict')
# step 3: register the backbone in reader
predict_ner_reader.register_with(pred_ernie)
# step 4: create the task output head
ner_pred_head = palm.head.SequenceLabel(num_classes, input_dim, phase='predict')
# step 5: build forward graph with backbone and task head
trainer.build_predict_forward(pred_ernie, ner_pred_head)
# step 6: load pretrained model
pred_model_path = './outputs/ckpt.step' + str(save_steps)
pred_ckpt = trainer.load_ckpt(pred_model_path)
# step 7: fit prepared reader and data
trainer.fit_reader(predict_ner_reader, phase='predict')
# step 8: predict
print('predicting..')
trainer.predict(print_steps=pred_print_steps, output_dir=pred_output)
......@@ -31,12 +31,13 @@ __all__ = ["download", "ls"]
ssl._create_default_https_context = ssl._create_unverified_context
_items = {
'pretrain': {'ernie-en-uncased-large': 'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz',
'pretrain': {'ernie-en-large': 'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz',
'ernie-en-base': 'https://ernie.bj.bcebos.com/ERNIE_Base_en_stable-2.0.0.tar.gz',
'ernie-zh-base':'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz',
'bert-en-uncased-large': 'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz',
'bert-en-uncased-base': 'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz',
'ernie-ch-uncased-base':'https://ernie.bj.bcebos.com/ERNIE_1.0_max-len-512.tar.gz',
'roberta-cn-base': 'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz',
'roberta-cn-large': 'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz',
'roberta-zh-base': 'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_ext_L-12_H-768_A-12.tar.gz',
'roberta-zh-large': 'https://bert-models.bj.bcebos.com/chinese_roberta_wwm_large_ext_L-24_H-1024_A-16.tar.gz',
'utils': None},
'vocab': {'utils': None},
'backbone': {'utils': None},
......@@ -132,27 +133,27 @@ def _convert(path, silent=False):
tar_info.close()
os.removedirs(path + '/params1/')
def download(scope, item='all', path='.'):
def download(item, scope='all', path='.'):
"""download an item. The available scopes and contained items can be showed with `paddlepalm.downloader.ls`.
Args:
scope: the scope the item belongs to.
item: the item to download.
scope: the scope of the item to download.
path: the target dir to download to. Default is `.`, means current dir.
"""
scope = scope.lower()
item = item.lower()
ascopeert scope in _scopes, '{} is not found. Support list: {}'.format(scope, list(_scopes.keys()))
scope = scope.lower()
assert item in _items, '{} is not found. Support list: {}'.format(item, list(_items.keys()))
if _scopes[scope]['utils'] is not None:
_download(scope, 'utils', path, silent=True)
if _items[item]['utils'] is not None:
_download(item, 'utils', path, silent=True)
if item != 'all':
ascopeert item in _scopes[scope], '{} is not found. Support items: {}'.format(item, list(_scopes[scope].keys()))
_download(scope, item, path)
if scope != 'all':
assert scope in _items[item], '{} is not found. Support scopes: {}'.format(scope, list(_items[item].keys()))
_download(item, scope, path)
else:
for s in _scopes[scope].keys():
_download(scope, s, path)
for s in _items[item].keys():
_download(item, s, path)
def _ls(item, scope, l = 10):
......@@ -165,22 +166,19 @@ def _ls(item, scope, l = 10):
continue
print (' => '+s)
def ls(scope='all'):
"""show all the available download items of a scope.
Args:
scope: the scope to show items. Default is 'all', means to show all items in all scopes. Avaliable scopes: pretrain.
"""
def ls(item='all', scope='all'):
if scope != 'all':
assert scope in _items, '{} is not found. Support scopes: {}'.format(scope, list(_items.keys()))
print ('Available {} scopes:'.format(scope))
_ls(scope, 'all')
if scope == 'utils':
return
if item != 'all':
assert item in _items, '{} is not found. Support scopes: {}'.format(item, list(_items.keys()))
print ('Available {} items:'.format(item))
_ls(item, scope)
else:
l = max(map(len, _items.keys()))
for i in _items.keys():
print ('Available {} items: '.format(i))
_ls(i, 'all', l)
_ls(i, scope, l)
......@@ -33,14 +33,14 @@ def yield_pieces(data, distribute_strategy, batch_size):
s = s.strip().lower()
if s == 's' or s == 'split':
if p - stride >= len(d):
print('WARNING: no more examples to feed empty devices')
# print('WARNING: no more examples to feed empty devices')
temp = []
return
temp.append(d[p-stride:p])
elif s == 'u' or s == 'unstack':
assert len(d) <= dev_count, 'Tensor size on dim 0 must be less equal to dev_count when unstack is applied.'
if p//stride > len(d):
print('WARNING: no more examples to feed empty devices')
# print('WARNING: no more examples to feed empty devices')
return
temp.append(d[p//stride-1])
elif s == 'c' or s == 'copy':
......@@ -102,18 +102,19 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
batch_buf.append(batch)
flag_buf.append(flag)
yield batch_buf, flag_buf
else:
else:
break
queue.join()
def decode_fake(nums, mask, bs):
n_t = 0
for flag in mask:
for flag in mask:
if not flag:
break
n_t = n_t + 1
n_f = len(mask) - n_t
p1 = nums - (n_t-1) * bs
each_f = p1 / (n_f+1)
......
from _downloader import *
from _downloader import *
\ No newline at end of file
......@@ -18,6 +18,7 @@ from paddle.fluid import layers
from paddlepalm.head.base_head import Head
import numpy as np
import os
import json
class Classify(Head):
......@@ -37,6 +38,7 @@ class Classify(Head):
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=param_initializer_range)
self._preds = []
self._probs = []
@property
def inputs_attrs(self):
......@@ -51,7 +53,9 @@ class Classify(Head):
if self._is_training:
return {'loss': [[1], 'float32']}
else:
return {'logits': [[-1, self.num_classes], 'float32']}
return {'logits': [[-1, self.num_classes], 'float32'],
'probs': [[-1, self.num_classes], 'float32']}
def build(self, inputs, scope_name=''):
sent_emb = inputs['backbone']['sentence_embedding']
......@@ -70,22 +74,22 @@ class Classify(Head):
initializer=self._param_initializer),
bias_attr=fluid.ParamAttr(
name=scope_name+"cls_out_b", initializer=fluid.initializer.Constant(0.)))
probs = fluid.layers.softmax(logits)
if self._is_training:
inputs = fluid.layers.softmax(logits)
loss = fluid.layers.cross_entropy(
input=inputs, label=label_ids)
input=probs, label=label_ids)
loss = layers.mean(loss)
return {"loss": loss}
else:
return {"logits":logits}
return {"logits":logits,
"probs":probs}
def batch_postprocess(self, rt_outputs):
if not self._is_training:
logits = rt_outputs['logits']
preds = np.argmax(logits, -1)
self._preds.extend(preds.tolist())
return preds
probs = rt_outputs['probs']
self._preds.extend(logits.tolist())
self._probs.extend(probs.tolist())
def epoch_postprocess(self, post_inputs, output_dir=None):
# there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs
......@@ -94,9 +98,22 @@ class Classify(Head):
for p in self._preds:
print(p)
else:
with open(os.path.join(self._pred_output_path, 'predictions.json'), 'w') as writer:
with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer:
for p in self._preds:
writer.write(str(p)+'\n')
print('Predictions saved at '+os.path.join(self._pred_output_path, 'predictions.json'))
print('Predictions saved at '+os.path.join(output_dir, 'predictions.json'))
def epoch_postprocess(self, post_inputs, output_dir=None):
# post_inputs is not needed here and is not declared in epoch_inputs_attrs, so it contains no elements
if not self._is_training:
if output_dir is None:
raise ValueError('argument output_dir not found in config. Please add it into config dict/file.')
with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer:
for i in range(len(self._preds)):
label = int(np.argmax(self._preds[i]))  # works for any num_classes, not just 2
result = {'index': i, 'label': label, 'logits': self._preds[i], 'probs': self._probs[i]}
result = json.dumps(result)
writer.write(result+'\n')
print('Predictions saved at '+os.path.join(output_dir, 'predictions.json'))
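With the changes above, every line of the dumped predictions.json is a standalone JSON object. Assuming a binary sentiment task, a line would look roughly like the following (all values illustrative):

{"index": 0, "label": 1, "logits": [-0.83, 1.27], "probs": [0.109, 0.891]}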
......@@ -83,8 +83,8 @@ class Match(Head):
if self._learning_strategy=='pairwise':
return {"probs": [[-1, 1], 'float32']}
else:
return {"logits": [[-1, 2], 'float32'],
"probs": [[-1, 2], 'float32']}
return {"logits": [[-1, self._num_classes], 'float32'],
"probs": [[-1, self._num_classes], 'float32']}
def build(self, inputs, scope_name=""):
......@@ -184,6 +184,6 @@ class Match(Head):
elif self._learning_strategy == 'pairwise':
label = 0 if self._preds[i][0] < 0.5 else 1
result = {'index': i, 'label': label, 'probs': self._preds[i][0]}
result = json.dumps(result)
result = json.dumps(result, ensure_ascii=False)
writer.write(result+'\n')
print('Predictions saved at '+os.path.join(output_dir, 'predictions.json'))
\ No newline at end of file
print('Predictions saved at '+os.path.join(output_dir, 'predictions.json'))
......@@ -360,14 +360,14 @@ def _write_predictions(all_examples, all_features, all_results, n_best_size,
all_nbest_json[example.qas_id] = nbest_json
with open(output_prediction_file, "w") as writer:
writer.write(json.dumps(all_predictions, indent=4) + "\n")
writer.write(json.dumps(all_predictions, indent=4, ensure_ascii=False) + "\n")
with open(output_nbest_file, "w") as writer:
writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
writer.write(json.dumps(all_nbest_json, indent=4, ensure_ascii=False) + "\n")
if with_negative:
with open(output_null_log_odds_file, "w") as writer:
writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
writer.write(json.dumps(scores_diff_json, indent=4, ensure_ascii=False) + "\n")
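Passing ensure_ascii=False keeps non-ASCII answers (e.g. Chinese spans from CMRC2018) human-readable in the dumped JSON instead of escaping them as \uXXXX sequences. For example:

import json
print(json.dumps({'answer': u'北京'}))                       # {"answer": "\u5317\u4eac"}
print(json.dumps({'answer': u'北京'}, ensure_ascii=False))   # {"answer": "北京"}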
def _get_final_text(pred_text, orig_text, do_lower_case, verbose):
......
from paddlepalm.lr_sched.schedualer import Schedualer
from paddlepalm.lr_sched.base_schedualer import Schedualer
from paddle import fluid
class TriangularSchedualer(Schedualer):
......@@ -14,7 +13,7 @@ class TriangularSchedualer(Schedualer):
num_train_steps: the number of train steps.
"""
BaseSchedualer.__init__(self)
Schedualer.__init__(self)
assert num_train_steps > warmup_steps > 0
self.warmup_steps = warmup_steps
self.num_train_steps = num_train_steps
......
from paddlepalm.lr_sched.schedualer import Schedualer
from paddlepalm.lr_sched.base_schedualer import Schedualer
import paddle.fluid as fluid
class WarmupSchedualer(Schedualer):
......
......@@ -26,7 +26,7 @@ class Adam(Optimizer):
def __init__(self, loss_var, lr, lr_schedualer=None):
BaseOptimizer.__init__(self, loss_var, lr, lr_schedualer=None)
Optimizer.__init__(self, loss_var, lr, lr_schedualer=None)
self._loss = loss_var
self._lr = lr
......
......@@ -42,6 +42,12 @@ if six.PY3:
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
if sys.version[0] == '2':
reload(sys)
sys.setdefaultencoding('utf-8')
else:
import importlib
importlib.reload(sys)
def csv_reader(fd, delimiter='\t'):
def gen():
......@@ -338,16 +344,8 @@ class Reader(object):
def f():
for i in wrapper():
yield i
# def f():
# try:
# for i in wrapper():
# yield i
# except Exception as e:
# import traceback
# traceback.print_exc()
return f
# return wrapper
class MaskLMReader(Reader):
......@@ -478,11 +476,14 @@ class MaskLMReader(Reader):
return_input_mask=True,
return_max_len=False,
return_num_token=False,
dev_count=1)
dev_count=dev_count)
# yield batch
for piece in palm.distribute.yield_pieces(batch_data, ['s', 's', 's', 's', 's', 'u', 'u'], batch_size):
yield piece
# # ds = ['s'] * len(batch_data)
# for piece in palm.distribute.yield_pieces(batch_data, ['s'] * 7, batch_size):
# yield piece
return wrapper
......@@ -725,7 +726,8 @@ class MRCReader(Reader):
def _read_json(self, input_file, is_training):
examples = []
with open(input_file, "r", encoding='utf8') as f:
with open(input_file, "r", encoding='utf-8') as f:
# f = f.read().decode(encoding='gbk').encode(encoding='utf-8')
input_data = json.load(f)["data"]
for entry in input_data:
for paragraph in entry["paragraphs"]:
......
......@@ -47,6 +47,11 @@ class Trainer(object):
self._task_head = None
self._pred_head = None
self._train_reader = None
self._predict_reader = None
self._train_iterator = None
self._predict_iterator = None
self._train_init = False
self._predict_init = False
......@@ -55,7 +60,7 @@ class Trainer(object):
# if save_predict_model:
# self._save_predict_model = True
# assert pred_head is not None, "pred_head is required to save predict model."
# self._pred_reader = reader.clone(phase='pred')
# self._pred_reader = reader.clone(phase='predict')
# else:
# assert pred_head is None, "You should set save_predict_model as True, or the pred_head is invalid."
# self._save_predict_model = False
......@@ -354,14 +359,20 @@ class Trainer(object):
# load data
self._check_phase(phase)
assert self._shape_and_dtypes is not None or self._pred_shape_and_dtypes is not None, "You need to build_forward or build_predict_head first to prepare input features."
if phase=='train':
assert self._shape_and_dtypes is not None, "You need to build_forward or build_predict_head first to prepare input features."
else:
assert self._pred_shape_and_dtypes is not None, "You need to build_forward or build_predict_head first to prepare input features."
# Not sure whether rounding up is needed here; to be confirmed
# tail = self._num_examples % batch_size > 0
# self._steps_pur_epoch = self._num_examples // batch_size + 1 if tail else 0
batch_size = reader._batch_size
self._num_epochs = reader.num_epochs
if phase == 'train':
self._train_reader = reader
self._steps_pur_epoch = reader.num_examples // batch_size
shape_and_dtypes = self._shape_and_dtypes
name_to_position = self._name_to_position
......@@ -374,6 +385,7 @@ class Trainer(object):
reader_helper.check_io(self._task_head.inputs_attrs['reader'], reader.outputs_attr, in_name='task_head(reader)', out_name='reader(train)')
reader_helper.check_io(self._task_head.inputs_attrs['backbone'], self._backbone.outputs_attr, in_name='task_head(backbone, train)', out_name='backbone')
elif phase == 'predict':
self._predict_reader = reader
tail = self._num_examples % batch_size > 0
self._pred_steps_pur_epoch = reader.num_examples // batch_size + 1 if tail else 0
shape_and_dtypes = self._pred_shape_and_dtypes
......@@ -383,7 +395,7 @@ class Trainer(object):
self._pred_num_examples = reader.num_examples
reader_helper.check_io(self._pred_backbone.inputs_attr, reader.outputs_attr, in_name='backbone', out_name='reader(predict)')
reader_helper.check_io(self._pred_head.inputs_attrs['reader'], reader.outputs_attr, in_name='task_head(reader)', out_name='reader(predict)')
reader_helper.check_io(inst._pred_head.inputs_attrs['backbone'], self._pred_backbone.outputs_attr, in_name='task_head(backbone, predict)', out_name='backbone')
reader_helper.check_io(self._pred_head.inputs_attrs['backbone'], self._pred_backbone.outputs_attr, in_name='task_head(backbone, predict)', out_name='backbone')
else:
raise NotImplementedError()
......@@ -405,13 +417,13 @@ class Trainer(object):
if gpu_dev_count > 1:
distribute_feeder_fn = data_feeder(iterator_fn, feed_batch_process_fn)
else:
distribute_feeder_fn = iterator_fn
distribute_feeder_fn = iterator_fn()
if phase == 'train':
self._train_reader = distribute_feeder_fn()
self._train_iterator = distribute_feeder_fn
self._feed_batch_process_fn = feed_batch_process_fn
elif phase == 'predict':
self._predict_reader = distribute_feeder_fn()
self._predict_iterator = distribute_feeder_fn
self._pred_feed_batch_process_fn = feed_batch_process_fn
# return distribute_feeder_fn()
......@@ -439,6 +451,7 @@ class Trainer(object):
saver.init_pretraining_params(
self._exe,
model_path,
convert=False,
main_program=self._train_init_prog,
strict=True)
# elif phase == 'predict':
......@@ -447,13 +460,28 @@ class Trainer(object):
saver.init_pretraining_params(
self._exe,
model_path,
convert=False,
main_program=self._pred_init_prog,
strict=True)
else:
raise Exception("model not found. You should at least build_forward or build_predict_forward to load its checkpoint.")
def load_predict_model(self, model_path):
raise NotImplementedError()
def load_predict_model(self, model_path, convert=False):
"""
load pretrain models(backbone) for training.
Args:
model_path: the path of saved pretrained parameters.
"""
assert self._pred_prog is not None, "training graph not found. You should at least build_forward to load its pretrained parameters."
saver.init_pretraining_params(
self._exe,
model_path,
convert=convert,
main_program=self._pred_prog)
# raise NotImplementedError()
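A minimal usage sketch for the new load_predict_model; the directory name and the surrounding calls are assumptions for illustration, not part of this diff:

# Hypothetical prediction flow; 'outputs/ckpt' is an assumed save directory.
trainer.load_predict_model('outputs/ckpt')
trainer.fit_reader(predict_cls_reader, phase='predict')  # reader created with phase='predict'
trainer.predict(print_steps=20, output_dir='outputs')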
def load_pretrain(self, model_path, convert=False):
"""
......@@ -525,7 +553,8 @@ class Trainer(object):
Args:
print_steps: int. Logging frequency of training messages, e.g., current step, loss, and speed.
"""
iterator = self._train_reader
iterator = self._train_iterator
self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=self._loss_var.name)
# if save_path is not None or save_steps is not None:
......@@ -547,7 +576,7 @@ class Trainer(object):
rt_outputs = self.train_one_step(feed)
# if gpu_dev_count > 1:
# feed, mask = feed
# rt_outputs = self.exe.run(self._train_prog, feed=feed, fetch_list=self._fetch_list)
# rt_outputs = self._exe.run(self._train_prog, feed=feed, fetch_list=self._fetch_list)
# print(rt_outputs)
# print(len(rt_outputs))
# if gpu_dev_count > 1:
......@@ -559,8 +588,6 @@ class Trainer(object):
task_rt_outputs = {k[len(self.name+'.'):]: v for k,v in rt_outputs.items() if k.startswith(self.name+'.')}
self._task_head.batch_postprocess(task_rt_outputs)
# if self._save_predict_model and self._cur_train_step % save_steps == 0:
# self.save(save_path, suffix='.step'+str(self._cur_train_steps))
if print_steps > 0 and self._cur_train_step % print_steps == 0:
loss = rt_outputs[self.name+'.loss']
......@@ -570,10 +597,10 @@ class Trainer(object):
time_cost = time_end - time_begin
print("step {}/{} (epoch {}), loss: {:.3f}, speed: {:.2f} steps/s".format(
(self._cur_train_step-1) % self._steps_pur_epoch + 1, self._steps_pur_epoch, self._cur_train_epoch,
(self._cur_train_step-1) % self._steps_pur_epoch + 1 , self._steps_pur_epoch, self._cur_train_epoch,
loss, print_steps / time_cost))
time_begin = time.time()
time_begin = time.time()
self._check_save()
# if cur_task.train_finish and cur_task.cur_train_step + cur_task.cur_train_epoch * cur_task.steps_pur_epoch == cur_task.expected_train_steps:
# print(cur_task.name+': train finished!')
# cur_task.save()
......@@ -583,7 +610,6 @@ class Trainer(object):
# save_path = os.path.join(main_conf['save_path'], 'ckpt',
# "step_" + str(global_step))
# fluid.io.save_persistables(self.exe, save_path, saver_program)
# print('checkpoint has been saved at '+save_path)
# print("ALL tasks train finished, exiting...")
......@@ -595,18 +621,18 @@ class Trainer(object):
output_dir: str. The path to save prediction results; default is None, in which case the results are printed to the screen directly.
print_steps: int. Logging frequency of prediction messages, e.g., current progress and speed.
"""
iterator = self._predict_reader
iterator = self._predict_iterator
self._distribute_pred_prog = fluid.CompiledProgram(self._pred_prog).with_data_parallel()
if output_dir is not None and not os.path.exists(output_dir):
os.makedirs(output_dir)
time_begin = time.time()
cur_predict_step = 0
for feed in iterator:
rt_outputs = self.predict_one_batch(feed)
# rt_outputs = {k[len(self.name+'.'):]: v for k,v in rt_outputs.items() if k.startswith(self.name+'.')}
# print(rt_outputs)
self._pred_head.batch_postprocess(rt_outputs)
cur_predict_step += 1
......@@ -621,7 +647,7 @@ class Trainer(object):
time_begin = time.time()
if self._pred_head.epoch_inputs_attrs:
reader_outputs = self._pred_reader.get_epoch_outputs()
reader_outputs = self._predict_reader.get_epoch_outputs()
else:
reader_outputs = None
......@@ -691,7 +717,7 @@ class Trainer(object):
if gpu_dev_count > 1:
feed, mask = batch
rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list)
num_fakes = decode_fake(len(rt_outputs[0]), mask, self._batch_size)
num_fakes = decode_fake(len(rt_outputs[0]), mask, self._train_batch_size)
for _ in range(num_fakes):
for item in rt_outputs:
item.pop()
......@@ -702,14 +728,13 @@ class Trainer(object):
rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)}
self._cur_train_step += 1
self._cur_train_epoch = (self._cur_train_step-1) // self._steps_pur_epoch
self._check_save()
return rt_outputs
def predict_one_batch(self, batch):
if gpu_dev_count > 1:
feed, mask = batch
rt_outputs = self.exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._pred_fetch_list)
num_fakes = decode_fake(len(rt_outputs[0]), mask, self._batch_size)
rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._pred_fetch_list)
num_fakes = decode_fake(len(rt_outputs[0]), mask, self._predict_batch_size)
for _ in range(num_fakes):
for item in rt_outputs:
item.pop()
......@@ -720,6 +745,8 @@ class Trainer(object):
rt_outputs = {k:v for k,v in zip(self._pred_fetch_name_list, rt_outputs)}
return rt_outputs
@property
def name(self):
return self._name
......
#!/bin/sh
if [[ $# != 1 ]]; then
echo "usage: bash convert_params.sh <params_dir>"
exit 1
fi
if [[ -f $1/__palminfo__ ]]; then
echo "already converted."
exit 0
fi
echo "converting..."
if [[ -d $1/params ]]; then
cd $1/params
else
cd $1
fi
mkdir .palm.backup
for file in $(ls *)
do cp $file .palm.backup; mv $file "__paddlepalm_"$file
done
tar -cf __rawmodel__ .palm.backup/*
rm .palm.backup/*
mv __rawmodel__ .palm.backup
# find . ! -name '__rawmodel__' -exec rm {} +
tar -cf __palmmodel__ __paddlepalm_*
touch __palminfo__
ls __paddlepalm_* > __palminfo__
rm __paddlepalm_*
cd - >/dev/null
echo "done!"
#!/bin/bash
set -e
if [[ $# != 1 ]]; then
echo "Usage: bash download_pretrain.sh <bert|ernie>"
exit 1
fi
if [[ $1 == 'bert' ]]; then
name="bert"
link="https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz"
packname="uncased_L-24_H-1024_A-16.tar.gz"
dirname="uncased_L-24_H-1024_A-16"
elif [[ $1 == 'ernie' ]]; then
name="ernie"
link="https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz"
packname="ERNIE_Large_en_stable-2.0.0.tar.gz"
else
echo "$1 is currently not supported."
exit 1
fi
if [[ ! -d pretrain_model ]]; then
mkdir pretrain_model
fi
cd pretrain_model
mkdir $name
cd $name
echo "downloading ${name}..."
wget --no-check-certificate $link
echo "decompressing..."
tar -zxf $packname
rm -rf $packname
if [[ $dirname != "" ]]; then
mv $dirname/* .
rm -rf $dirname
fi
cd ../..
#!/bin/sh
if [[ $# != 1 ]]; then
echo "usage: bash recover_params.sh <params_dir>"
exit 1
fi
if [[ ! -d $1 ]]; then
echo "$1 not found."
exit 1
fi
if [[ ! -f $1/__palmmodel__ ]]; then
echo "paddlepalm model not found."
exit 1
fi
echo "recovering..."
if [[ -d $1/params ]]; then
cd $1/params
else
cd $1
fi
rm __palm*
mv .palm.backup/__rawmodel__ .
rm -rf .palm.backup
tar -xf __rawmodel__
mv .palm.backup/* .
rm __rawmodel__
rm -rf .palm.backup
cd - >/dev/null
......@@ -24,25 +24,47 @@ if __name__ == '__main__':
# Create the reader for this classification task; its arguments control the dataset format, the number of files, preprocessing rules, etc.
cls_reader = palm.reader.ClassifyReader(vocab_path, max_seqlen)
<<<<<<< HEAD:test/test2/run.py
cls_reader2 = palm.reader.ClassifyReader(vocab_path, max_seqlen)
=======
predict_cls_reader = palm.reader.ClassifyReader(vocab_path, max_seqlen, phase='predict')
>>>>>>> remotes/upstream/r0.3-api:test/test3/run.py
print(cls_reader.outputs_attr)
print(predict_cls_reader.outputs_attr)
# Different backbones require different features from the task reader. For a classification task the basic input features are token_ids and label_ids, but BERT additionally requires position, segment, input_mask and similar features extracted from the input. After register_with, the reader automatically adds the fields the backbone requires.
cls_reader.register_with(ernie)
cls_reader2.register_with(ernie)
print(cls_reader.outputs_attr)
<<<<<<< HEAD:test/test2/run.py
print("preparing data...")
print(cls_reader.num_examples)
cls_reader.load_data(train_file, batch_size)
cls_reader2.load_data(train_file, batch_size)
=======
print(predict_cls_reader.outputs_attr)
print("preparing data...")
print(cls_reader.num_examples)
cls_reader.load_data(train_file, batch_size, num_epochs=num_epochs)
>>>>>>> remotes/upstream/r0.3-api:test/test3/run.py
print(cls_reader.num_examples)
print('done!')
# Create the task head, e.g. classification, matching, or machine reading comprehension. Each head has required/optional arguments specific to its task. Note that heads and readers are decoupled: as long as the reader can supply the dataset-side fields the head depends on, the combination is valid.
cls_head = palm.head.Classify(4, 1024, 0.1)
<<<<<<< HEAD:test/test2/run.py
cls_head2 = palm.head.Classify(4, 1024, 0.1)
# Create a trainer from the reader and the task head. A trainer represents one training task: it maintains the training progress and key task information, performs validity checks, and controls rules such as model saving and loading for this task.
trainer = palm.Trainer('cls')
trainer2 = palm.Trainer('senti_cls')
mh_trainer = palm.MultiHeadTrainer([trainer, trainer2])
=======
# Create a trainer from the reader and the task head. A trainer represents one training task: it maintains the training progress and key task information, performs validity checks, and controls rules such as model saving and loading for this task.
trainer = palm.Trainer('senti_cls')
>>>>>>> remotes/upstream/r0.3-api:test/test3/run.py
# match4mrqa.reuse_head_with(mrc4mrqa)
......@@ -50,6 +72,14 @@ if __name__ == '__main__':
# output_vars = ernie.build(data_vars)
# cls_head.build({'backbone': output_vars, 'reader': data_vars})
<<<<<<< HEAD:test/test2/run.py
loss_var = mh_trainer.build_forward(ernie, [cls_head, cls_head2])
n_steps = cls_reader.num_examples * num_epochs // batch_size
warmup_steps = int(0.1 * n_steps)
print(warmup_steps)
sched = palm.lr_sched.TriangularSchedualer(warmup_steps, n_steps)
=======
loss_var = trainer.build_forward(ernie, cls_head)
# controller.build_forward()
......@@ -60,15 +90,22 @@ if __name__ == '__main__':
# print(warmup_steps)
# sched = palm.lr_sched.TriangularSchedualer(warmup_steps, n_steps)
sched = None
>>>>>>> remotes/upstream/r0.3-api:test/test3/run.py
adam = palm.optimizer.Adam(loss_var, lr, sched)
trainer.build_backward(optimizer=adam, weight_decay=0.001)
trainer.random_init_params()
trainer.load_pretrain('pretrain/ernie/params')
mh_trainer.build_backward(optimizer=adam, weight_decay=0.001)
# mh_trainer.random_init_params()
mh_trainer.load_pretrain('pretrain/ernie/params')
# trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
<<<<<<< HEAD:test/test2/run.py
mh_trainer.fit_readers_with_mixratio([cls_reader, cls_reader2], 'cls', 2)
mh_trainer.train(print_steps=1)
# trainer.save()
=======
trainer.fit_reader(cls_reader)
trainer.train(print_steps=1)
# trainer.save()
......@@ -114,3 +151,4 @@ if __name__ == '__main__':
# controller.pred('mrqa', inference_model_dir='output_model/secondrun/mrqa/infer_model')
>>>>>>> remotes/upstream/r0.3-api:test/test3/run.py