Commit 7c600b66 authored by kinghuin, committed by wuzewu

Merdev (#143)

* Add Reading Comprehension Task

* Add Reading Comprehension Task

* Add Reading Comprehension Task

* Add Reading Comprehension Task

* Add the reading comprehension task

* add reading comprehension task

* Add reading comprehension

* Add reading comprehension task

* add reading comprehension task

* Add reading comprehension task

* Add reading comprehension task

* Fix tokenization for bert chn

* Add GLUE and XNLI, modify text-classification demo test=develop (#94)

* Add GLUE and XNLI, modify text-classification demo

* add hub_server rw lock

* Support GLUE (#108)

* Support GLUE

* Support MNLI_m and MNLI_mm

* restore checkpoint.py

* Modify for review

* Fix the bug whether module is valid or not

* Enhancetask (#122)

* accelerate predict

* Add autoft (#127)

* add autofinetune

* update paddlehub required libs

* Add copyright

* Split task and add regression demo  (#130)

* split task

* add regression demo

* implement CombinedStrategy (#128)

* implement CombinedStrategy

* optimize some details (#131)

* optimize some details

* optimize some details (#133)

* support cv_reader

* use logger instead of print

* add warning

* remove todo

* add gradual_unfreeze frz_blocks

* replace the visualization toolkit from visualdl to tb-paddle (#139)

* replace the logging toolkit from visualdl to tb-paddle.

* modified:   requirements.txt

* modified:   paddlehub/finetune/task/basic_task.py

* update autofinetune  (#135)

* update autofinetune (add modelbased evaluator)

* support cpu count

* fix ci
Parent e5cb716b
...@@ -10,13 +10,13 @@ import numpy as np
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)
 parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
-parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning.")
+parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning.")
 parser.add_argument("--checkpoint_dir", type=str, default="paddlehub_finetune_ckpt", help="Path to save log data.")
 parser.add_argument("--batch_size", type=int, default=16, help="Total examples' number in batch for training.")
 parser.add_argument("--module", type=str, default="resnet50", help="Module used as feature extractor.")
 parser.add_argument("--dataset", type=str, default="flowers", help="Dataset to finetune.")
-parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
-parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
+parser.add_argument("--use_pyreader", type=ast.literal_eval, default=True, help="Whether use pyreader to feed data.")
+parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=True, help="Whether use data parallel.")
 # yapf: enable.
 module_map = {
...
""" Official evaluation script for v1.1 of the SQuAD dataset. """
from __future__ import print_function
from collections import Counter
import string
import re
import argparse
import json
import sys
def normalize_answer(s):
"""Lower text and remove punctuation, articles and extra whitespace."""
def remove_articles(text):
return re.sub(r'\b(a|an|the)\b', ' ', text)
def white_space_fix(text):
return ' '.join(text.split())
def remove_punc(text):
exclude = set(string.punctuation)
return ''.join(ch for ch in text if ch not in exclude)
def lower(text):
return text.lower()
return white_space_fix(remove_articles(remove_punc(lower(s))))
def f1_score(prediction, ground_truth):
prediction_tokens = normalize_answer(prediction).split()
ground_truth_tokens = normalize_answer(ground_truth).split()
common = Counter(prediction_tokens) & Counter(ground_truth_tokens)
num_same = sum(common.values())
if num_same == 0:
return 0
precision = 1.0 * num_same / len(prediction_tokens)
recall = 1.0 * num_same / len(ground_truth_tokens)
f1 = (2 * precision * recall) / (precision + recall)
return f1
def exact_match_score(prediction, ground_truth):
return (normalize_answer(prediction) == normalize_answer(ground_truth))
def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
scores_for_ground_truths = []
for ground_truth in ground_truths:
score = metric_fn(prediction, ground_truth)
scores_for_ground_truths.append(score)
return max(scores_for_ground_truths)
def evaluate(dataset, predictions):
f1 = exact_match = total = 0
for article in dataset:
for paragraph in article['paragraphs']:
for qa in paragraph['qas']:
total += 1
if qa['id'] not in predictions:
message = 'Unanswered question ' + qa['id'] + \
' will receive score 0.'
print(message, file=sys.stderr)
continue
ground_truths = list(map(lambda x: x['text'], qa['answers']))
prediction = predictions[qa['id']]
exact_match += metric_max_over_ground_truths(
exact_match_score, prediction, ground_truths)
f1 += metric_max_over_ground_truths(f1_score, prediction,
ground_truths)
exact_match = 100.0 * exact_match / total
f1 = 100.0 * f1 / total
return {'exact_match': exact_match, 'f1': f1}
if __name__ == '__main__':
expected_version = '1.1'
parser = argparse.ArgumentParser(
description='Evaluation for SQuAD ' + expected_version)
parser.add_argument('dataset_file', help='Dataset file')
parser.add_argument('prediction_file', help='Prediction File')
args = parser.parse_args()
with open(args.dataset_file) as dataset_file:
dataset_json = json.load(dataset_file)
if (dataset_json['version'] != expected_version):
print(
'Evaluation expects v-' + expected_version +
', but got dataset with v-' + dataset_json['version'],
file=sys.stderr)
dataset = dataset_json['data']
print(args.prediction_file)
with open(args.prediction_file) as prediction_file:
predictions = json.load(prediction_file)
print(json.dumps(evaluate(dataset, predictions)))
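# Illustrative usage (assuming this file is saved as evaluate_v1.py and the
# SQuAD v1.1 dev set plus a predictions file are available locally):
#
#   python evaluate_v1.py dev-v1.1.json predictions.json
#
# which prints a JSON object such as {"exact_match": 80.5, "f1": 88.2}
# (the numbers here are made up for illustration).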
"""Official evaluation script for SQuAD version 2.0.
In addition to basic functionality, we also compute additional statistics and
plot precision-recall curves if an additional na_prob.json file is provided.
This file is expected to map question ID's to the model's predicted probability
that a question is unanswerable.
"""
import argparse
import collections
import json
import numpy as np
import os
import re
import string
import sys
def make_qid_to_has_ans(dataset):
qid_to_has_ans = {}
for article in dataset:
for p in article['paragraphs']:
for qa in p['qas']:
qid_to_has_ans[qa['id']] = bool(qa['answers'])
return qid_to_has_ans
def normalize_answer(s):
"""Lower text and remove punctuation, articles and extra whitespace."""
def remove_articles(text):
regex = re.compile(r'\b(a|an|the)\b', re.UNICODE)
return re.sub(regex, ' ', text)
def white_space_fix(text):
return ' '.join(text.split())
def remove_punc(text):
exclude = set(string.punctuation)
return ''.join(ch for ch in text if ch not in exclude)
def lower(text):
return text.lower()
return white_space_fix(remove_articles(remove_punc(lower(s))))
def get_tokens(s):
if not s: return []
return normalize_answer(s).split()
def compute_exact(a_gold, a_pred):
return int(normalize_answer(a_gold) == normalize_answer(a_pred))
def compute_f1(a_gold, a_pred):
gold_toks = get_tokens(a_gold)
pred_toks = get_tokens(a_pred)
common = collections.Counter(gold_toks) & collections.Counter(pred_toks)
num_same = sum(common.values())
if len(gold_toks) == 0 or len(pred_toks) == 0:
# If either is no-answer, then F1 is 1 if they agree, 0 otherwise
return int(gold_toks == pred_toks)
if num_same == 0:
return 0
precision = 1.0 * num_same / len(pred_toks)
recall = 1.0 * num_same / len(gold_toks)
f1 = (2 * precision * recall) / (precision + recall)
return f1
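# Worked example for compute_f1 (illustrative): with a_gold = "the cat sat"
# and a_pred = "cat sat down", normalization drops the article "the", giving
# gold tokens ["cat", "sat"] and prediction tokens ["cat", "sat", "down"].
# Then num_same = 2, precision = 2/3, recall = 2/2 = 1.0, and
# f1 = 2 * (2/3 * 1.0) / (2/3 + 1.0) = 0.8.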
def get_raw_scores(dataset, preds):
exact_scores = {}
f1_scores = {}
for article in dataset:
for p in article['paragraphs']:
for qa in p['qas']:
qid = qa['id']
gold_answers = [
a['text'] for a in qa['answers']
if normalize_answer(a['text'])
]
if not gold_answers:
# For unanswerable questions, only correct answer is empty string
gold_answers = ['']
if qid not in preds:
print('Missing prediction for %s' % qid)
continue
a_pred = preds[qid]
# Take max over all gold answers
exact_scores[qid] = max(
compute_exact(a, a_pred) for a in gold_answers)
f1_scores[qid] = max(
compute_f1(a, a_pred) for a in gold_answers)
return exact_scores, f1_scores
def apply_no_ans_threshold(scores, na_probs, qid_to_has_ans, na_prob_thresh):
new_scores = {}
for qid, s in scores.items():
pred_na = na_probs[qid] > na_prob_thresh
if pred_na:
new_scores[qid] = float(not qid_to_has_ans[qid])
else:
new_scores[qid] = s
return new_scores
def make_eval_dict(exact_scores, f1_scores, qid_list=None):
if not qid_list:
total = len(exact_scores)
return collections.OrderedDict([
('exact', 100.0 * sum(exact_scores.values()) / total),
('f1', 100.0 * sum(f1_scores.values()) / total),
('total', total),
])
else:
total = len(qid_list)
return collections.OrderedDict([
('exact', 100.0 * sum(exact_scores[k] for k in qid_list) / total),
('f1', 100.0 * sum(f1_scores[k] for k in qid_list) / total),
('total', total),
])
def merge_eval(main_eval, new_eval, prefix):
for k in new_eval:
main_eval['%s_%s' % (prefix, k)] = new_eval[k]
def find_all_best_thresh(main_eval, preds, exact_raw, f1_raw, na_probs,
qid_to_has_ans):
best_exact, exact_thresh = find_best_thresh(preds, exact_raw, na_probs,
qid_to_has_ans)
best_f1, f1_thresh = find_best_thresh(preds, f1_raw, na_probs,
qid_to_has_ans)
main_eval['best_exact'] = best_exact
main_eval['best_exact_thresh'] = exact_thresh
main_eval['best_f1'] = best_f1
main_eval['best_f1_thresh'] = f1_thresh
def find_best_thresh(preds, scores, na_probs, qid_to_has_ans):
num_no_ans = sum(1 for k in qid_to_has_ans if not qid_to_has_ans[k])
cur_score = num_no_ans
best_score = cur_score
best_thresh = 0.0
qid_list = sorted(na_probs, key=lambda k: na_probs[k])
for i, qid in enumerate(qid_list):
if qid not in scores:
continue
if qid_to_has_ans[qid]:
diff = scores[qid]
else:
if preds[qid]:
diff = -1
else:
diff = 0
cur_score += diff
if cur_score > best_score:
best_score = cur_score
best_thresh = na_probs[qid]
return 100.0 * best_score / len(scores), best_thresh
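# Note on find_best_thresh (added commentary): the sweep starts from the score
# obtained by predicting "no answer" for every question (num_no_ans), then
# visits questions in order of increasing no-answer probability and adds the
# gain or loss of actually answering each one. best_thresh ends up at the
# na_prob value below which answering is, on balance, better than abstaining.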
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import ast
import collections
import json
import io
import math
import numpy as np
import os
import six
import sys
import time
import paddle
import paddle.fluid as fluid
import paddlehub as hub
import evaluate_v1
import evaluate_v2
hub.common.logger.logger.setLevel("INFO")
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=4e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint.")
parser.add_argument("--result_dir", type=str, default=None, help="Directory to predicted results to be written.")
parser.add_argument("--max_seq_len", type=int, default=384, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=True, help="Whether use data parallel.")
parser.add_argument("--max_answer_length", type=int, default=30, help="Max answer length.")
parser.add_argument("--n_best_size", type=int, default=20, help="The total number of n-best predictions to generate in the nbest_predictions.json output file.")
parser.add_argument("--null_score_diff_threshold", type=float, default=0.0, help="If null_score - best_non_null is greater than the threshold predict null.")
parser.add_argument("--version_2_with_negative", type=ast.literal_eval, default=False, help="If true, the SQuAD examples contain some that do not have an answer. If using squad v2.0, it should be set true.")
args = parser.parse_args()
# yapf: enable.
def write_predictions(
all_examples,
all_features,
all_results,
output_prediction_file,
output_nbest_file,
output_null_log_odds_file,
n_best_size=20,
max_answer_length=30,
do_lower_case=True,
version_2_with_negative=False,
null_score_diff_threshold=0.0,
):
"""Write final predictions to the json file and log-odds of null if needed."""
print("Writing predictions to: %s" % (output_prediction_file))
print("Writing nbest to: %s" % (output_nbest_file))
example_index_to_features = collections.defaultdict(list)
for feature in all_features:
example_index_to_features[feature.example_index].append(feature)
unique_id_to_result = {}
for result in all_results:
unique_id_to_result[result.unique_id] = result
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
"PrelimPrediction", [
"feature_index", "start_index", "end_index", "start_logit",
"end_logit"
])
all_predictions = collections.OrderedDict()
all_nbest_json = collections.OrderedDict()
scores_diff_json = collections.OrderedDict()
for (example_index, example) in enumerate(all_examples):
features = example_index_to_features[example_index]
prelim_predictions = []
# keep track of the minimum score of null start+end of position 0
score_null = 1000000 # large and positive
        min_null_feature_index = 0  # the paragraph slice with min null score
null_start_logit = 0 # the start logit at the slice with min null score
null_end_logit = 0 # the end logit at the slice with min null score
for (feature_index, feature) in enumerate(features):
result = unique_id_to_result[feature.unique_id]
start_indexes = get_best_indexes(result.start_logits, n_best_size)
end_indexes = get_best_indexes(result.end_logits, n_best_size)
# if we could have irrelevant answers, get the min score of irrelevant
if version_2_with_negative:
feature_null_score = result.start_logits[0] + result.end_logits[
0]
if feature_null_score < score_null:
score_null = feature_null_score
min_null_feature_index = feature_index
null_start_logit = result.start_logits[0]
null_end_logit = result.end_logits[0]
for start_index in start_indexes:
for end_index in end_indexes:
# We could hypothetically create invalid predictions, e.g., predict
# that the start of the span is in the question. We throw out all
# invalid predictions.
if start_index >= len(feature.tokens):
continue
if end_index >= len(feature.tokens):
continue
if start_index not in feature.token_to_orig_map:
continue
if end_index not in feature.token_to_orig_map:
continue
if not feature.token_is_max_context.get(start_index, False):
continue
if end_index < start_index:
continue
length = end_index - start_index + 1
if length > max_answer_length:
continue
prelim_predictions.append(
_PrelimPrediction(
feature_index=feature_index,
start_index=start_index,
end_index=end_index,
start_logit=result.start_logits[start_index],
end_logit=result.end_logits[end_index]))
if version_2_with_negative:
prelim_predictions.append(
_PrelimPrediction(
feature_index=min_null_feature_index,
start_index=0,
end_index=0,
start_logit=null_start_logit,
end_logit=null_end_logit))
prelim_predictions = sorted(
prelim_predictions,
key=lambda x: (x.start_logit + x.end_logit),
reverse=True)
_NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
"NbestPrediction", ["text", "start_logit", "end_logit"])
seen_predictions = {}
nbest = []
for pred in prelim_predictions:
if len(nbest) >= n_best_size:
break
feature = features[pred.feature_index]
if pred.start_index > 0: # this is a non-null prediction
tok_tokens = feature.tokens[pred.start_index:(
pred.end_index + 1)]
orig_doc_start = feature.token_to_orig_map[pred.start_index]
orig_doc_end = feature.token_to_orig_map[pred.end_index]
orig_tokens = example.doc_tokens[orig_doc_start:(
orig_doc_end + 1)]
tok_text = " ".join(tok_tokens)
# De-tokenize WordPieces that have been split off.
tok_text = tok_text.replace(" ##", "")
tok_text = tok_text.replace("##", "")
# Clean whitespace
tok_text = tok_text.strip()
tok_text = " ".join(tok_text.split())
orig_text = " ".join(orig_tokens)
final_text = get_final_text(tok_text, orig_text, do_lower_case)
if final_text in seen_predictions:
continue
seen_predictions[final_text] = True
else:
final_text = ""
seen_predictions[final_text] = True
nbest.append(
_NbestPrediction(
text=final_text,
start_logit=pred.start_logit,
end_logit=pred.end_logit))
        # if we didn't include the empty option in the n-best, include it
if version_2_with_negative:
if "" not in seen_predictions:
nbest.append(
_NbestPrediction(
text="",
start_logit=null_start_logit,
end_logit=null_end_logit))
# In very rare edge cases we could have no valid predictions. So we
# just create a nonce prediction in this case to avoid failure.
if not nbest:
nbest.append(
_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
assert len(nbest) >= 1
total_scores = []
best_non_null_entry = None
for entry in nbest:
total_scores.append(entry.start_logit + entry.end_logit)
if not best_non_null_entry:
if entry.text:
best_non_null_entry = entry
        # This should not normally happen; warn instead of failing silently.
        if best_non_null_entry is None:
            print("Warning: no non-null best prediction was found for this example.")
probs = compute_softmax(total_scores)
nbest_json = []
for (i, entry) in enumerate(nbest):
output = collections.OrderedDict()
output["text"] = entry.text
output["probability"] = probs[i]
output["start_logit"] = entry.start_logit
output["end_logit"] = entry.end_logit
nbest_json.append(output)
assert len(nbest_json) >= 1
if not version_2_with_negative:
all_predictions[example.qas_id] = nbest_json[0]["text"]
else:
# predict "" iff the null score - the score of best non-null > threshold
score_diff = score_null - best_non_null_entry.start_logit - (
best_non_null_entry.end_logit)
scores_diff_json[example.qas_id] = score_diff
if score_diff > null_score_diff_threshold:
all_predictions[example.qas_id] = ""
else:
all_predictions[example.qas_id] = best_non_null_entry.text
all_nbest_json[example.qas_id] = nbest_json
with open(output_prediction_file, "w") as writer:
writer.write(json.dumps(all_predictions, indent=4) + "\n")
with open(output_nbest_file, "w") as writer:
writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
if version_2_with_negative:
with open(output_null_log_odds_file, "w") as writer:
writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
def get_final_text(pred_text, orig_text, do_lower_case):
"""Project the tokenized prediction back to the original text."""
# When we created the data, we kept track of the alignment between original
# (whitespace tokenized) tokens and our WordPiece tokenized tokens. So
# now `orig_text` contains the span of our original text corresponding to the
# span that we predicted.
#
# However, `orig_text` may contain extra characters that we don't want in
# our prediction.
#
# For example, let's say:
# pred_text = steve smith
# orig_text = Steve Smith's
#
# We don't want to return `orig_text` because it contains the extra "'s".
#
# We don't want to return `pred_text` because it's already been normalized
# (the SQuAD eval script also does punctuation stripping/lower casing but
# our tokenizer does additional normalization like stripping accent
# characters).
#
# What we really want to return is "Steve Smith".
#
    # Therefore, we have to apply a semi-complicated alignment heuristic between
    # `pred_text` and `orig_text` to get a character-to-character alignment. This
    # can fail in certain cases, in which case we just return `orig_text`.
def _strip_spaces(text):
ns_chars = []
ns_to_s_map = collections.OrderedDict()
for (i, c) in enumerate(text):
if c == " ":
continue
ns_to_s_map[len(ns_chars)] = i
ns_chars.append(c)
ns_text = "".join(ns_chars)
return (ns_text, ns_to_s_map)
# We first tokenize `orig_text`, strip whitespace from the result
# and `pred_text`, and check if they are the same length. If they are
# NOT the same length, the heuristic has failed. If they are the same
# length, we assume the characters are one-to-one aligned.
tokenizer = hub.reader.tokenization.BasicTokenizer(
do_lower_case=do_lower_case)
tok_text = " ".join(tokenizer.tokenize(orig_text))
start_position = tok_text.find(pred_text)
if start_position == -1:
return orig_text
end_position = start_position + len(pred_text) - 1
(orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text)
(tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text)
if len(orig_ns_text) != len(tok_ns_text):
return orig_text
# We then project the characters in `pred_text` back to `orig_text` using
# the character-to-character alignment.
tok_s_to_ns_map = {}
for (i, tok_index) in six.iteritems(tok_ns_to_s_map):
tok_s_to_ns_map[tok_index] = i
orig_start_position = None
if start_position in tok_s_to_ns_map:
ns_start_position = tok_s_to_ns_map[start_position]
if ns_start_position in orig_ns_to_s_map:
orig_start_position = orig_ns_to_s_map[ns_start_position]
if orig_start_position is None:
return orig_text
orig_end_position = None
if end_position in tok_s_to_ns_map:
ns_end_position = tok_s_to_ns_map[end_position]
if ns_end_position in orig_ns_to_s_map:
orig_end_position = orig_ns_to_s_map[ns_end_position]
if orig_end_position is None:
return orig_text
output_text = orig_text[orig_start_position:(orig_end_position + 1)]
return output_text
def get_best_indexes(logits, n_best_size):
"""Get the n-best logits from a list."""
index_and_score = sorted(
enumerate(logits), key=lambda x: x[1], reverse=True)
best_indexes = []
for i in range(len(index_and_score)):
if i >= n_best_size:
break
best_indexes.append(index_and_score[i][0])
return best_indexes
def compute_softmax(scores):
"""Compute softmax probability over raw logits."""
if not scores:
return []
max_score = None
for score in scores:
if max_score is None or score > max_score:
max_score = score
exp_scores = []
total_sum = 0.0
for score in scores:
x = math.exp(score - max_score)
exp_scores.append(x)
total_sum += x
probs = []
for score in exp_scores:
probs.append(score / total_sum)
return probs
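# Worked example for compute_softmax (illustrative): scores = [1.0, 2.0]
# gives max_score = 2.0, exp_scores = [exp(-1.0), exp(0.0)] ≈ [0.368, 1.0],
# total_sum ≈ 1.368, so probs ≈ [0.269, 0.731].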
if __name__ == '__main__':
# Load Paddlehub bert_uncased_L-12_H-768_A-12 pretrained model
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
# module = hub.Module(module_dir=["./bert_uncased_L-12_H-768_A-12.hub_module"])
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Download dataset and use ReadingComprehensionReader to read dataset
dataset = hub.dataset.SQUAD(
version_2_with_negative=args.version_2_with_negative)
reader = hub.reader.ReadingComprehensionReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_length=args.max_seq_len,
doc_stride=128,
max_query_length=64)
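    # Note (added commentary): with max_seq_length=args.max_seq_len and
    # doc_stride=128, paragraphs longer than one sequence are split into
    # overlapping windows whose start positions are 128 tokens apart, and each
    # window becomes a separate feature; write_predictions later picks the best
    # span across all windows that belong to the same example.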
# Use "sequence_output" for token-level output.
seq_output = outputs["sequence_output"]
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Select finetune strategy, setup config and finetune
strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay,
learning_rate=args.learning_rate,
warmup_proportion=args.warmup_proportion,
lr_scheduler="linear_decay")
    # Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
log_interval=10,
use_pyreader=args.use_pyreader,
use_data_parallel=args.use_data_parallel,
save_ckpt_interval=100,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
enable_memory_optim=True,
strategy=strategy)
# Define a reading comprehension finetune task by PaddleHub's API
reading_comprehension_task = hub.ReadingComprehensionTask(
data_reader=reader,
feature=seq_output,
feed_list=feed_list,
config=config)
# Data to be predicted
data = dataset.predict_examples
features = reader.convert_examples_to_features(
examples=data, is_training=False)
run_states = reading_comprehension_task.predict(data=data)
results = [run_state.run_results for run_state in run_states]
RawResult = collections.namedtuple(
"RawResult", ["unique_id", "start_logits", "end_logits"])
all_results = []
for batch_idx, batch_result in enumerate(results):
np_unique_ids = batch_result[0]
np_start_logits = batch_result[1]
np_end_logits = batch_result[2]
np_num_seqs = batch_result[3]
for idx in range(np_unique_ids.shape[0]):
unique_id = int(np_unique_ids[idx])
start_logits = [float(x) for x in np_start_logits[idx].flat]
end_logits = [float(x) for x in np_end_logits[idx].flat]
all_results.append(
RawResult(
unique_id=unique_id,
start_logits=start_logits,
end_logits=end_logits))
output_prediction_file = os.path.join(args.result_dir, "predictions.json")
output_nbest_file = os.path.join(args.result_dir, "nbest_predictions.json")
output_null_log_odds_file = os.path.join(args.result_dir, "null_odds.json")
write_predictions(
data,
features,
all_results,
output_prediction_file,
output_nbest_file,
output_null_log_odds_file,
max_answer_length=args.max_answer_length,
n_best_size=args.n_best_size,
version_2_with_negative=args.version_2_with_negative,
null_score_diff_threshold=args.null_score_diff_threshold)
with io.open(dataset.predict_file, 'r', encoding="utf8") as dataset_file:
dataset_json = json.load(dataset_file)
dataset = dataset_json['data']
with io.open(
output_prediction_file, 'r', encoding="utf8") as prediction_file:
predictions = json.load(prediction_file)
if not args.version_2_with_negative:
print(json.dumps(evaluate_v1.evaluate(dataset, predictions)))
else:
with io.open(
output_null_log_odds_file, 'r', encoding="utf8") as odds_file:
na_probs = json.load(odds_file)
# Maps qid to true/false
qid_to_has_ans = evaluate_v2.make_qid_to_has_ans(dataset)
has_ans_qids = [k for k, v in qid_to_has_ans.items() if v]
no_ans_qids = [k for k, v in qid_to_has_ans.items() if not v]
exact_raw, f1_raw = evaluate_v2.get_raw_scores(dataset, predictions)
exact_thresh = evaluate_v2.apply_no_ans_threshold(
exact_raw, na_probs, qid_to_has_ans, na_prob_thresh=1.0)
f1_thresh = evaluate_v2.apply_no_ans_threshold(
f1_raw, na_probs, qid_to_has_ans, na_prob_thresh=1.0)
out_eval = evaluate_v2.make_eval_dict(exact_thresh, f1_thresh)
if has_ans_qids:
has_ans_eval = evaluate_v2.make_eval_dict(
exact_thresh, f1_thresh, qid_list=has_ans_qids)
evaluate_v2.merge_eval(out_eval, has_ans_eval, 'HasAns')
if no_ans_qids:
no_ans_eval = evaluate_v2.make_eval_dict(
exact_thresh, f1_thresh, qid_list=no_ans_qids)
evaluate_v2.merge_eval(out_eval, no_ans_eval, 'NoAns')
evaluate_v2.find_all_best_thresh(out_eval, predictions, exact_raw,
f1_raw, na_probs, qid_to_has_ans)
print(json.dumps(out_eval, indent=4))
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
import argparse
import ast
import paddle.fluid as fluid
import paddlehub as hub
hub.common.logger.logger.setLevel("INFO")
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=3e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=384, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=True, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=True, help="Whether use data parallel.")
parser.add_argument("--version_2_with_negative", type=ast.literal_eval, default=False, help="If true, the SQuAD examples contain some that do not have an answer. If using squad v2.0, it should be set true.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
# Load Paddlehub bert_uncased_L-12_H-768_A-12 pretrained model
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
# Download dataset and use ReadingComprehensionReader to read dataset
dataset = hub.dataset.SQUAD(
version_2_with_negative=args.version_2_with_negative)
reader = hub.reader.ReadingComprehensionReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_length=args.max_seq_len,
doc_stride=128,
max_query_length=64)
seq_output = outputs["sequence_output"]
# Setup feed list for data feeder
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Select finetune strategy, setup config and finetune
strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay,
learning_rate=args.learning_rate,
warmup_proportion=args.warmup_proportion,
lr_scheduler="linear_decay")
    # Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
log_interval=10,
use_pyreader=args.use_pyreader,
use_data_parallel=args.use_data_parallel,
save_ckpt_interval=1000,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
enable_memory_optim=True,
strategy=strategy)
# Define a reading comprehension finetune task by PaddleHub's API
reading_comprehension_task = hub.ReadingComprehensionTask(
data_reader=reader,
feature=seq_output,
feed_list=feed_list,
config=config)
# Finetune by PaddleHub's API
reading_comprehension_task.finetune()
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0,1
python -u reading_comprehension.py \
--batch_size=12 \
--use_gpu=True \
--checkpoint_dir="./ckpt_rc" \
--learning_rate=3e-5 \
--weight_decay=0.01 \
--warmup_proportion=0.1 \
--num_epoch=2 \
--max_seq_len=384 \
--use_pyreader=True \
--use_data_parallel=True \
--version_2_with_negative=False
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0
CKPT_DIR="./ckpt_rc"
RES_DIR="./result"
mkdir $RES_DIR
python -u predict.py \
--batch_size=12 \
--use_gpu=True \
--checkpoint_dir=${CKPT_DIR} \
--learning_rate=3e-5 \
--weight_decay=0.01 \
--warmup_proportion=0.1 \
--num_epoch=1 \
--max_seq_len=384 \
--use_pyreader=False \
--use_data_parallel=False \
--version_2_with_negative=False \
--result_dir=${RES_DIR}
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import ast
import numpy as np
import os
import time
import paddle
import paddle.fluid as fluid
import paddlehub as hub
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--dataset", type=str, default="STS-B", help="Directory to model checkpoint")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
dataset = None
metrics_choices = []
# Download dataset and use ClassifyReader to read dataset
if args.dataset.lower() == "sts-b":
dataset = hub.dataset.GLUE("STS-B")
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
else:
raise ValueError("%s dataset is not defined" % args.dataset)
support_metrics = ["acc", "f1", "matthews"]
for metric in metrics_choices:
if metric not in support_metrics:
raise ValueError("\"%s\" metric is not defined" % metric)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
reader = hub.reader.RegressionReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
# Construct transfer learning network
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]
# Setup feed list for data feeder
    # Must feed all the tensors that the module needs
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
    # Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=False,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
batch_size=args.batch_size,
enable_memory_optim=False,
checkpoint_dir=args.checkpoint_dir,
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a regression finetune task by PaddleHub's API
reg_task = hub.RegressionTask(
data_reader=reader,
feature=pooled_output,
feed_list=feed_list,
config=config)
    # Data to be predicted
data = [[d.text_a, d.text_b] for d in dataset.get_predict_examples()]
index = 0
run_states = reg_task.predict(data=data)
results = [run_state.run_results for run_state in run_states]
if not os.path.exists("output"):
os.makedirs("output")
fout = open(os.path.join("output", "%s.tsv" % args.dataset.upper()), 'w')
fout.write("index\tprediction")
for batch_result in results:
for result in batch_result[0]:
if index < 3:
print("%s\t%s\tpredict=%.3f" % (data[index][0], data[index][1],
result[0]))
fout.write("\n%s\t%.3f" % (index, result[0]))
index += 1
fout.close()
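    # Illustrative output (values are made up): output/STS-B.tsv contains a
    # tab-separated header followed by one row per predicted example, e.g.
    #
    #   index   prediction
    #   0       3.812
    #   1       1.047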
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Finetuning on classification task """
import argparse
import ast
import paddle.fluid as fluid
import paddlehub as hub
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--dataset", type=str, default="STS-B", help="Directory to model checkpoint")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--data_dir", type=str, default=None, help="Path to training data.")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
dataset = None
# Download dataset and use ClassifyReader to read dataset
if args.dataset.lower() == "sts-b":
dataset = hub.dataset.GLUE("STS-B")
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
else:
raise ValueError("%s dataset is not defined" % args.dataset)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len)
reader = hub.reader.RegressionReader(
dataset=dataset,
vocab_path=module.get_vocab_path(),
max_seq_len=args.max_seq_len)
# Construct transfer learning network
# Use "pooled_output" for classification tasks on an entire sentence.
# Use "sequence_output" for token-level output.
pooled_output = outputs["pooled_output"]
# Setup feed list for data feeder
    # Must feed all the tensors that the module needs
feed_list = [
inputs["input_ids"].name,
inputs["position_ids"].name,
inputs["segment_ids"].name,
inputs["input_mask"].name,
]
# Select finetune strategy, setup config and finetune
strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay,
learning_rate=args.learning_rate,
lr_scheduler="linear_decay")
    # Setup running config for PaddleHub Finetune API
config = hub.RunConfig(
use_data_parallel=args.use_data_parallel,
use_pyreader=args.use_pyreader,
use_cuda=args.use_gpu,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
checkpoint_dir=args.checkpoint_dir,
strategy=strategy)
# Define a regression finetune task by PaddleHub's API
reg_task = hub.RegressionTask(
data_reader=reader,
feature=pooled_output,
feed_list=feed_list,
config=config)
# Finetune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically
reg_task.finetune_and_eval()
export FLAGS_eager_delete_tensor_gb=0.0
# export CUDA_VISIBLE_DEVICES=0
# User can select the dataset to predict; currently only STS-B is supported in this regression demo
DATASET="STS-B"
CKPT_DIR="./ckpt_${DATASET}"
# STS-B: batch_size=32, max_seq_len=128
python -u predict.py --checkpoint_dir $CKPT_DIR \
--max_seq_len 128 \
--use_gpu True \
--dataset=${DATASET} \
--batch_size=32
export FLAGS_eager_delete_tensor_gb=0.0
export CUDA_VISIBLE_DEVICES=0
DATASET="STS-B"
CKPT_DIR="./ckpt_${DATASET}"
# Recommended hyperparameters for different tasks
# STS-B: batch_size=32, weight_decay=0.1, num_epoch=3, max_seq_len=128, lr=4e-5
python -u regression.py \
--batch_size=32 \
--use_gpu=True \
--dataset=${DATASET} \
--checkpoint_dir=${CKPT_DIR} \
--learning_rate=4e-5 \
--weight_decay=0.1 \
--max_seq_len=128 \
--num_epoch=3 \
--use_pyreader=True \
--use_data_parallel=True
...@@ -29,7 +29,7 @@ import paddlehub as hub
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)
-parser.add_argument("--checkpoint_dir", type=str, default="ckpt_20190802182531", help="Directory to model checkpoint")
+parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
 parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.")
 parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
 parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
...@@ -42,64 +42,89 @@ args = parser.parse_args()
 if __name__ == '__main__':
     dataset = None
+    metrics_choices = []
     # Download dataset and use ClassifyReader to read dataset
     if args.dataset.lower() == "chnsenticorp":
         dataset = hub.dataset.ChnSentiCorp()
         module = hub.Module(name="ernie")
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == "nlpcc_dbqa":
         dataset = hub.dataset.NLPCC_DBQA()
         module = hub.Module(name="ernie")
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == "lcqmc":
         dataset = hub.dataset.LCQMC()
         module = hub.Module(name="ernie")
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == "mrpc":
         dataset = hub.dataset.GLUE("MRPC")
         if args.use_taskid:
             module = hub.Module(name="ernie_v2_eng_base")
         else:
             module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
+        metrics_choices = ["f1", "acc"]
+        # The first metric will be chosen to eval. Ref: task.py:799
     elif args.dataset.lower() == "qqp":
         dataset = hub.dataset.GLUE("QQP")
         if args.use_taskid:
             module = hub.Module(name="ernie_v2_eng_base")
         else:
             module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
+        metrics_choices = ["f1", "acc"]
     elif args.dataset.lower() == "sst-2":
         dataset = hub.dataset.GLUE("SST-2")
         if args.use_taskid:
             module = hub.Module(name="ernie_v2_eng_base")
         else:
             module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == "cola":
         dataset = hub.dataset.GLUE("CoLA")
         if args.use_taskid:
             module = hub.Module(name="ernie_v2_eng_base")
         else:
             module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
+        metrics_choices = ["matthews", "acc"]
     elif args.dataset.lower() == "qnli":
         dataset = hub.dataset.GLUE("QNLI")
         if args.use_taskid:
             module = hub.Module(name="ernie_v2_eng_base")
         else:
             module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
+        metrics_choices = ["acc"]
     elif args.dataset.lower() == "rte":
         dataset = hub.dataset.GLUE("RTE")
         if args.use_taskid:
             module = hub.Module(name="ernie_v2_eng_base")
         else:
             module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
+        metrics_choices = ["acc"]
-    elif args.dataset.lower() == "mnli":
-        dataset = hub.dataset.GLUE("MNLI")
+    elif args.dataset.lower() == "mnli" or args.dataset.lower() == "mnli_m":
+        dataset = hub.dataset.GLUE("MNLI_m")
         if args.use_taskid:
             module = hub.Module(name="ernie_v2_eng_base")
         else:
             module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
+        metrics_choices = ["acc"]
+    elif args.dataset.lower() == "mnli_mm":
+        dataset = hub.dataset.GLUE("MNLI_mm")
+        if args.use_taskid:
+            module = hub.Module(name="ernie_v2_eng_base")
+        else:
+            module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
+        metrics_choices = ["acc"]
     elif args.dataset.lower().startswith("xnli"):
         dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:])
         module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12")
+        metrics_choices = ["acc"]
     else:
         raise ValueError("%s dataset is not defined" % args.dataset)
+    support_metrics = ["acc", "f1", "matthews"]
+    for metric in metrics_choices:
+        if metric not in support_metrics:
+            raise ValueError("\"%s\" metric is not defined" % metric)
     inputs, outputs, program = module.context(
         trainable=True, max_seq_len=args.max_seq_len)
     reader = hub.reader.ClassifyReader(
...@@ -147,7 +172,8 @@ if __name__ == '__main__':
         feature=pooled_output,
         feed_list=feed_list,
         num_classes=dataset.num_labels,
-        config=config)
+        config=config,
+        metrics_choices=metrics_choices)
     # Data to be predicted
     data = [[d.text_a, d.text_b] for d in dataset.get_dev_examples()[:3]]
...
...@@ -2,18 +2,32 @@ export FLAGS_eager_delete_tensor_gb=0.0
 export CUDA_VISIBLE_DEVICES=0
 # User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task
 DATASET="chnsenticorp"
 CKPT_DIR="./ckpt_${DATASET}"
-# Support ChnSentiCorp NLPCC_DBQA LCQMC MRPC QQP SST-2
-# CoLA QNLI RTE MNLI XNLI
-# for XNLI: Specify the language with an underscore like xnli_zh.
-# ar: Arabic bg: Bulgarian de: German
-# el: Greek en: English es: Spanish
-# fr: French hi: Hindi ru: Russian
-# sw: Swahili th: Thai tr: Turkish
-# ur: Urdu vi: Vietnamese zh: Chinese (Simplified)
+# Recommended hyperparameters for different tasks
+# ChnSentiCorp: batch_size=24, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
+# NLPCC_DBQA: batch_size=8, weight_decay=0.01, num_epoch=3, max_seq_len=512, lr=2e-5
+# LCQMC: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=2e-5
+# QQP: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
+# QNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
+# SST-2: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
+# CoLA: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
+# MRPC: batch_size=32, weight_decay=0.01, num_epoch=3, max_seq_len=128, lr=5e-5
+# RTE: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=3e-5
+# MNLI: batch_size=32, weight_decay=0, num_epoch=3, max_seq_len=128, lr=5e-5
+# Specify the matched/mismatched dev and test dataset with an underscore.
+# mnli_m or mnli: dev and test in matched dataset.
+# mnli_mm: dev and test in mismatched dataset.
+# The difference can be seen in https://www.nyu.edu/projects/bowman/multinli/paper.pdf.
+# If you are not sure which one to pick, just use mnli or mnli_m.
+# XNLI: batch_size=32, weight_decay=0, num_epoch=2, max_seq_len=128, lr=5e-5
+# Specify the language with an underscore like xnli_zh.
+# ar- Arabic bg- Bulgarian de- German
+# el- Greek en- English es- Spanish
+# fr- French hi- Hindi ru- Russian
+# sw- Swahili th- Thai tr- Turkish
+# ur- Urdu vi- Vietnamese zh- Chinese (Simplified)
 python -u text_classifier.py \
 --batch_size=24 \
...
...@@ -2,17 +2,20 @@ export FLAGS_eager_delete_tensor_gb=0.0
 export CUDA_VISIBLE_DEVICES=0
 # User can select chnsenticorp, nlpcc_dbqa, lcqmc and so on for different task
+DATASET="chnsenticorp"
+CKPT_DIR="./ckpt_${DATASET}"
 # Support ChnSentiCorp NLPCC_DBQA LCQMC MRPC QQP SST-2
-# CoLA QNLI RTE MNLI XNLI
+# CoLA QNLI RTE MNLI (or MNLI_m, MNLI_mm) XNLI
 # for XNLI: Specify the language with an underscore like xnli_zh.
 # ar: Arabic bg: Bulgarian de: German
 # el: Greek en: English es: Spanish
 # fr: French hi: Hindi ru: Russian
 # sw: Swahili th: Thai tr: Turkish
 # ur: Urdu vi: Vietnamese zh: Chinese (Simplified)
-DATASET="ChnSentiCorp"
-CKPT_DIR="./ckpt_${DATASET}"
-python -u predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128 --use_gpu False --dataset=${DATASET} --use_taskid False
+python -u predict.py --checkpoint_dir=$CKPT_DIR \
+--max_seq_len=128 \
+--use_gpu=True \
+--dataset=${DATASET} \
+--batch_size=150 \
+--use_taskid=False
...@@ -26,7 +26,7 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whet ...@@ -26,7 +26,7 @@ parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whet
parser.add_argument("--dataset", type=str, default="chnsenticorp", help="The choice of dataset") parser.add_argument("--dataset", type=str, default="chnsenticorp", help="The choice of dataset")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy") parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--data_dir", type=str, default=None, help="Path to training data.") parser.add_argument("--data_dir", type=str, default=None, help="Path to training data.")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
...@@ -39,64 +39,89 @@ args = parser.parse_args() ...@@ -39,64 +39,89 @@ args = parser.parse_args()
if __name__ == '__main__': if __name__ == '__main__':
dataset = None dataset = None
metrics_choices = []
# Download dataset and use ClassifyReader to read dataset # Download dataset and use ClassifyReader to read dataset
if args.dataset.lower() == "chnsenticorp": if args.dataset.lower() == "chnsenticorp":
dataset = hub.dataset.ChnSentiCorp() dataset = hub.dataset.ChnSentiCorp()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "nlpcc_dbqa": elif args.dataset.lower() == "nlpcc_dbqa":
dataset = hub.dataset.NLPCC_DBQA() dataset = hub.dataset.NLPCC_DBQA()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "lcqmc": elif args.dataset.lower() == "lcqmc":
dataset = hub.dataset.LCQMC() dataset = hub.dataset.LCQMC()
module = hub.Module(name="ernie") module = hub.Module(name="ernie")
metrics_choices = ["acc"]
elif args.dataset.lower() == "mrpc": elif args.dataset.lower() == "mrpc":
dataset = hub.dataset.GLUE("MRPC") dataset = hub.dataset.GLUE("MRPC")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["f1", "acc"]
# The first metric will be choose to eval. Ref: task.py:799
elif args.dataset.lower() == "qqp": elif args.dataset.lower() == "qqp":
dataset = hub.dataset.GLUE("QQP") dataset = hub.dataset.GLUE("QQP")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["f1", "acc"]
elif args.dataset.lower() == "sst-2": elif args.dataset.lower() == "sst-2":
dataset = hub.dataset.GLUE("SST-2") dataset = hub.dataset.GLUE("SST-2")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "cola": elif args.dataset.lower() == "cola":
dataset = hub.dataset.GLUE("CoLA") dataset = hub.dataset.GLUE("CoLA")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["matthews", "acc"]
elif args.dataset.lower() == "qnli": elif args.dataset.lower() == "qnli":
dataset = hub.dataset.GLUE("QNLI") dataset = hub.dataset.GLUE("QNLI")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "rte": elif args.dataset.lower() == "rte":
dataset = hub.dataset.GLUE("RTE") dataset = hub.dataset.GLUE("RTE")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
elif args.dataset.lower() == "mnli": metrics_choices = ["acc"]
dataset = hub.dataset.GLUE("MNLI") elif args.dataset.lower() == "mnli" or args.dataset.lower() == "mnli":
dataset = hub.dataset.GLUE("MNLI_m")
if args.use_taskid: if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base") module = hub.Module(name="ernie_v2_eng_base")
else: else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12") module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower() == "mnli_mm":
dataset = hub.dataset.GLUE("MNLI_mm")
if args.use_taskid:
module = hub.Module(name="ernie_v2_eng_base")
else:
module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
metrics_choices = ["acc"]
elif args.dataset.lower().startswith("xnli"): elif args.dataset.lower().startswith("xnli"):
dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:]) dataset = hub.dataset.XNLI(language=args.dataset.lower()[-2:])
module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12") module = hub.Module(name="bert_multi_cased_L-12_H-768_A-12")
metrics_choices = ["acc"]
else: else:
raise ValueError("%s dataset is not defined" % args.dataset) raise ValueError("%s dataset is not defined" % args.dataset)
support_metrics = ["acc", "f1", "matthews"]
for metric in metrics_choices:
if metric not in support_metrics:
raise ValueError("\"%s\" metric is not defined" % metric)
inputs, outputs, program = module.context( inputs, outputs, program = module.context(
trainable=True, max_seq_len=args.max_seq_len) trainable=True, max_seq_len=args.max_seq_len)
reader = hub.reader.ClassifyReader( reader = hub.reader.ClassifyReader(
...@@ -144,7 +169,8 @@ if __name__ == '__main__': ...@@ -144,7 +169,8 @@ if __name__ == '__main__':
feature=pooled_output, feature=pooled_output,
feed_list=feed_list, feed_list=feed_list,
num_classes=dataset.num_labels, num_classes=dataset.num_labels,
config=config) config=config,
metrics_choices=metrics_choices)
# Finetune and evaluate by PaddleHub's API # Finetune and evaluate by PaddleHub's API
# will finish training, evaluation, testing, save model automatically # will finish training, evaluation, testing, save model automatically
......
...@@ -50,7 +50,12 @@ from .finetune.task import TextClassifierTask ...@@ -50,7 +50,12 @@ from .finetune.task import TextClassifierTask
from .finetune.task import ImageClassifierTask from .finetune.task import ImageClassifierTask
from .finetune.task import SequenceLabelTask from .finetune.task import SequenceLabelTask
from .finetune.task import MultiLabelClassifierTask from .finetune.task import MultiLabelClassifierTask
from .finetune.task import RegressionTask
from .finetune.task import ReadingComprehensionTask
from .finetune.config import RunConfig from .finetune.config import RunConfig
from .finetune.strategy import AdamWeightDecayStrategy from .finetune.strategy import AdamWeightDecayStrategy
from .finetune.strategy import DefaultStrategy from .finetune.strategy import DefaultStrategy
from .finetune.strategy import DefaultFinetuneStrategy from .finetune.strategy import DefaultFinetuneStrategy
from .finetune.strategy import L2SPFinetuneStrategy
from .finetune.strategy import ULMFiTStrategy
from .finetune.strategy import CombinedStrategy
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from multiprocessing.pool import ThreadPool
import copy
import json
import math
import numpy as np
import six
import time
from paddlehub.common.logger import logger
from paddlehub.common.utils import mkdir
if six.PY3:
INF = math.inf
else:
INF = float("inf")
class PSHE2(object):
def __init__(
self,
evaluator,
cudas=["0"],
popsize=5,
output_dir=None,
alpha=0.5,
epsilon=0.2,
):
self._num_thread = len(cudas)
self._popsize = popsize
self._alpha = alpha
self._epsilon = epsilon
self._iteration = 0
self.cudas = cudas
self.is_cuda_free = {"free": [], "busy": []}
self.is_cuda_free["free"] = cudas
self.evaluator = evaluator
self.init_input = evaluator.get_init_params()
self.num_hparm = len(self.init_input)
self.best_hparams_per_pop = [[0] * self.num_hparm] * self._popsize
self.best_reward_per_pop = [INF] * self._popsize
self.momentums = [[0] * self.num_hparm] * self._popsize
self.best_hparms_all_pop = []
self.best_reward_all_pop = INF
self.current_hparams = [[0] * self.num_hparm] * self._popsize
for i in range(self.popsize):
self.current_hparams[i] = self.randomSolution()
if output_dir is None:
now = int(time.time())
time_str = time.strftime("%Y%m%d%H%M%S", time.localtime(now))
self._output_dir = "output_" + time_str
else:
self._output_dir = output_dir
@property
def thread(self):
return self._num_thread
@property
def popsize(self):
return self._popsize
@property
def alpha(self):
return self._alpha
@property
def epsilon(self):
return self._epsilon
@property
def output_dir(self):
return self._output_dir
@property
def iteration(self):
return self._iteration
def set_output_dir(self, output_dir=None):
if output_dir is not None:
output_dir = output_dir
else:
output_dir = self._output_dir
return output_dir
def randomSolution(self):
solut = [0] * self.num_hparm
for i in range(self.num_hparm):
ratio = (np.random.random_sample() - 0.5) * 2.0
if ratio >= 0:
solut[i] = (
1.0 - self.init_input[i]) * ratio + self.init_input[i]
else:
solut[i] = (
self.init_input[i] + 1.0) * ratio + self.init_input[i]
return solut
def smallPeturb(self):
for i in range(self.popsize):
for j in range(self.num_hparm):
ratio = (np.random.random_sample() - 0.5) * 2.0
if ratio >= 0:
self.current_hparams[i][j] = (
1.0 - self.current_hparams[i][j]
) * ratio * self.epsilon + self.current_hparams[i][j]
else:
self.current_hparams[i][j] = (
self.current_hparams[i][j] +
1.0) * ratio * self.epsilon + self.current_hparams[i][j]
def estimatePopGradients(self):
gradients = [[0] * self.num_hparm] * self.popsize
for i in range(self.popsize):
for j in range(self.num_hparm):
gradients[i][j] = self.current_hparams[i][
j] - self.best_hparms_all_pop[j]
return gradients
def estimateLocalGradients(self):
gradients = [[0] * self.num_hparm] * self.popsize
for i in range(self.popsize):
for j in range(self.num_hparm):
gradients[i][j] = self.current_hparams[i][
j] - self.best_hparams_per_pop[i][j]
return gradients
def estimateMomemtum(self):
popGrads = self.estimatePopGradients()
localGrads = self.estimateLocalGradients()
for i in range(self.popsize):
for j in range(self.num_hparm):
self.momentums[i][j] = (
1 - 3.0 * self.alpha / self.iteration
) * self.momentums[i][j] - self.alpha * localGrads[i][
j] - self.alpha * popGrads[i][j]
def is_stop(self):
return False
def solutions(self):
return self.current_hparams
def feedback(self, params_list, reward_list):
self._iteration = self._iteration + 1
for i in range(self.popsize):
if reward_list[i] < self.best_reward_per_pop[i]:
self.best_hparams_per_pop[i] = copy.deepcopy(
self.current_hparams[i])
self.best_reward_per_pop[i] = reward_list[i]
if reward_list[i] < self.best_reward_all_pop:
self.best_hparms_all_pop = self.current_hparams[i]
self.best_reward_all_pop = reward_list[i]
self.estimateMomemtum()
for i in range(self.popsize):
for j in range(len(self.init_input)):
self.current_hparams[i][j] = self.current_hparams[i][
j] + self.alpha * self.momentums[i][j]
self.smallPeturb()
def optimal_solution(self):
return self.best_hparms_all_pop
def step(self, output_dir):
solutions = self.solutions()
params_cudas_dirs = []
solution_results = []
cnt = 0
solutions_ckptdirs = {}
mkdir(output_dir)
for idx, solution in enumerate(solutions):
cuda = self.is_cuda_free["free"][0]
ckptdir = output_dir + "/ckpt-" + str(idx)
log_file = output_dir + "/log-" + str(idx) + ".info"
params_cudas_dirs.append([solution, cuda, ckptdir, log_file])
solutions_ckptdirs[tuple(solution)] = ckptdir
self.is_cuda_free["free"].remove(cuda)
self.is_cuda_free["busy"].append(cuda)
if len(params_cudas_dirs) == self.thread or cnt == int(
self.popsize / self.thread):
tp = ThreadPool(len(params_cudas_dirs))
solution_results += tp.map(self.evaluator.run,
params_cudas_dirs)
cnt += 1
tp.close()
tp.join()
for param_cuda in params_cudas_dirs:
self.is_cuda_free["free"].append(param_cuda[1])
self.is_cuda_free["busy"].remove(param_cuda[1])
params_cudas_dirs = []
self.feedback(solutions, solution_results)
return solutions_ckptdirs
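For orientation, a minimal sketch of driving the PSHE2 solutions/feedback loop directly, outside of step() and the hub CLI. The MockEvaluator below is hypothetical and only mimics the get_init_params/run interface used above; as in feedback(), a lower reward is treated as better.
# Hedged sketch; MockEvaluator is a made-up stand-in for FullTrailEvaluator.
from paddlehub.autofinetune.autoft import PSHE2

class MockEvaluator(object):
    def get_init_params(self):
        # two hyperparameters, already normalized into the [-1, 1] search space
        return [0.0, 0.0]

    def run(self, args):
        solution, cuda, ckpt_dir, log_file = args
        # pretend lower is better: squared distance to an arbitrary optimum
        return sum((x - 0.3) ** 2 for x in solution)

autoft = PSHE2(MockEvaluator(), cudas=["0"], popsize=4)
for _ in range(3):  # three rounds of propose -> evaluate -> feedback
    solutions = autoft.solutions()
    rewards = [autoft.evaluator.run([s, "0", "ckpt", "log"]) for s in solutions]
    autoft.feedback(solutions, rewards)
print(autoft.optimal_solution())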
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import io
import hashlib
import math
import os
import random
import six
import yaml
from paddlehub.common.logger import logger
from paddlehub.common.utils import is_windows
REWARD_SUM = 10000
if six.PY3:
INF = math.inf
else:
INF = float("inf")
class BaseEvaluator(object):
def __init__(self, params_file, finetunee_script):
with io.open(params_file, 'r', encoding='utf8') as f:
self.params = yaml.safe_load(f)
self.finetunee_script = finetunee_script
def get_init_params(self):
init_params = []
for param in self.params["param_list"]:
init_params.append(param['init_value'])
init_params = self.inverse_convert_params(init_params)
return init_params
def get_reward(self, result_output):
return REWARD_SUM - float(result_output)
def is_valid_params(self, params):
for i in range(0, len(self.params["param_list"])):
if params[i] < float(self.params["param_list"][i]["greater_than"]):
return False
if params[i] > float(self.params["param_list"][i]["lower_than"]):
return False
return True
def convert_params(self, params):
cparams = []
for i in range(0, len(self.params["param_list"])):
cparams.append(
float(self.params["param_list"][i]["greater_than"] +
(params[i] + 1.0) / 2.0 *
(self.params["param_list"][i]["lower_than"] -
self.params["param_list"][i]["greater_than"])))
if cparams[i] <= float(
self.params["param_list"][i]["greater_than"]):
cparams[i] = float(self.params["param_list"][i]["greater_than"])
if cparams[i] >= float(self.params["param_list"][i]["lower_than"]):
cparams[i] = float(self.params["param_list"][i]["lower_than"])
if self.params["param_list"][i]["type"] == "int":
cparams[i] = int(cparams[i])
return cparams
def inverse_convert_params(self, params):
cparams = []
for i in range(0, len(self.params["param_list"])):
cparams.append(
float(
-1.0 + 2.0 *
(params[i] - self.params["param_list"][i]["greater_than"]) /
(self.params["param_list"][i]["lower_than"] -
self.params["param_list"][i]["greater_than"])))
if cparams[i] <= -1.0:
cparams[i] = -1.0
if cparams[i] >= 1.0:
cparams[i] = 1.0
return cparams
def format_params_str(self, params):
param_str = "--%s=%s" % (self.params["param_list"][0]["name"],
params[0])
for i in range(1, len(self.params["param_list"])):
param_str = "%s --%s=%s" % (
param_str, self.params["param_list"][i]["name"], str(params[i]))
return param_str
def run(self, *args):
raise NotImplementedError
def new_round(self):
pass
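As a quick hand-check of convert_params above: normalized solutions live in [-1, 1] and are mapped linearly into each hyperparameter's [greater_than, lower_than] interval. The learning-rate bounds below are illustrative, not taken from any shipped config.
# Assumed bounds for one hyperparameter, e.g. a learning rate in [1e-5, 1e-3].
greater_than, lower_than = 0.00001, 0.001
for normalized in (-1.0, 0.0, 1.0):
    real = greater_than + (normalized + 1.0) / 2.0 * (lower_than - greater_than)
    print(normalized, "->", real)  # -1.0 -> 1e-05, 0.0 -> 0.000505, 1.0 -> 0.001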
class FullTrailEvaluator(BaseEvaluator):
def __init__(self, params_file, finetunee_script):
super(FullTrailEvaluator, self).__init__(params_file, finetunee_script)
self.model_rewards = {}  # run() records a reward per checkpoint directory
def run(self, *args):
params = args[0][0]
num_cuda = args[0][1]
ckpt_dir = args[0][2]
log_file = args[0][3]
params = self.convert_params(params)
if not self.is_valid_params(params):
return REWARD_SUM
param_str = self.format_params_str(params)
f = open(log_file, "w")
f.close()
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
try:
os.system(run_cmd)
with open(log_file, "r") as f:
lines = f.readlines()
eval_result = lines[-1]
except:
print(
"WARNING: Program run with hyperparameters %s crashed!"
% param_str.replace("--", ""))
eval_result = 0.0
reward = self.get_reward(eval_result)
self.model_rewards[ckpt_dir] = reward
return reward
class ModelBasedEvaluator(BaseEvaluator):
def __init__(self, params_file, finetunee_script):
super(ModelBasedEvaluator, self).__init__(params_file, finetunee_script)
self.model_rewards = {}
self.half_best_model_ckpt = []
self.run_count = 0
def run(self, *args):
params = args[0][0]
num_cuda = args[0][1]
ckpt_dir = args[0][2]
log_file = args[0][3]
params = self.convert_params(params)
if not self.is_valid_params(params):
return REWARD_SUM
param_str = self.format_params_str(params)
f = open(log_file, "w")
f.close()
if len(self.half_best_model_ckpt) > 0:
model_path = self.half_best_model_ckpt[self.run_count % len(
self.half_best_model_ckpt)] + "/best_model"
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --epochs=1 --model_path %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, model_path, ckpt_dir, param_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --epochs=1 --model_path %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, model_path, ckpt_dir, param_str, log_file)
else:
if is_windows():
run_cmd = "set FLAGS_eager_delete_tensor_gb=0.0&set CUDA_VISIBLE_DEVICES=%s&python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
else:
run_cmd = "export FLAGS_eager_delete_tensor_gb=0.0; export CUDA_VISIBLE_DEVICES=%s; python -u %s --checkpoint_dir=%s %s >%s 2>&1" % \
(num_cuda, self.finetunee_script, ckpt_dir, param_str, log_file)
self.run_count += 1
try:
os.system(run_cmd)
with open(log_file, "r") as f:
lines = f.readlines()
eval_result = lines[-1]
except:
print(
"WARNING: Program run with hyperparameters %s crashed!"
% param_str.replace("--", ""))
eval_result = 0.0
reward = self.get_reward(eval_result)
self.model_rewards[ckpt_dir] = reward
return reward
def new_round(self):
"""update self.half_best_model"""
half_size = int(len(self.model_rewards) / 2)
if half_size < 1:
half_size = 1
self.half_best_model_ckpt = list({
key
for key in sorted(
self.model_rewards, key=self.model_rewards.get, reverse=False)
[:half_size]
})
self.model_rewards = {}
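To illustrate new_round above: rewards are REWARD_SUM minus the evaluation score, so the checkpoints with the lowest rewards are the best finetuned models and become warm starts (via --model_path) for the next round. The numbers below are made up.
# Assumed rewards from one round (lower reward = higher eval score).
model_rewards = {"round0/ckpt-0": 9999.2, "round0/ckpt-1": 9999.8,
                 "round0/ckpt-2": 9999.1, "round0/ckpt-3": 9999.6}
half_size = max(1, len(model_rewards) // 2)
half_best = sorted(model_rewards, key=model_rewards.get)[:half_size]
print(half_best)  # ['round0/ckpt-2', 'round0/ckpt-0'] are reused as warm starts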
...@@ -25,3 +25,4 @@ from . import help ...@@ -25,3 +25,4 @@ from . import help
from . import clear from . import clear
from . import config from . import config
from . import hub from . import hub
from . import autofinetune
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import io
import json
import os
import sys
import ast
import six
import pandas
import numpy as np
from paddlehub.commands.base_command import BaseCommand, ENTRY
from paddlehub.common.arg_helper import add_argument, print_arguments
from paddlehub.autofinetune.autoft import PSHE2
from paddlehub.autofinetune.evaluator import FullTrailEvaluator
from paddlehub.autofinetune.evaluator import ModelBasedEvaluator
from paddlehub.common.logger import logger
import paddlehub as hub
class AutoFineTuneCommand(BaseCommand):
name = "autofinetune"
def __init__(self, name):
super(AutoFineTuneCommand, self).__init__(name)
self.show_in_help = True
self.name = name
self.description = "Paddlehub helps to finetune a task by searching hyperparameters automatically."
self.parser = argparse.ArgumentParser(
description=self.__class__.__doc__,
prog='%s %s <task to be finetuned in python script>' % (ENTRY,
self.name),
usage='%(prog)s',
add_help=False)
self.module = None
def add_params_file_arg(self):
self.arg_params_to_be_searched_group.add_argument(
"--param_file",
type=str,
default=None,
required=True,
help=
"Hyperparameters to be searched in the yaml format. The number of hyperparameters searched must be greater than 1."
)
def add_autoft_config_arg(self):
self.arg_config_group.add_argument(
"--popsize", type=int, default=5, help="Population size")
self.arg_config_group.add_argument(
"--cuda",
type=ast.literal_eval,
default=['0'],
help="The list of gpu devices to be used")
self.arg_config_group.add_argument(
"--round", type=int, default=10, help="Number of searches")
self.arg_config_group.add_argument(
"--output_dir",
type=str,
default=None,
help="Directory to model checkpoint")
self.arg_config_group.add_argument(
"--evaluate_choice",
type=str,
default="fulltrail",
help="Choices: fulltrail or modelbased.")
def execute(self, argv):
if not argv:
print("ERROR: Please specify a script to be finetuned in python.\n")
self.help()
return False
self.fintunee_script = argv[0]
self.parser.prog = '%s %s %s' % (ENTRY, self.name, self.fintunee_script)
self.arg_params_to_be_searched_group = self.parser.add_argument_group(
title="Input options",
description="Hyperparameters to be searched.")
self.arg_config_group = self.parser.add_argument_group(
title="Autofinetune config options",
description=
"Autofintune configuration for controlling autofinetune behavior, not required"
)
self.add_params_file_arg()
self.add_autoft_config_arg()
if not argv[1:]:
self.help()
return False
self.args = self.parser.parse_args(argv[1:])
if self.args.evaluate_choice.lower() == "fulltrail":
evaluator = FullTrailEvaluator(self.args.param_file,
self.fintunee_script)
elif self.args.evaluate_choice.lower() == "modelbased":
evaluator = ModelBasedEvaluator(self.args.param_file,
self.fintunee_script)
else:
raise ValueError(
"The evaluate %s is not defined!" % self.args.evaluate_choice)
autoft = PSHE2(
evaluator,
cudas=self.args.cuda,
popsize=self.args.popsize,
output_dir=self.args.output_dir)
run_round_cnt = 0
solutions_ckptdirs = {}
print("PaddleHub Autofinetune starts.")
while (not autoft.is_stop()) and run_round_cnt < self.args.round:
print("PaddleHub Autofinetune starts round at %s." % run_round_cnt)
output_dir = autoft._output_dir + "/round" + str(run_round_cnt)
res = autoft.step(output_dir)
solutions_ckptdirs.update(res)
evaluator.new_round()
run_round_cnt = run_round_cnt + 1
print("PaddleHub Autofinetune ends.")
with open("./log_file.txt", "w") as f:
best_choice = evaluator.convert_params(autoft.optimal_solution())
print("The best hyperparameters:")
f.write("The best hyperparameters:\n")
param_name = []
for idx, param in enumerate(evaluator.params["param_list"]):
param_name.append(param["name"])
f.write(param["name"] + "\t:\t" + str(best_choice[idx]) + "\n")
print("%s : %s" % (param["name"], best_choice[idx]))
f.write("\n\n\n")
f.write("\t".join(param_name) + "\toutput_dir\n\n")
logger.info(
"The checkpoint directories of the programs run with the searched hyperparameters are saved in log_file.txt."
)
print(
"The checkpoint directories of the programs run with the searched hyperparameters are saved in log_file.txt."
)
for solution, ckptdir in solutions_ckptdirs.items():
param = evaluator.convert_params(solution)
param = [str(p) for p in param]
f.write("\t".join(param) + "\t" + ckptdir + "\n\n")
return True
command = AutoFineTuneCommand.instance()
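A hedged end-to-end sketch of what the command expects: a YAML search space whose keys match what BaseEvaluator reads (param_list entries with name, init_value, type, greater_than, lower_than) plus a user finetune script. hparam.yaml, finetunee.py and the bounds below are all hypothetical.
from paddlehub.autofinetune.evaluator import FullTrailEvaluator

hparam_yaml = """
param_list:
  - name: learning_rate
    init_value: 0.00005
    type: float
    greater_than: 0.00001
    lower_than: 0.001
  - name: batch_size
    init_value: 16
    type: int
    greater_than: 8
    lower_than: 64
"""
with open("hparam.yaml", "w") as f:
    f.write(hparam_yaml)

# finetunee.py stands for any script that accepts these flags and prints its
# final evaluation score as the last line of stdout (see run() above).
evaluator = FullTrailEvaluator("hparam.yaml", "finetunee.py")
print(evaluator.get_init_params())  # init values mapped into the [-1, 1] space
# Roughly equivalent CLI, using the flags defined in AutoFineTuneCommand:
#   hub autofinetune finetunee.py --param_file=hparam.yaml --cuda=['0'] \
#       --popsize=5 --round=10 --evaluate_choice=fulltrail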
...@@ -36,7 +36,6 @@ class InstallCommand(BaseCommand): ...@@ -36,7 +36,6 @@ class InstallCommand(BaseCommand):
prog='%s %s <module_name>' % (ENTRY, name), prog='%s %s <module_name>' % (ENTRY, name),
usage='%(prog)s', usage='%(prog)s',
add_help=False) add_help=False)
#TODO(wuzewu): add --upgrade option
def execute(self, argv): def execute(self, argv):
if not argv: if not argv:
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
import os import os
# TODO: Change dir.py's filename, this naming rule is not qualified
USER_HOME = os.path.expanduser('~') USER_HOME = os.path.expanduser('~')
HUB_HOME = os.path.join(USER_HOME, ".paddlehub") HUB_HOME = os.path.join(USER_HOME, ".paddlehub")
MODULE_HOME = os.path.join(HUB_HOME, "modules") MODULE_HOME = os.path.join(HUB_HOME, "modules")
......
...@@ -77,7 +77,6 @@ class Downloader(object): ...@@ -77,7 +77,6 @@ class Downloader(object):
with open(file_name, 'wb') as f: with open(file_name, 'wb') as f:
shutil.copyfileobj(r.raw, f) shutil.copyfileobj(r.raw, f)
else: else:
#TODO(ZeyuChen) upgrade to tqdm process
with open(file_name, 'wb') as f: with open(file_name, 'wb') as f:
dl = 0 dl = 0
total_length = int(total_length) total_length = int(total_length)
......
...@@ -24,6 +24,7 @@ import requests ...@@ -24,6 +24,7 @@ import requests
import json import json
import yaml import yaml
import random import random
import fcntl
from random import randint from random import randint
from paddlehub.common import utils, srv_utils from paddlehub.common import utils, srv_utils
...@@ -38,6 +39,9 @@ CACHE_TIME = 60 * 10 ...@@ -38,6 +39,9 @@ CACHE_TIME = 60 * 10
class HubServer(object): class HubServer(object):
def __init__(self, config_file_path=None): def __init__(self, config_file_path=None):
LOCK_FILE = os.path.join(hub.HUB_HOME, '__LOCK__')
LOCK_FP = open(LOCK_FILE, 'a+')
fcntl.flock(LOCK_FP.fileno(), fcntl.LOCK_EX)
if not config_file_path: if not config_file_path:
config_file_path = os.path.join(hub.CONF_HOME, 'config.json') config_file_path = os.path.join(hub.CONF_HOME, 'config.json')
if not os.path.exists(hub.CONF_HOME): if not os.path.exists(hub.CONF_HOME):
...@@ -53,6 +57,7 @@ class HubServer(object): ...@@ -53,6 +57,7 @@ class HubServer(object):
self.server_url = self.config['server_url'] self.server_url = self.config['server_url']
self.request() self.request()
self._load_resource_list_file_if_valid() self._load_resource_list_file_if_valid()
LOCK_FP.close()
def get_server_url(self): def get_server_url(self):
random.seed(int(time.time())) random.seed(int(time.time()))
...@@ -178,7 +183,6 @@ class HubServer(object): ...@@ -178,7 +183,6 @@ class HubServer(object):
self.resource_list_file['version'][index] self.resource_list_file['version'][index]
for index in resource_index_list for index in resource_index_list
] ]
#TODO(wuzewu): version sort method
resource_version_list = sorted(resource_version_list) resource_version_list = sorted(resource_version_list)
if not version: if not version:
if not resource_version_list: if not resource_version_list:
......
...@@ -83,7 +83,6 @@ def from_param_to_module_attr(param, module_attr): ...@@ -83,7 +83,6 @@ def from_param_to_module_attr(param, module_attr):
module_attr.map.data['trainable']) module_attr.map.data['trainable'])
from_pyobj_to_module_attr(param.do_model_average, from_pyobj_to_module_attr(param.do_model_average,
module_attr.map.data['do_model_average']) module_attr.map.data['do_model_average'])
#TODO(wuzewu): don't save learning rate
from_pyobj_to_module_attr(param.optimize_attr, from_pyobj_to_module_attr(param.optimize_attr,
module_attr.map.data['optimize_attr']) module_attr.map.data['optimize_attr'])
from_pyobj_to_module_attr( from_pyobj_to_module_attr(
......
...@@ -117,7 +117,6 @@ def get_pykey(key, keyed_type): ...@@ -117,7 +117,6 @@ def get_pykey(key, keyed_type):
return str(key) return str(key)
#TODO(wuzewu): solving the problem of circular references
def from_pyobj_to_module_attr(pyobj, module_attr, obj_filter=None): def from_pyobj_to_module_attr(pyobj, module_attr, obj_filter=None):
if obj_filter and obj_filter(pyobj): if obj_filter and obj_filter(pyobj):
return return
......
...@@ -20,6 +20,7 @@ from .msra_ner import MSRA_NER ...@@ -20,6 +20,7 @@ from .msra_ner import MSRA_NER
from .nlpcc_dbqa import NLPCC_DBQA from .nlpcc_dbqa import NLPCC_DBQA
from .lcqmc import LCQMC from .lcqmc import LCQMC
from .toxic import Toxic from .toxic import Toxic
from .squad import SQUAD
from .xnli import XNLI from .xnli import XNLI
from .glue import GLUE from .glue import GLUE
......
...@@ -52,36 +52,35 @@ class ImageClassificationDataset(object): ...@@ -52,36 +52,35 @@ class ImageClassificationDataset(object):
return dataset_path return dataset_path
def _parse_data(self, data_path, shuffle=False, phase=None): def _parse_data(self, data_path, shuffle=False, phase=None):
def _base_reader(): data = []
data = [] with open(data_path, "r") as file:
with open(data_path, "r") as file: while True:
while True: line = file.readline()
line = file.readline() if not line:
if not line: break
break line = line.strip()
line = line.strip() items = line.split(" ")
items = line.split(" ") if len(items) > 2:
if len(items) > 2: image_path = " ".join(items[0:-1])
image_path = " ".join(items[0:-1]) else:
else: image_path = items[0]
image_path = items[0] if not os.path.isabs(image_path):
if not os.path.isabs(image_path): if self.base_path is not None:
if self.base_path is not None: image_path = os.path.join(self.base_path, image_path)
image_path = os.path.join(self.base_path, label = items[-1]
image_path) data.append((image_path, items[-1]))
label = items[-1]
data.append((image_path, items[-1])) if phase == 'train':
self.train_examples = data
if phase == 'train': elif phase == 'dev':
self.train_examples = data self.dev_examples = data
elif phase == 'dev': elif phase == 'test':
self.dev_examples = data self.test_examples = data
elif phase == 'test':
self.test_examples = data if shuffle:
np.random.shuffle(data)
if shuffle:
np.random.shuffle(data)
def _base_reader():
for item in data: for item in data:
yield item yield item
......
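For reference, the refactored _parse_data still expects each line of the list file to be an image path followed by a space and a label; because only the last token is taken as the label, paths containing spaces remain valid. The file names below are illustrative.
# e.g. one line of a hypothetical list file: "<image path> <label>"
line = "flowers/rose bud/002.jpg 3"
items = line.split(" ")
image_path, label = " ".join(items[0:-1]), items[-1]
print(image_path, label)  # flowers/rose bud/002.jpg 3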
...@@ -39,11 +39,18 @@ class GLUE(HubDataset): ...@@ -39,11 +39,18 @@ class GLUE(HubDataset):
def __init__(self, sub_dataset='SST-2'): def __init__(self, sub_dataset='SST-2'):
# sub_dataset : CoLA, MNLI, MRPC, QNLI, QQP, RTE, SST-2, STS-B # sub_dataset : CoLA, MNLI, MRPC, QNLI, QQP, RTE, SST-2, STS-B
if sub_dataset not in [ if sub_dataset not in [
'CoLA', 'MNLI', 'MRPC', 'QNLI', 'QQP', 'RTE', 'SST-2', 'STS-B' 'CoLA', 'MNLI', 'MNLI_m', 'MNLI_mm', 'MRPC', 'QNLI', 'QQP',
'RTE', 'SST-2', 'STS-B'
]: ]:
raise Exception( raise Exception(
sub_dataset + sub_dataset +
" is not in GLUE benchmark. Please confirm the data set") " is not in GLUE benchmark. Please confirm the data set")
self.mismatch = False
if sub_dataset == 'MNLI_mm':
sub_dataset = 'MNLI'
self.mismatch = True
elif sub_dataset == 'MNLI_m':
sub_dataset = 'MNLI'
self.sub_dataset = sub_dataset self.sub_dataset = sub_dataset
self.dataset_dir = os.path.join(DATA_HOME, "glue_data") self.dataset_dir = os.path.join(DATA_HOME, "glue_data")
...@@ -64,9 +71,12 @@ class GLUE(HubDataset): ...@@ -64,9 +71,12 @@ class GLUE(HubDataset):
self.train_examples = self._read_tsv(self.train_file) self.train_examples = self._read_tsv(self.train_file)
def _load_dev_examples(self): def _load_dev_examples(self):
if self.sub_dataset == 'MNLI': if self.sub_dataset == 'MNLI' and not self.mismatch:
self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset, self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset,
"dev_matched.tsv") "dev_matched.tsv")
elif self.sub_dataset == 'MNLI' and self.mismatch:
self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset,
"dev_mismatched.tsv")
else: else:
self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset, self.dev_file = os.path.join(self.dataset_dir, self.sub_dataset,
"dev.tsv") "dev.tsv")
...@@ -76,9 +86,12 @@ class GLUE(HubDataset): ...@@ -76,9 +86,12 @@ class GLUE(HubDataset):
self.test_examples = [] self.test_examples = []
def _load_predict_examples(self): def _load_predict_examples(self):
if self.sub_dataset == 'MNLI': if self.sub_dataset == 'MNLI' and not self.mismatch:
self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset, self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset,
"test_matched.tsv") "test_matched.tsv")
elif self.sub_dataset == 'MNLI' and self.mismatch:
self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset,
"test_mismatched.tsv")
else: else:
self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset, self.predict_file = os.path.join(self.dataset_dir, self.sub_dataset,
"test.tsv") "test.tsv")
...@@ -187,7 +200,7 @@ class GLUE(HubDataset): ...@@ -187,7 +200,7 @@ class GLUE(HubDataset):
seq_id += 1 seq_id += 1
examples.append(example) examples.append(example)
except: except:
print("[Discard Incorrect Data] " + "\t".join(line)) logger.info("[Discard Incorrect Data] " + "\t".join(line))
return examples return examples
......
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run BERT on SQuAD 1.1 and SQuAD 2.0."""
import json
import os
import sys
from paddlehub.reader import tokenization
from paddlehub.common.downloader import default_downloader
from paddlehub.common.dir import DATA_HOME
from paddlehub.common.logger import logger
_DATA_URL = "https://bj.bcebos.com/paddlehub-dataset/squad.tar.gz"
class SquadExample(object):
"""A single training/test example for simple sequence classification.
For examples without an answer, the start and end position are -1.
"""
def __init__(self,
qas_id,
question_text,
doc_tokens,
orig_answer_text=None,
start_position=None,
end_position=None,
is_impossible=False):
self.qas_id = qas_id
self.question_text = question_text
self.doc_tokens = doc_tokens
self.orig_answer_text = orig_answer_text
self.start_position = start_position
self.end_position = end_position
self.is_impossible = is_impossible
def __str__(self):
return self.__repr__()
def __repr__(self):
s = ""
s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
s += ", question_text: %s" % (tokenization.printable_text(
self.question_text))
s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
if self.start_position:
s += ", start_position: %d" % (self.start_position)
if self.start_position:
s += ", end_position: %d" % (self.end_position)
if self.start_position:
s += ", is_impossible: %r" % (self.is_impossible)
return s
class SQUAD(object):
"""A single set of features of data."""
def __init__(self, version_2_with_negative=False):
self.dataset_dir = os.path.join(DATA_HOME, "squad_data")
if not os.path.exists(self.dataset_dir):
ret, tips, self.dataset_dir = default_downloader.download_file_and_uncompress(
url=_DATA_URL, save_path=DATA_HOME, print_progress=True)
else:
logger.info("Dataset {} already cached.".format(self.dataset_dir))
self._load_train_examples(version_2_with_negative, is_training=True)
self._load_predict_examples(version_2_with_negative, is_training=False)
def _load_train_examples(self,
version_2_with_negative=False,
is_training=True):
if not version_2_with_negative:
self.train_file = os.path.join(self.dataset_dir, "train-v1.1.json")
else:
self.train_file = os.path.join(self.dataset_dir, "train-v2.0.json")
self.train_examples = self._read_json(self.train_file, is_training,
version_2_with_negative)
def _load_predict_examples(self,
version_2_with_negative=False,
is_training=False):
if not version_2_with_negative:
self.predict_file = os.path.join(self.dataset_dir, "dev-v1.1.json")
else:
self.predict_file = os.path.join(self.dataset_dir, "dev-v2.0.json")
self.predict_examples = self._read_json(self.predict_file, is_training,
version_2_with_negative)
def get_train_examples(self):
return self.train_examples
def get_dev_examples(self):
return []
def get_test_examples(self):
return []
def _read_json(self, input_file, is_training,
version_2_with_negative=False):
"""Read a SQuAD json file into a list of SquadExample."""
with open(input_file, "r") as reader:
input_data = json.load(reader)["data"]
def is_whitespace(c):
if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(
c) == 0x202F:
return True
return False
examples = []
for entry in input_data:
for paragraph in entry["paragraphs"]:
paragraph_text = paragraph["context"]
doc_tokens = []
char_to_word_offset = []
prev_is_whitespace = True
for c in paragraph_text:
if is_whitespace(c):
prev_is_whitespace = True
else:
if prev_is_whitespace:
doc_tokens.append(c)
else:
doc_tokens[-1] += c
prev_is_whitespace = False
char_to_word_offset.append(len(doc_tokens) - 1)
for qa in paragraph["qas"]:
qas_id = qa["id"]
question_text = qa["question"]
start_position = None
end_position = None
orig_answer_text = None
is_impossible = False
if is_training:
if version_2_with_negative:
is_impossible = qa["is_impossible"]
if (len(qa["answers"]) != 1) and (not is_impossible):
raise ValueError(
"For training, each question should have exactly 1 answer."
)
if not is_impossible:
answer = qa["answers"][0]
orig_answer_text = answer["text"]
answer_offset = answer["answer_start"]
answer_length = len(orig_answer_text)
start_position = char_to_word_offset[answer_offset]
end_position = char_to_word_offset[
answer_offset + answer_length - 1]
# Only add answers where the text can be exactly recovered from the
# document. If this CAN'T happen it's likely due to weird Unicode
# stuff so we will just skip the example.
#
# Note that this means for training mode, every example is NOT
# guaranteed to be preserved.
actual_text = " ".join(
doc_tokens[start_position:(end_position + 1)])
cleaned_answer_text = " ".join(
tokenization.whitespace_tokenize(
orig_answer_text))
if actual_text.find(cleaned_answer_text) == -1:
logger.warning(
"Could not find answer: '%s' vs. '%s'",
actual_text, cleaned_answer_text)
continue
else:
start_position = -1
end_position = -1
orig_answer_text = ""
example = SquadExample(
qas_id=qas_id,
question_text=question_text,
doc_tokens=doc_tokens,
orig_answer_text=orig_answer_text,
start_position=start_position,
end_position=end_position,
is_impossible=is_impossible)
examples.append(example)
return examples
if __name__ == "__main__":
ds = SQUAD(version_2_with_negative=True)
examples = ds.get_dev_examples()
for index, e in enumerate(examples):
if index < 10:
print(e)
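To see how _read_json turns a character-level answer_start into word-level start/end positions, here is a self-contained rendering of its whitespace tokenization and char_to_word_offset bookkeeping on a made-up paragraph.
paragraph_text = "PaddleHub wraps pretrained models."
doc_tokens, char_to_word_offset = [], []
prev_is_whitespace = True
for c in paragraph_text:
    if c in " \t\r\n":
        prev_is_whitespace = True
    else:
        if prev_is_whitespace:
            doc_tokens.append(c)   # start a new word
        else:
            doc_tokens[-1] += c    # extend the current word
        prev_is_whitespace = False
    char_to_word_offset.append(len(doc_tokens) - 1)

answer_text = "pretrained models"
answer_offset = paragraph_text.find(answer_text)
start_position = char_to_word_offset[answer_offset]
end_position = char_to_word_offset[answer_offset + len(answer_text) - 1]
print(doc_tokens[start_position:end_position + 1])  # ['pretrained', 'models.']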
...@@ -43,6 +43,7 @@ class XNLI(HubDataset): ...@@ -43,6 +43,7 @@ class XNLI(HubDataset):
"ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw",
"th", "tr", "ur", "vi", "zh" "th", "tr", "ur", "vi", "zh"
]: ]:
raise Exception(language + raise Exception(language +
"is not in XNLI. Please confirm the language") "is not in XNLI. Please confirm the language")
self.language = language self.language = language
......
...@@ -22,4 +22,5 @@ message CheckPoint { ...@@ -22,4 +22,5 @@ message CheckPoint {
int64 current_epoch = 1; int64 current_epoch = 1;
int64 global_step = 2; int64 global_step = 2;
string latest_model_dir = 3; string latest_model_dir = 3;
double best_score = 4;
} }
...@@ -37,6 +37,7 @@ def load_checkpoint(checkpoint_dir, exe, main_program): ...@@ -37,6 +37,7 @@ def load_checkpoint(checkpoint_dir, exe, main_program):
ckpt.ParseFromString(f.read()) ckpt.ParseFromString(f.read())
current_epoch = 1 current_epoch = 1
global_step = 0 global_step = 0
best_score = -999
def if_exist(var): def if_exist(var):
return os.path.exists(os.path.join(ckpt.latest_model_dir, var.name)) return os.path.exists(os.path.join(ckpt.latest_model_dir, var.name))
...@@ -45,20 +46,27 @@ def load_checkpoint(checkpoint_dir, exe, main_program): ...@@ -45,20 +46,27 @@ def load_checkpoint(checkpoint_dir, exe, main_program):
fluid.io.load_vars( fluid.io.load_vars(
exe, ckpt.latest_model_dir, main_program, predicate=if_exist) exe, ckpt.latest_model_dir, main_program, predicate=if_exist)
# Compatible with older versions without best_score in checkpoint_pb2
try:
best_score = ckpt.best_score
except:
best_score = -999
logger.info("PaddleHub model checkpoint loaded. current_epoch={}, " logger.info("PaddleHub model checkpoint loaded. current_epoch={}, "
"global_step={}".format(ckpt.current_epoch, "global_step={}, best_score={:.5f}".format(
ckpt.global_step)) ckpt.current_epoch, ckpt.global_step, best_score))
return True, ckpt.current_epoch, ckpt.global_step
return True, ckpt.current_epoch, ckpt.global_step, best_score
logger.info( logger.info("PaddleHub model checkpoint not found, start from scratch...")
"PaddleHub model checkpoint not found, start training from scratch...")
return False, current_epoch, global_step return False, current_epoch, global_step, best_score
def save_checkpoint(checkpoint_dir, def save_checkpoint(checkpoint_dir,
current_epoch, current_epoch,
global_step, global_step,
best_score,
exe, exe,
main_program=fluid.default_main_program()): main_program=fluid.default_main_program()):
...@@ -73,5 +81,6 @@ def save_checkpoint(checkpoint_dir, ...@@ -73,5 +81,6 @@ def save_checkpoint(checkpoint_dir,
ckpt.current_epoch = current_epoch ckpt.current_epoch = current_epoch
ckpt.global_step = global_step ckpt.global_step = global_step
ckpt.latest_model_dir = model_saved_dir ckpt.latest_model_dir = model_saved_dir
ckpt.best_score = best_score
with open(ckpt_meta_path, "wb") as f: with open(ckpt_meta_path, "wb") as f:
f.write(ckpt.SerializeToString()) f.write(ckpt.SerializeToString())
#coding:utf-8
# Generated by the protocol buffer compiler. DO NOT EDIT! # Generated by the protocol buffer compiler. DO NOT EDIT!
# source: checkpoint.proto # source: checkpoint.proto
...@@ -18,7 +17,7 @@ DESCRIPTOR = _descriptor.FileDescriptor( ...@@ -18,7 +17,7 @@ DESCRIPTOR = _descriptor.FileDescriptor(
package='paddlehub.task.checkpoint', package='paddlehub.task.checkpoint',
syntax='proto3', syntax='proto3',
serialized_pb=_b( serialized_pb=_b(
'\n\x10\x63heckpoint.proto\x12\x19paddlehub.task.checkpoint\"R\n\nCheckPoint\x12\x15\n\rcurrent_epoch\x18\x01 \x01(\x03\x12\x13\n\x0bglobal_step\x18\x02 \x01(\x03\x12\x18\n\x10latest_model_dir\x18\x03 \x01(\tB\x02H\x03\x62\x06proto3' '\n\x10\x63heckpoint.proto\x12\x19paddlehub.task.checkpoint\"f\n\nCheckPoint\x12\x15\n\rcurrent_epoch\x18\x01 \x01(\x03\x12\x13\n\x0bglobal_step\x18\x02 \x01(\x03\x12\x18\n\x10latest_model_dir\x18\x03 \x01(\t\x12\x12\n\nbest_score\x18\x04 \x01(\x01\x42\x02H\x03\x62\x06proto3'
)) ))
_sym_db.RegisterFileDescriptor(DESCRIPTOR) _sym_db.RegisterFileDescriptor(DESCRIPTOR)
...@@ -77,6 +76,22 @@ _CHECKPOINT = _descriptor.Descriptor( ...@@ -77,6 +76,22 @@ _CHECKPOINT = _descriptor.Descriptor(
is_extension=False, is_extension=False,
extension_scope=None, extension_scope=None,
options=None), options=None),
_descriptor.FieldDescriptor(
name='best_score',
full_name='paddlehub.task.checkpoint.CheckPoint.best_score',
index=3,
number=4,
type=1,
cpp_type=5,
label=1,
has_default_value=False,
default_value=float(0),
message_type=None,
enum_type=None,
containing_type=None,
is_extension=False,
extension_scope=None,
options=None),
], ],
extensions=[], extensions=[],
nested_types=[], nested_types=[],
...@@ -87,7 +102,7 @@ _CHECKPOINT = _descriptor.Descriptor( ...@@ -87,7 +102,7 @@ _CHECKPOINT = _descriptor.Descriptor(
extension_ranges=[], extension_ranges=[],
oneofs=[], oneofs=[],
serialized_start=47, serialized_start=47,
serialized_end=129, serialized_end=149,
) )
DESCRIPTOR.message_types_by_name['CheckPoint'] = _CHECKPOINT DESCRIPTOR.message_types_by_name['CheckPoint'] = _CHECKPOINT
......
...@@ -128,3 +128,75 @@ def calculate_f1(num_label, num_infer, num_correct): ...@@ -128,3 +128,75 @@ def calculate_f1(num_label, num_infer, num_correct):
else: else:
f1 = 2 * precision * recall / (precision + recall) f1 = 2 * precision * recall / (precision + recall)
return precision, recall, f1 return precision, recall, f1
def calculate_f1_np(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
tp = np.sum((labels == 1) & (preds == 1))
tn = np.sum((labels == 0) & (preds == 0))
fp = np.sum((labels == 0) & (preds == 1))
fn = np.sum((labels == 1) & (preds == 0))
p = tp / (tp + fp) if (tp + fp) else 0
r = tp / (tp + fn) if (tp + fn) else 0
f1 = (2 * p * r) / (p + r) if p + r else 0
return f1
def matthews_corrcoef(preds, labels):
preds = np.array(preds)
labels = np.array(labels)
tp = np.sum((labels == 1) & (preds == 1))
tn = np.sum((labels == 0) & (preds == 0))
fp = np.sum((labels == 0) & (preds == 1))
fn = np.sum((labels == 1) & (preds == 0))
div = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
mcc = ((tp * tn) - (fp * fn)) / np.sqrt(div) if div else 0
return mcc
def recall_nk(data, n, k, m):
'''
This metric evaluates whether the model can find the correct response B for a question A.
Note: it only applies when each question A has exactly one correct response B1.
Parameters
----------
data: List. Each element is a tuple consisting of the predicted positive probability of a sample and its label.
For each question, the single true positive sample must come first.
n: int. The number of candidate responses per question.
eg: [A,B1,1], [A,B2,0], [A,B3,0] -> n=3 as question A has 3 candidates.
k: int. The prediction is considered correct if the positive sample ranks within the top k.
eg: [A,B1,1]=0.5, [A,B2,0]=0.8, [A,B3,0]=0.3 (probability of being positive)
If k=2, the prediction for question A is considered correct, as 0.5 is among the top 2 probabilities;
if k=1, it is considered wrong, as 0.5 is not the highest probability.
m: int. For every m examples there is exactly one positive sample.
eg: data = [A1,B1,1], [A1,B2,0], [A1,B3,0], [A2,B1,1], [A2,B2,0], [A2,B3,0]
For every 3 examples there is one positive sample, so m=3, and n can be 1, 2 or 3.
'''
def get_p_at_n_in_m(data, n, k, ind):
"""
calculate precision in recall n
"""
pos_score = data[ind][0]
curr = data[ind:ind + n]
curr = sorted(curr, key=lambda x: x[0], reverse=True)
if curr[k - 1][0] <= pos_score:
return 1
return 0
correct_num = 0.0
length = len(data) // m
for i in range(0, length):
ind = i * m
assert data[ind][1] == 1
correct_num += get_p_at_n_in_m(data, n, k, ind)
return correct_num / length
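Quick hand-worked checks of the three metrics added above (assuming the functions and numpy are in scope, as elsewhere in this file); the inputs are made up.
preds = [1, 0, 1, 1, 0, 0]
labels = [1, 0, 0, 1, 1, 0]
print(calculate_f1_np(preds, labels))    # tp=2, fp=1, fn=1 -> f1 = 2/3
print(matthews_corrcoef(preds, labels))  # (2*2 - 1*1) / sqrt(3*3*3*3) = 1/3

# recall_nk: two questions with three candidates each (n=3, m=3),
# the positive candidate listed first for every question.
data = [(0.9, 1), (0.2, 0), (0.1, 0),   # question 1: positive ranked 1st
        (0.4, 1), (0.7, 0), (0.3, 0)]   # question 2: positive ranked 2nd
print(recall_nk(data, n=3, k=1, m=3))   # 0.5
print(recall_nk(data, n=3, k=2, m=3))   # 1.0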
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
from paddle.fluid.layers import control_flow
from paddlehub.common.logger import logger
def adam_weight_decay_optimization(loss,
warmup_steps,
num_train_steps,
learning_rate,
main_program,
weight_decay,
scheduler='linear_decay'):
if scheduler == 'noam_decay':
if warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler\
.noam_decay(1/(warmup_steps *(learning_rate ** 2)),
warmup_steps)
else:
logger.warning(
"Noam decay learning rate scheduler should have positive \
warmup steps, using constant learning rate instead!")
scheduled_lr = fluid.layers.create_global_var(
shape=[1],
value=learning_rate,
dtype='float32',
persistable=True,
name="learning_rate")
elif scheduler == 'linear_decay':
scheduled_lr = linear_warmup_decay(learning_rate, num_train_steps,
warmup_steps, main_program)
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_decay'")
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
clip_norm_thres = 1.0
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
def exclude_from_weight_decay(name):
if name.find("layer_norm") > -1:
return True
bias_suffix = ["_bias", "_b", ".b_0"]
for suffix in bias_suffix:
if name.endswith(suffix):
return True
return False
param_list = dict()
for param in main_program.global_block().all_parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
_, param_grads = optimizer.minimize(loss)
if weight_decay > 0:
for param, grad in param_grads:
if exclude_from_weight_decay(param.name):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
return scheduled_lr
def linear_warmup_decay(init_lr, num_train_steps, num_warmup_steps,
main_program):
with main_program._lr_schedule_guard():
global_step = lr_scheduler._decay_step_counter()
lr = fluid.layers.create_global_var(
shape=[1],
value=init_lr,
dtype='float32',
persistable=True,
name="learning_rate")
with control_flow.Switch() as switch:
with switch.case(global_step < num_warmup_steps):
decayed_lr = init_lr * global_step * 1.0 / num_warmup_steps
fluid.layers.assign(decayed_lr, lr)
with switch.default():
decayed_lr = lr_scheduler.polynomial_decay(
learning_rate=init_lr,
decay_steps=num_train_steps,
end_learning_rate=0.0,
power=1.0,
cycle=False)
fluid.layers.assign(decayed_lr, lr)
return lr
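Independent of the fluid graph, the schedule assembled by linear_warmup_decay amounts to a linear ramp over the warmup steps followed by polynomial_decay with power=1.0 down to 0. A rough pure-Python rendering, with illustrative step counts and base rate:
init_lr, num_train_steps, num_warmup_steps = 5e-5, 1000, 100

def lr_at(step):
    if step < num_warmup_steps:
        return init_lr * step / float(num_warmup_steps)   # warmup ramp
    return init_lr * (1.0 - min(step, num_train_steps) / float(num_train_steps))

for step in (0, 50, 100, 500, 1000):
    print(step, lr_at(step))  # 0.0, 2.5e-05, 4.5e-05, 2.5e-05, 0.0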
...@@ -18,12 +18,15 @@ from __future__ import division ...@@ -18,12 +18,15 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import os import os
import math
import multiprocessing import multiprocessing
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlehub.finetune.optimization import adam_weight_decay_optimization from paddlehub.common.logger import logger
from paddlehub.finetune.regularizer import L2SPDecayRegularizer from paddlehub.finetune.regularizer import L2SPDecayRegularizer
import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
from paddle.fluid.layers import control_flow
def get_pretrained_parameter(main_program, start_program): def get_pretrained_parameter(main_program, start_program):
...@@ -40,6 +43,99 @@ def get_pretrained_parameter(main_program, start_program): ...@@ -40,6 +43,99 @@ def get_pretrained_parameter(main_program, start_program):
return pretrained_parameters return pretrained_parameters
def get_parentOp_depth_max(parent_ops, op_depth_dict):
max_depth = 1
for parent_op in parent_ops:
depth = op_depth_dict.get(parent_op, 1)
if max_depth < depth:
max_depth = depth
return max_depth
def get_opDepth_min(ops, op_depth_dict):
min_depth = max(op_depth_dict.values())
for op in ops:
depth = op_depth_dict[op]
if min_depth > depth:
min_depth = depth
return min_depth
def get_depth_parameter(main_program):
global_block = main_program.global_block()
var_op_dict = {}
for op in global_block.ops:
for input_arg in op.input_arg_names:
if input_arg not in var_op_dict.keys():
var_op_dict[input_arg] = {"output_ops": [], "input_ops": []}
var_op_dict[input_arg]["output_ops"].append(op)
for output_arg in op.output_arg_names:
if output_arg not in var_op_dict.keys():
var_op_dict[output_arg] = {"output_ops": [], "input_ops": []}
var_op_dict[output_arg]["input_ops"].append(op)
op_depth_dict = {}
for op in global_block.ops:
parent_ops = []
for input_arg in op.input_arg_names:
for parent_op in var_op_dict[input_arg]["input_ops"]:
if parent_op not in parent_ops:
parent_ops.append(parent_op)
if not parent_ops:
op_depth_dict[op] = 1
else:
op_depth_dict[op] = get_parentOp_depth_max(parent_ops,
op_depth_dict) + 1
depth_params_dict = {}
updated_depth_params_dict = {}
for param in global_block.iter_parameters():
adherent_ops = var_op_dict[param.name]["output_ops"]
depth = get_opDepth_min(adherent_ops, op_depth_dict)
if depth not in depth_params_dict.keys():
depth_params_dict[depth] = []
updated_depth_params_dict[depth] = []
depth_params_dict[depth].append(param)
updated_depth_params_dict[depth].append(param)
depth_list = sorted(depth_params_dict.keys())
len_depth_list = len(depth_list)
for index, depth in enumerate(depth_list):
for param in depth_params_dict[depth]:
prefix = param.name.split(".")[0]
if index < len_depth_list - 1:
next_depth = depth_list[index + 1]
for param_next_depth in depth_params_dict[next_depth]:
prefix_next_depth = param_next_depth.name.split(".")[0]
if prefix == prefix_next_depth:
updated_depth_params_dict[depth].append(
param_next_depth)
updated_depth_params_dict[next_depth].remove(
param_next_depth)
if not updated_depth_params_dict[next_depth]:
updated_depth_params_dict.pop(next_depth)
return updated_depth_params_dict
def set_gradual_unfreeze(main_program, unfreeze_depths):
depth_params_dict = get_depth_parameter(main_program)
for depth in unfreeze_depths:
for index, param in enumerate(depth_params_dict[depth]):
depth_params_dict[depth][index].stop_gradient = False
freeze_depths = list(
set(depth_params_dict.keys()).difference(set(unfreeze_depths)))
for depth in freeze_depths:
for index, param in enumerate(depth_params_dict[depth]):
depth_params_dict[depth][index].stop_gradient = True
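A hedged sketch of how set_gradual_unfreeze might be driven across epochs, ULMFiT-style: thaw the deepest parameter groups first and widen the trainable set each epoch. main_program is assumed to be the fluid Program of the pretrained module, and frz_blocks is an illustrative block count, not a value taken from a shipped strategy.
depth_params_dict = get_depth_parameter(main_program)  # main_program assumed
max_depth = max(depth_params_dict.keys())
frz_blocks = 3  # treat the depths as 3 coarse blocks (illustrative)
for epoch in range(1, frz_blocks + 1):
    # after `epoch` epochs the `epoch` deepest blocks are trainable
    boundary = max_depth - epoch * max_depth // frz_blocks
    unfreeze_depths = [d for d in depth_params_dict if d > boundary]
    set_gradual_unfreeze(main_program, unfreeze_depths)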
class DefaultStrategy(object): class DefaultStrategy(object):
def __init__(self, learning_rate=1e-4, optimizer_name="adam"): def __init__(self, learning_rate=1e-4, optimizer_name="adam"):
self.learning_rate = learning_rate self.learning_rate = learning_rate
...@@ -75,133 +171,403 @@ class DefaultStrategy(object): ...@@ -75,133 +171,403 @@ class DefaultStrategy(object):
self.optimizer = fluid.optimizer.Adam( self.optimizer = fluid.optimizer.Adam(
learning_rate=self.learning_rate) learning_rate=self.learning_rate)
def execute(self, loss, data_reader, config): def execute(self, loss, data_reader, config, dev_count):
if self.optimizer is not None: if self.optimizer is not None:
self.optimizer.minimize(loss) self.optimizer.minimize(loss)
else: else:
raise ValueError("DefaultStrategy's optimizer is None") raise ValueError("DefaultStrategy's optimizer is None")
# TODO complete __str__()
def __str__(self): def __str__(self):
return "DefaultStrategy" return "DefaultStrategy"
def step(self):
pass
class AdamWeightDecayStrategy(DefaultStrategy): class CombinedStrategy(DefaultStrategy):
def __init__(self, def __init__(self,
optimizer_name="adam",
learning_rate=1e-4, learning_rate=1e-4,
lr_scheduler="linear_decay", scheduler=None,
warmup_proportion=0.1, regularization=None,
weight_decay=0.01, clip=None):
optimizer_name="adam"): super(CombinedStrategy, self).__init__(
super(AdamWeightDecayStrategy, self).__init__( optimizer_name=optimizer_name, learning_rate=learning_rate)
learning_rate=learning_rate, optimizer_name=optimizer_name)
# check strategy correctness # init set
if lr_scheduler not in ["linear_decay", "noam_decay"]: self.scheduler = {
raise ValueError("lr_scheduler {} is not setup " "warmup": 0.0,
"correctly".format(lr_scheduler)) "linear_decay": {
self._lr_scheduler = lr_scheduler "start_point": 1.0,
self._warmup_proportion = warmup_proportion "end_learning_rate": 0.0,
self._weight_decay = weight_decay },
"noam_decay": False,
@property "discriminative": {
def lr_scheduler(self): "blocks": 0,
return self._lr_scheduler "factor": 2.6
},
@property "gradual_unfreeze": 0,
def warmup_proportion(self): "slanted_triangle": {
return self._warmup_proportion "cut_fraction": 0.0,
"ratio": 32
@property }
def weight_decay(self): }
return self._weight_decay
self.regularization = {
def execute(self, loss, data_reader, config): "L2": 0.0,
main_program = loss.block.program "L2SP": 0.0,
# calculate wamrup step "weight_decay": 0.0,
dev_count = self._get_dev_count(config) }
self.clip = {"GlobalNorm": 0.0, "Norm": 0.0}
if scheduler == None:
scheduler = {}
if regularization == None:
regularization = {}
if clip == None:
clip = {}
# check legality and assign
for name in scheduler:
self.check_assign(self.scheduler, name, scheduler[name])
for name in regularization:
self.check_assign(self.regularization, name, regularization[name])
for name in clip:
self.check_assign(self.clip, name, clip[name])
self.epoch = 0
self.main_program = None
def check_assign(self, dictionary, key, value):
if key not in dictionary:
raise ValueError("Invalid parameter: %s" % key)
if isinstance(value, dict) and isinstance(dictionary[key], dict):
sub_dict = dictionary[key]
for sub_name in value:
self.check_assign(sub_dict, sub_name, value[sub_name])
elif isinstance(dictionary[key],
type(value)) or (isinstance(dictionary[key], float)
and isinstance(value, (float, int))):
dictionary[key] = value
else:
if isinstance(dictionary[key], dict):
raise ValueError(
"The type of parameter %s should be a dict with keys: %s" %
(key, dictionary[key].keys()))
else:
raise ValueError("The type of parameter %s should be %s" %
(key, type(dictionary[key])))
def add_scheduler(self, name="warmup", value=0, **values):
if values:
self.check_assign(self.scheduler, name, values)
else:
self.check_assign(self.scheduler, name, value)
def add_regularization(self, name="L2", value=1e-3, **values):
if values:
self.check_assign(self.regularization, name, values)
else:
self.check_assign(self.regularization, name, value)
def add_clip(self, name="GlobalNorm", value=1.0, **values):
if values:
self.check_assign(self.clip, name, values)
else:
self.check_assign(self.clip, name, value)
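A hedged configuration sketch: only keys that already exist in the default scheduler/regularization/clip dicts above pass check_assign, and add_scheduler/add_regularization/add_clip are thin wrappers over the same check. The particular values are illustrative.
strategy = CombinedStrategy(
    optimizer_name="adam",
    learning_rate=5e-5,
    scheduler={"warmup": 0.1,
               "linear_decay": {"start_point": 0.5, "end_learning_rate": 0.0}},
    regularization={"weight_decay": 0.01},
    clip={"GlobalNorm": 1.0})
# or tweak one knob at a time after construction:
strategy.add_scheduler("gradual_unfreeze", 3)
strategy.add_scheduler("discriminative", blocks=3, factor=2.6)
strategy.add_regularization("L2", 1e-3)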
def scheduler_handler(self, max_train_steps):
scheduled_lr = fluid.layers.create_global_var(
shape=[1],
value=self.learning_rate,
dtype='float32',
persistable=True,
name="learning_rate")
if not self.scheduler["slanted_triangle"]["cut_fraction"]:
warmup_steps = int(max_train_steps * self.scheduler["warmup"])
linear_decay_start = int(
max_train_steps * self.scheduler["linear_decay"]["start_point"])
if linear_decay_start < warmup_steps:
logger.warning(
"linear decay can not start during warmup process,"
"it will start after warmup ends!")
linear_decay_start = warmup_steps
if self.scheduler["noam_decay"]:
if warmup_steps > 0:
scheduled_lr = fluid.layers.learning_rate_scheduler \
.noam_decay(1 / (warmup_steps * (self.learning_rate ** 2)),
warmup_steps)
else:
logger.warning(
"Noam decay learning rate scheduler should have positive \
warmup steps, using constant learning rate instead!")
if not self.scheduler["noam_decay"] and \
(warmup_steps > 0 or self.scheduler["linear_decay"]["start_point"]<1):
with self.main_program._lr_schedule_guard():
global_step = lr_scheduler._decay_step_counter()
with control_flow.Switch() as switch:
if warmup_steps > 0:
with switch.case(global_step < warmup_steps):
decayed_lr = self.learning_rate * global_step * 1.0 / warmup_steps
fluid.layers.assign(decayed_lr, scheduled_lr)
if self.scheduler["linear_decay"]["start_point"] < 1:
with switch.case(global_step >= linear_decay_start):
decayed_lr = lr_scheduler.polynomial_decay(
learning_rate=self.learning_rate,
decay_steps=max_train_steps,
end_learning_rate=self.scheduler[
"linear_decay"]["end_learning_rate"],
power=1.0,
cycle=False)
fluid.layers.assign(decayed_lr, scheduled_lr)
else:
if self.scheduler["warmup"] or self.scheduler[
"noam_decay"] or self.scheduler["linear_decay"][
"start_point"] < 1:
logger.warning(
"You are using slanted_triangle learning rate "
"which will make warmup, noam_decay and linear_decay unable"
)
cut_step = int(max_train_steps *
self.scheduler["slanted_triangle"]["cut_fraction"])
ratio = self.scheduler["slanted_triangle"]["ratio"]
global_step = lr_scheduler._decay_step_counter()
with control_flow.Switch() as switch:
with switch.case(global_step <= cut_step):
pct = global_step / cut_step
decayed_lr = self.learning_rate * (1 + pct *
(ratio - 1)) / ratio
fluid.layers.assign(decayed_lr, scheduled_lr)
with switch.default():
pct = 1 - (global_step - cut_step) / (
max_train_steps - cut_step)
decayed_lr = self.learning_rate * (1 + pct *
(ratio - 1)) / ratio
fluid.layers.assign(decayed_lr, scheduled_lr)
super(CombinedStrategy, self).__init__(
optimizer_name=self._optimizer_name, learning_rate=scheduled_lr)
if self.scheduler["discriminative"]["blocks"]:
_block_layers = math.ceil(
len(self.sorted_depth) /
self.scheduler["discriminative"]["blocks"])
power = 0
for cnt, depth in enumerate(self.sorted_depth):
for index, param in enumerate(self.depth_params_dict[depth]):
param.optimize_attr["learning_rate"] *= \
pow(1.0 / self.scheduler["discriminative"]["factor"], power)
if cnt and cnt % _block_layers == 0:
power += 1
return scheduled_lr
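# A plain-Python sketch of the slanted-triangle schedule built by the Switch
# block above, useful for eyeballing the curve outside a Paddle program.
# All numbers below are arbitrary examples, not defaults from this file.
def slanted_triangle_lr(step, max_steps, base_lr, cut_fraction=0.1, ratio=32):
    cut = int(max_steps * cut_fraction)
    pct = step / cut if step <= cut else 1 - (step - cut) / (max_steps - cut)
    return base_lr * (1 + pct * (ratio - 1)) / ratio

print([round(slanted_triangle_lr(s, 100, 1e-3), 6) for s in (0, 10, 50, 100)])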
def clip_handler(self):
if self.clip["GlobalNorm"]:
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=self.clip["GlobalNorm"]))
elif self.clip["Norm"]:
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByNorm(clip_norm=self.clip["Norm"]))
def regularization_handler(self, loss, scheduled_lr):
if self.regularization["L2"]:
for param in self.main_program.global_block().all_parameters():
param.regularizer = fluid.regularizer.L2Decay(
regularization_coeff=self.regularization["L2"])
pretrained_params = get_pretrained_parameter(
self.main_program, fluid.default_startup_program())
if self.regularization["L2SP"]:
#TODO: L2SP can only run in one process now
for index, param in enumerate(pretrained_params):
param.regularizer = L2SPDecayRegularizer(
regularization_coeff=self.regularization["L2SP"])
_, param_grads = self.optimizer.minimize(loss)
if self.regularization["weight_decay"]:
param_list = {}
for param in self.main_program.global_block().all_parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
for param, grad in param_grads:
if self.exclude_from_weight_decay(param.name):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * self.regularization[
"weight_decay"] * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
def execute(self, loss, data_reader, config, dev_count):
# base information
self.main_program = loss.block.program
self.config = config
# self.num_examples = {'train': -1, 'dev': -1, 'test': -1} before data_generator
data_reader.data_generator(
batch_size=config.batch_size, phase='train', shuffle=True)
data_reader.data_generator(
batch_size=config.batch_size, phase='val', shuffle=False)
data_reader.data_generator(
batch_size=config.batch_size, phase='dev', shuffle=False)
data_reader.data_generator(
batch_size=config.batch_size, phase='test', shuffle=False)
num_train_examples = len(data_reader.get_train_examples())

max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count

try:
# nlp_reader
_in_tokens = data_reader.in_tokens
if _in_tokens:
max_train_steps *= data_reader.max_seq_len
except:
# cv_reader without .in_tokens and .max_seq_len
pass
if self.scheduler["discriminative"]["blocks"] > 0 or self.scheduler[
"gradual_unfreeze"] > 0:
self.depth_params_dict = get_depth_parameter(self.main_program)
self.sorted_depth = sorted(
self.depth_params_dict.keys(), reverse=True)
self.max_depth = len(self.sorted_depth)
logger.info(self.__str__())
# handle scheduler
scheduled_lr = self.scheduler_handler(max_train_steps)
# handle clip
self.clip_handler()
# handle regularization
self.regularization_handler(loss, scheduled_lr)
return scheduled_lr, max_train_steps
def exclude_from_weight_decay(self, name):
if name.find("layer_norm") > -1:
return True
bias_suffix = ["_bias", "_b", ".b_0"]
for suffix in bias_suffix:
if name.endswith(suffix):
return True
return False
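# Standalone restatement of the exclusion rule above for a quick sanity
# check; the parameter names are made up for illustration.
def is_excluded(name):
    return "layer_norm" in name or name.endswith(("_bias", "_b", ".b_0"))

print([n for n in ["fc_0.w_0", "fc_0.b_0", "layer_norm_scale"] if is_excluded(n)])
# -> ['fc_0.b_0', 'layer_norm_scale']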
def step(self):
if self.scheduler["gradual_unfreeze"] > 0:
self.epoch += 1
if self.max_depth > 0 and self.epoch <= self.scheduler[
"gradual_unfreeze"]:
set_gradual_unfreeze(
self.main_program,
unfreeze_depths=self.
sorted_depth[:self.max_depth * self.epoch //
self.scheduler["gradual_unfreeze"]])
else:
logger.warning(
"The max op-depth in the network is %s. That results in that can't use the gradual unfreeze finetune strategy."
% (self.max_depth))
else:
pass

def __str__(self):
return "Strategy with scheduler: %s, regularization: %s and clip: %s" % (
self.scheduler, self.regularization, self.clip)
class AdamWeightDecayStrategy(CombinedStrategy):
def __init__(self,
learning_rate=1e-4,
lr_scheduler="linear_decay",
warmup_proportion=0.1,
weight_decay=0.01,
optimizer_name="adam"):
scheduler = {"warmup": warmup_proportion}
if lr_scheduler == "noam_decay":
scheduler["noam_decay"] = True
elif lr_scheduler == "linear_decay":
scheduler["linear_decay"] = {
"start_point": warmup_proportion,
"end_learning_rate": 0,
}
else:
raise ValueError("lr_scheduler {} is not setup "
"correctly".format(lr_scheduler))
regularization = {"weight_decay": weight_decay}
clip = {"GlobalNorm": 1.0}
super(AdamWeightDecayStrategy, self).__init__(
optimizer_name=optimizer_name,
learning_rate=learning_rate,
scheduler=scheduler,
regularization=regularization,
clip=clip)


class L2SPFinetuneStrategy(CombinedStrategy):
def __init__(self,
learning_rate=1e-4,
optimizer_name="adam",
regularization_coeff=1e-3):
scheduler = {}
regularization = {"L2SP": regularization_coeff}
clip = {}
super(L2SPFinetuneStrategy, self).__init__(
optimizer_name=optimizer_name,
learning_rate=learning_rate,
scheduler=scheduler,
regularization=regularization,
clip=clip)


class DefaultFinetuneStrategy(CombinedStrategy):
def __init__(self,
learning_rate=1e-4,
optimizer_name="adam",
regularization_coeff=1e-3):
scheduler = {}
regularization = {"L2": regularization_coeff}
clip = {}
super(DefaultFinetuneStrategy, self).__init__(
optimizer_name=optimizer_name,
learning_rate=learning_rate,
scheduler=scheduler,
regularization=regularization,
clip=clip)


class ULMFiTStrategy(CombinedStrategy):
def __init__(self,
learning_rate=1e-4,
optimizer_name="adam",
cut_fraction=0.1,
ratio=32,
dis_blocks=3,
factor=2.6,
frz_blocks=3):
scheduler = {
"slanted_triangle": {
"cut_fraction": cut_fraction,
"ratio": ratio
},
"gradual_unfreeze": frz_blocks,
"discriminative": {
"blocks": dis_blocks,
"factor": factor
}
}
regularization = {}
clip = {}
super(ULMFiTStrategy, self).__init__(
optimizer_name=optimizer_name,
learning_rate=learning_rate,
scheduler=scheduler,
regularization=regularization,
clip=clip)
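# Sketch of wiring one of the strategies above into a finetune run. It assumes
# the hub.RunConfig interface used by the PaddleHub demos (use_cuda, num_epoch,
# batch_size, checkpoint_dir, strategy); adjust to the installed version.
import paddlehub as hub

strategy = hub.AdamWeightDecayStrategy(
    learning_rate=5e-5,
    lr_scheduler="linear_decay",
    warmup_proportion=0.1,
    weight_decay=0.01)

config = hub.RunConfig(
    use_cuda=False,
    num_epoch=3,
    batch_size=32,
    checkpoint_dir="ckpt_demo",
    strategy=strategy)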
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .basic_task import BasicTask, RunEnv, RunState
from .classifier_task import ClassifierTask, ImageClassifierTask, TextClassifierTask, MultiLabelClassifierTask
from .reading_comprehension_task import ReadingComprehensionTask
from .regression_task import RegressionTask
from .sequence_task import SequenceLabelTask
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
...@@ -18,29 +18,19 @@ from __future__ import division
from __future__ import print_function

import os
import collections
import contextlib
import time
import multiprocessing
import copy
import numpy as np

import paddle.fluid as fluid
from tb_paddle import SummaryWriter

import paddlehub as hub
from paddlehub.common.paddle_helper import dtype_map, clone_program
from paddlehub.common.utils import mkdir, to_list
from paddlehub.common.logger import logger
from paddlehub.finetune.checkpoint import load_checkpoint, save_checkpoint
from paddlehub.finetune.evaluate import chunk_eval, calculate_f1
from paddlehub.finetune.config import RunConfig

__all__ = [
"ClassifierTask", "ImageClassifierTask", "TextClassifierTask",
"SequenceLabelTask", "MultiLabelClassifierTask"
]


class RunState(object):
def __init__(self, length):
...@@ -92,11 +82,24 @@ class BasicTask(object):
data_reader,
main_program=None,
startup_program=None,
config=None,
metrics_choices="default"):
# base item
self._base_data_reader = data_reader
self._base_feed_list = feed_list

# metrics item
self.best_score = -999
if metrics_choices == "default":
metrics_choices = ["acc"]
elif metrics_choices is None:
metrics_choices = []
if isinstance(metrics_choices, list):
self.metrics_choices = metrics_choices
else:
self.metrics_choices = [metrics_choices]

if main_program is None:
self._base_main_program = clone_program(
fluid.default_main_program(), for_test=False)
...@@ -138,13 +141,16 @@ class BasicTask(object):
if not os.path.exists(self.config.checkpoint_dir):
mkdir(self.config.checkpoint_dir)
vdl_log_dir = os.path.join(self.config.checkpoint_dir, "vdllog")
self.tb_writer = SummaryWriter(vdl_log_dir)

# run environment
self._phases = []
self._envs = {}
self._predict_data = None

# accelerate predict
self.is_best_model_loaded = False

# set default phase
self.enter_phase("train")
...@@ -164,9 +170,24 @@ class BasicTask(object):
def init_if_necessary(self):
if not self.is_checkpoint_loaded:
if not self.load_checkpoint():
self.exe.run(self._base_startup_program)
self.is_checkpoint_loaded = True
self.is_best_model_loaded = False
def init_if_load_best_model(self):
if not self.is_best_model_loaded:
best_model_path = os.path.join(self.config.checkpoint_dir,
"best_model")
logger.info("Load the best model from %s" % best_model_path)
if os.path.exists(best_model_path):
self.load_parameters(best_model_path)
self.is_checkpoint_loaded = False
self.is_best_model_loaded = True
else:
self.init_if_necessary()
else:
logger.info("The best model has been loaded")
def _build_env(self):
if self.env.is_inititalized:
...@@ -242,19 +263,16 @@ class BasicTask(object):
with fluid.program_guard(self.env.main_program,
self._base_startup_program):
with fluid.unique_name.guard(self.env.UNG):
self.scheduled_lr, self.max_train_steps = self.config.strategy.execute(
self.loss, self._base_data_reader, self.config,
self.device_count)

if self.is_train_phase:
loss_name = self.env.loss.name
else:
loss_name = None

share_vars_from = self._base_compiled_program

if not self.config.use_data_parallel:
if self.config.enable_memory_optim:
...@@ -267,9 +285,6 @@ class BasicTask(object):
share_vars_from=share_vars_from,
build_strategy=self.build_strategy)

self.exe.run(self.env.startup_program)
self._build_env_end_event()
...@@ -348,6 +363,8 @@ class BasicTask(object):
@property
def main_program_to_be_run(self):
if self.config.use_data_parallel:
if self._base_compiled_program is None:
self._base_compiled_program = self.env.main_program_compiled
return self.main_program_compiled
return self.main_program
...@@ -420,7 +437,8 @@ class BasicTask(object):
pass

def _build_env_end_event(self):
if not self.is_predict_phase:
self.env.score_scalar = {}

def _finetune_start_event(self):
logger.info("PaddleHub finetune start")
...@@ -438,14 +456,61 @@ class BasicTask(object):
logger.info("Evaluation on {} dataset start".format(self.phase))

def _eval_end_event(self, run_states):
eval_scores, eval_loss, run_speed = self._calculate_metrics(run_states)
self.tb_writer.add_scalar(
tag=self.phase + "/Loss [{}]".format(self.phase),
scalar_value=eval_loss,
global_step=self.current_step)
log_scores = ""
for metric in eval_scores:
self.tb_writer.add_scalar(
tag=self.phase + "/{} [{}]".format(metric, self.phase),
scalar_value=eval_scores[metric],
global_step=self.current_step)
log_scores += "%s=%.5f " % (metric, eval_scores[metric])
logger.info(
"[%s dataset evaluation result] loss=%.5f %s[step/sec: %.2f]" %
(self.phase, eval_loss, log_scores, run_speed))
eval_scores_items = eval_scores.items()
if len(eval_scores_items):
# The first metric will be chosen as the main metric for evaluation
main_metric, main_value = list(eval_scores_items)[0]
else:
logger.warning(
"None of metrics has been implemented, loss will be used to evaluate."
)
# The larger, the better
main_metric, main_value = "negative loss", -eval_loss
if self.phase in ["dev", "val"] and main_value > self.best_score:
self.best_score = main_value
model_saved_dir = os.path.join(self.config.checkpoint_dir,
"best_model")
logger.info("best model saved to %s [best %s=%.5f]" %
(model_saved_dir, main_metric, main_value))
save_result = fluid.io.save_persistables(
executor=self.exe,
dirname=model_saved_dir,
main_program=self.main_program)
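# Minimal sketch of the best-model rule implemented above: the first key of
# the scores dict is treated as the main metric, and larger is better. The
# numbers are made up.
from collections import OrderedDict

scores = OrderedDict([("acc", 0.91), ("f1", 0.88)])
main_metric, main_value = list(scores.items())[0]
best_score = 0.89
if main_value > best_score:
    best_score = main_value
print(main_metric, best_score)  # acc 0.91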
def _log_interval_event(self, run_states):
scores, avg_loss, run_speed = self._calculate_metrics(run_states)
self.tb_writer.add_scalar(
tag=self.phase + "/Loss [{}]".format(self.phase),
scalar_value=avg_loss,
global_step=self.current_step)
log_scores = ""
for metric in scores:
self.tb_writer.add_scalar(
tag=self.phase + "/{} [{}]".format(metric, self.phase),
scalar_value=scores[metric],
global_step=self.current_step)
log_scores += "%s=%.5f " % (metric, scores[metric])
logger.info("step %d / %d: loss=%.5f %s[step/sec: %.2f]" %
(self.current_step, self.max_train_steps, avg_loss,
log_scores, run_speed))
def _save_ckpt_interval_event(self):
self.save_checkpoint()
...@@ -467,9 +532,14 @@ class BasicTask(object):
raise NotImplementedError

def _add_metrics(self):
# Some metrics such as acc and auc can be calculated by fluid.layers;
# the others can be calculated in the _calculate_metrics function
raise NotImplementedError

def _calculate_metrics(self, run_states):
# NOTE: if you want to customize the metrics,
# make sure that the first value returned is a dict;
# its first key will be used as the main metric to update the best model
raise NotImplementedError

# NOTE: the current saved checkpoint mechanism is not complete,
...@@ -479,11 +549,12 @@ class BasicTask(object):
checkpoint_dir=self.config.checkpoint_dir,
current_epoch=self.current_epoch,
global_step=self.current_step,
best_score=self.best_score,
exe=self.exe,
main_program=self.main_program)

def load_checkpoint(self):
is_load_successful, self.env.current_epoch, self.env.current_step, self.best_score = load_checkpoint(
self.config.checkpoint_dir,
self.exe,
main_program=self.main_program)
...@@ -513,24 +584,30 @@ class BasicTask(object):
run_states = []
if self.current_epoch <= self.config.num_epoch:
while self.current_epoch <= self.config.num_epoch:
self.config.strategy.step()
run_states = self._run(do_eval=do_eval)
self.env.current_epoch += 1

# Save checkpoint after finetune
self.save_checkpoint()

# Final evaluation
if self._base_data_reader.get_dev_examples() != []:
self.eval(phase="dev")
if self._base_data_reader.get_test_examples() != []:
self.eval(phase="test", load_best_model=True)

self._finetune_end_event(run_states)
return run_states
def eval(self, phase="dev"): def eval(self, phase="dev", load_best_model=False):
# Warning: DO NOT use eval(load_best_model=True) in finetune_and_eval
# It will cause trainer unable to continue training from checkpoint after eval
# More important, The model should evaluate current performance during training.
with self.phase_guard(phase=phase): with self.phase_guard(phase=phase):
self.init_if_necessary() if load_best_model:
self.init_if_load_best_model()
else:
self.init_if_necessary()
self._eval_start_event() self._eval_start_event()
run_states = self._run() run_states = self._run()
self._eval_end_event(run_states) self._eval_end_event(run_states)
...@@ -538,11 +615,10 @@ class BasicTask(object):
def predict(self, data, load_best_model=True):
with self.phase_guard(phase="predict"):
if load_best_model:
self.init_if_load_best_model()
else:
self.init_if_necessary()
self._predict_data = data
self._predict_start_event()
run_states = self._run()
...@@ -567,7 +643,6 @@ class BasicTask(object):
for run_step, batch in enumerate(self.reader(), start=1):
if self.config.use_data_parallel and len(batch) < self.device_count:
continue
step_run_state = RunState(len(self.fetch_list))
step_run_state.run_step = 1
num_batch_examples = len(batch)
...@@ -652,460 +727,3 @@ class BasicTask(object):
break
return global_run_states
class ClassifierTask(BasicTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None):
main_program = feature.block.program
super(ClassifierTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config)
self.feature = feature
self.num_classes = num_classes
self.hidden_units = hidden_units
self.best_accuracy = -1
def _build_net(self):
cls_feats = self.feature
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
logits = fluid.layers.fc(
input=cls_feats,
size=self.num_classes,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
act="softmax")
return [logits]
def _add_label(self):
return [fluid.layers.data(name="label", dtype="int64", shape=[1])]
def _add_loss(self):
ce_loss = fluid.layers.cross_entropy(
input=self.outputs[0], label=self.labels[0])
return fluid.layers.mean(x=ce_loss)
def _add_metrics(self):
return [
fluid.layers.accuracy(input=self.outputs[0], label=self.labels[0])
]
def _build_env_end_event(self):
with self.log_writer.mode(self.phase) as logw:
if not self.is_predict_phase:
self.env.loss_scalar = logw.scalar(
tag="Loss [{}]".format(self.phase))
self.env.acc_scalar = logw.scalar(
tag="Accuracy [{}]".format(self.phase))
def _calculate_metrics(self, run_states):
loss_sum = acc_sum = run_examples = 0
run_step = run_time_used = 0
for run_state in run_states:
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
acc_sum += np.mean(
run_state.run_results[0]) * run_state.run_examples
run_time_used = time.time() - run_states[0].run_time_begin
avg_loss = loss_sum / run_examples
avg_acc = acc_sum / run_examples
run_speed = run_step / run_time_used
return avg_loss, avg_acc, run_speed
def _log_interval_event(self, run_states):
avg_loss, avg_acc, run_speed = self._calculate_metrics(run_states)
self.env.loss_scalar.add_record(self.current_step, avg_loss)
self.env.acc_scalar.add_record(self.current_step, avg_acc)
logger.info("step %d: loss=%.5f acc=%.5f [step/sec: %.2f]" %
(self.current_step, avg_loss, avg_acc, run_speed))
def _eval_end_event(self, run_states):
eval_loss, eval_acc, run_speed = self._calculate_metrics(run_states)
logger.info(
"[%s dataset evaluation result] loss=%.5f acc=%.5f [step/sec: %.2f]"
% (self.phase, eval_loss, eval_acc, run_speed))
self.env.loss_scalar.add_record(self.current_step, eval_loss)
self.env.acc_scalar.add_record(self.current_step, eval_acc)
if self.phase in ["dev", "val"] and eval_acc > self.best_accuracy:
self.best_accuracy = eval_acc
model_saved_dir = os.path.join(self.config.checkpoint_dir,
"best_model")
logger.info("best model saved to %s [best accuracy=%.5f]" %
(model_saved_dir, self.best_accuracy))
save_result = fluid.io.save_persistables(
executor=self.exe,
dirname=model_saved_dir,
main_program=self.main_program)
ImageClassifierTask = ClassifierTask
class TextClassifierTask(ClassifierTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None):
main_program = feature.block.program
super(TextClassifierTask, self).__init__(
data_reader=data_reader,
feature=feature,
num_classes=num_classes,
feed_list=feed_list,
startup_program=startup_program,
config=config,
hidden_units=hidden_units)
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
logits = fluid.layers.fc(
input=cls_feats,
size=self.num_classes,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
act="softmax")
return [logits]
class SequenceLabelTask(BasicTask):
def __init__(
self,
feature,
max_seq_len,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
):
main_program = feature.block.program
super(SequenceLabelTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config)
self.feature = feature
self.max_seq_len = max_seq_len
self.num_classes = num_classes
self.best_f1 = -1
def _build_net(self):
self.logits = fluid.layers.fc(
input=self.feature,
size=self.num_classes,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name="cls_seq_label_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_seq_label_out_b",
initializer=fluid.initializer.Constant(0.)))
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
ret_infers = fluid.layers.assign(self.ret_infers)
self.seq_len = fluid.layers.data(
name="seq_len", shape=[1], dtype='int64')
seq_len = fluid.layers.assign(self.seq_len)
logits = self.logits
logits = fluid.layers.flatten(logits, axis=2)
logits = fluid.layers.softmax(logits)
self.num_labels = logits.shape[1]
return [logits]
def _add_label(self):
label = fluid.layers.data(
name="label", shape=[self.max_seq_len, 1], dtype='int64')
return [label]
def _add_loss(self):
labels = fluid.layers.flatten(self.labels[0], axis=2)
ce_loss = fluid.layers.cross_entropy(
input=self.outputs[0], label=labels)
loss = fluid.layers.mean(x=ce_loss)
return loss
def _add_metrics(self):
self.ret_labels = fluid.layers.reshape(x=self.labels[0], shape=[-1, 1])
return [self.ret_labels, self.ret_infers, self.seq_len]
def _build_env_end_event(self):
with self.log_writer.mode(self.phase) as logw:
if self.is_train_phase:
self.env.loss_scalar = logw.scalar(
tag="Loss [{}]".format(self.phase))
if self.phase in ["dev", "val"]:
self.env.loss_scalar = logw.scalar(
tag="Loss [{}]".format(self.phase))
self.env.f1_scalar = logw.scalar(
tag="F1 [{}]".format(self.phase))
self.env.precision_scalar = logw.scalar(
tag="Precision [{}]".format(self.phase))
self.env.recall_scalar = logw.scalar(
tag="Recall [{}]".format(self.phase))
def _calculate_metrics(self, run_states):
total_infer = total_label = total_correct = loss_sum = 0
run_step = run_time_used = run_examples = 0
for run_state in run_states:
loss_sum += np.mean(run_state.run_results[-1])
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
np_lens = run_state.run_results[2]
label_num, infer_num, correct_num = chunk_eval(
np_labels, np_infers, np_lens, self.num_labels,
self.device_count)
total_infer += infer_num
total_label += label_num
total_correct += correct_num
run_examples += run_state.run_examples
run_step += run_state.run_step
run_time_used = time.time() - run_states[0].run_time_begin
run_speed = run_step / run_time_used
avg_loss = loss_sum / run_examples
precision, recall, f1 = calculate_f1(total_label, total_infer,
total_correct)
return precision, recall, f1, avg_loss, run_speed
def _log_interval_event(self, run_states):
precision, recall, f1, avg_loss, run_speed = self._calculate_metrics(
run_states)
self.env.loss_scalar.add_record(self.current_step, avg_loss)
logger.info("step %d: loss=%.5f [step/sec: %.2f]" %
(self.current_step, avg_loss, run_speed))
def _eval_end_event(self, run_states):
precision, recall, f1, avg_loss, run_speed = self._calculate_metrics(
run_states)
self.env.loss_scalar.add_record(self.current_step, avg_loss)
self.env.f1_scalar.add_record(self.current_step, f1)
self.env.precision_scalar.add_record(self.current_step, precision)
self.env.recall_scalar.add_record(self.current_step, recall)
logger.info("[%s dataset evaluation result] [step/sec: %.2f]" %
(self.phase, run_speed))
logger.info(
"[%s evaluation] F1-Score=%f, precision=%f, recall=%f [step/sec: %.2f]"
% (self.phase, f1, precision, recall, run_speed))
if self.phase in ["dev", "val"] and f1 > self.best_f1:
self.best_f1 = f1
model_saved_dir = os.path.join(self.config.checkpoint_dir,
"best_model")
logger.info("best model saved to %s [best F1=%.5f]" %
(model_saved_dir, self.best_f1))
fluid.io.save_persistables(self.exe, dirname=model_saved_dir)
@property
def feed_list(self):
feed_list = [varname for varname in self._base_feed_list]
if self.is_train_phase or self.is_test_phase:
feed_list += [self.labels[0].name, self.seq_len.name]
else:
feed_list += [self.seq_len.name]
return feed_list
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [metric.name for metric in self.metrics] + [self.loss.name]
elif self.is_predict_phase:
return [self.ret_infers.name] + [self.seq_len.name]
return [output.name for output in self.outputs]
class MultiLabelClassifierTask(ClassifierTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None):
main_program = feature.block.program
super(MultiLabelClassifierTask, self).__init__(
data_reader=data_reader,
feature=feature,
num_classes=num_classes,
feed_list=feed_list,
startup_program=startup_program,
config=config,
hidden_units=hidden_units)
self.best_avg_auc = -1
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
probs = []
for i in range(self.num_classes):
probs.append(
fluid.layers.fc(
input=cls_feats,
size=2,
param_attr=fluid.ParamAttr(
name="cls_out_w_%d" % i,
initializer=fluid.initializer.TruncatedNormal(
scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b_%d" % i,
initializer=fluid.initializer.Constant(0.)),
act="softmax"))
return probs
def _add_label(self):
label = fluid.layers.data(
name="label", shape=[self.num_classes], dtype='int64')
return [label]
def _add_loss(self):
label_split = fluid.layers.split(
self.labels[0], self.num_classes, dim=-1)
total_loss = fluid.layers.fill_constant(
shape=[1], value=0.0, dtype='float64')
for index, probs in enumerate(self.outputs):
ce_loss = fluid.layers.cross_entropy(
input=probs, label=label_split[index])
total_loss += fluid.layers.reduce_sum(ce_loss)
loss = fluid.layers.mean(x=total_loss)
return loss
def _add_metrics(self):
label_split = fluid.layers.split(
self.labels[0], self.num_classes, dim=-1)
# metrics change to auc of every class
eval_list = []
for index, probs in enumerate(self.outputs):
current_auc, _, _ = fluid.layers.auc(
input=probs, label=label_split[index])
eval_list.append(current_auc)
return eval_list
def _build_env_end_event(self):
with self.log_writer.mode(self.phase) as logw:
if not self.is_predict_phase:
self.env.loss_scalar = logw.scalar(
tag="Loss [{}]".format(self.phase))
if self.is_train_phase:
self.env.auc_scalar_list = []
for i in range(self.num_classes):
self.env.auc_scalar_list.append(
logw.scalar(tag="AUC_{} [{}]".format(i, "train")))
self.env.avg_auc_scalar = logw.scalar(
tag="Average auc [{}]".format(self.phase))
def _calculate_metrics(self, run_states):
loss_sum = acc_sum = run_examples = 0
run_step = run_time_used = 0
for run_state in run_states:
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
auc_list = run_states[-1].run_results[:-1]
run_time_used = time.time() - run_states[0].run_time_begin
avg_loss = loss_sum / (run_examples * self.num_classes)
run_speed = run_step / run_time_used
return avg_loss, auc_list, run_speed
def _log_interval_event(self, run_states):
avg_loss, auc_list, run_speed = self._calculate_metrics(run_states)
self.env.loss_scalar.add_record(self.current_step, avg_loss)
avg_auc = np.mean(auc_list)
self.env.avg_auc_scalar.add_record(self.current_step, avg_auc)
logger.info("step %d: loss=%.5f avg_auc=%.5f [step/sec: %.2f]" %
(self.current_step, avg_loss, avg_auc, run_speed))
for index, auc_scalar in enumerate(self.env.auc_scalar_list):
auc_scalar.add_record(self.current_step, auc_list[index][0])
logger.info("label_%d_auc = %.5f" % (index, auc_list[index][0]))
def _eval_end_event(self, run_states):
eval_loss, auc_list, run_speed = self._calculate_metrics(run_states)
avg_auc = np.mean(auc_list)
logger.info(
"[%s dataset evaluation result] loss=%.5f avg_auc=%.5f [step/sec: %.2f]"
% (self.phase, eval_loss, avg_auc, run_speed))
for index, auc in enumerate(auc_list):
logger.info("label_%d_auc = %.5f" % (index, auc_list[index][0]))
self.env.loss_scalar.add_record(self.current_step, eval_loss)
self.env.avg_auc_scalar.add_record(self.current_step, avg_auc)
if self.phase in ["dev", "val"] and avg_auc > self.best_avg_auc:
self.best_avg_auc = avg_auc
model_saved_dir = os.path.join(self.config.checkpoint_dir,
"best_model")
logger.info("best model saved to %s [best average auc=%.5f]" %
(model_saved_dir, self.best_avg_auc))
save_result = fluid.io.save_persistables(
executor=self.exe,
dirname=model_saved_dir,
main_program=self.main_program)
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [metric.name for metric in self.metrics] + [self.loss.name]
return self.outputs
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle.fluid as fluid
from paddlehub.finetune.evaluate import calculate_f1_np, matthews_corrcoef
from .basic_task import BasicTask
class ClassifierTask(BasicTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["acc"]
main_program = feature.block.program
super(ClassifierTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config,
metrics_choices=metrics_choices)
self.feature = feature
self.num_classes = num_classes
self.hidden_units = hidden_units
def _build_net(self):
cls_feats = self.feature
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
logits = fluid.layers.fc(
input=cls_feats,
size=self.num_classes,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
act="softmax")
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(logits, axis=1), shape=[-1, 1])
return [logits]
def _add_label(self):
return [fluid.layers.data(name="label", dtype="int64", shape=[1])]
def _add_loss(self):
ce_loss = fluid.layers.cross_entropy(
input=self.outputs[0], label=self.labels[0])
return fluid.layers.mean(x=ce_loss)
def _add_metrics(self):
acc = fluid.layers.accuracy(input=self.outputs[0], label=self.labels[0])
return [acc]
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [self.labels[0].name, self.ret_infers.name
] + [metric.name
for metric in self.metrics] + [self.loss.name]
return [output.name for output in self.outputs]
def _calculate_metrics(self, run_states):
loss_sum = acc_sum = run_examples = 0
run_step = run_time_used = 0
all_labels = np.array([])
all_infers = np.array([])
for run_state in run_states:
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
acc_sum += np.mean(
run_state.run_results[2]) * run_state.run_examples
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
all_labels = np.hstack((all_labels, np_labels.reshape([-1])))
all_infers = np.hstack((all_infers, np_infers.reshape([-1])))
run_time_used = time.time() - run_states[0].run_time_begin
avg_loss = loss_sum / run_examples
run_speed = run_step / run_time_used
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
for metric in self.metrics_choices:
if metric == "acc":
avg_acc = acc_sum / run_examples
scores["acc"] = avg_acc
elif metric == "f1":
f1 = calculate_f1_np(all_infers, all_labels)
scores["f1"] = f1
elif metric == "matthews":
matthews = matthews_corrcoef(all_infers, all_labels)
scores["matthews"] = matthews
else:
raise ValueError("Not Support Metric: \"%s\"" % metric)
return scores, avg_loss, run_speed
ImageClassifierTask = ClassifierTask
class TextClassifierTask(ClassifierTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["acc"]
super(TextClassifierTask, self).__init__(
data_reader=data_reader,
feature=feature,
num_classes=num_classes,
feed_list=feed_list,
startup_program=startup_program,
config=config,
hidden_units=hidden_units,
metrics_choices=metrics_choices)
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
logits = fluid.layers.fc(
input=cls_feats,
size=self.num_classes,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
act="softmax")
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(logits, axis=1), shape=[-1, 1])
return [logits]
class MultiLabelClassifierTask(ClassifierTask):
def __init__(self,
feature,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["auc"]
main_program = feature.block.program
super(MultiLabelClassifierTask, self).__init__(
data_reader=data_reader,
feature=feature,
num_classes=num_classes,
feed_list=feed_list,
startup_program=startup_program,
config=config,
hidden_units=hidden_units,
metrics_choices=metrics_choices)
self.class_name = list(data_reader.label_map.keys())
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
probs = []
for i in range(self.num_classes):
probs.append(
fluid.layers.fc(
input=cls_feats,
size=2,
param_attr=fluid.ParamAttr(
name="cls_out_w_%d" % i,
initializer=fluid.initializer.TruncatedNormal(
scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b_%d" % i,
initializer=fluid.initializer.Constant(0.)),
act="softmax"))
return probs
def _add_label(self):
label = fluid.layers.data(
name="label", shape=[self.num_classes], dtype='int64')
return [label]
def _add_loss(self):
label_split = fluid.layers.split(
self.labels[0], self.num_classes, dim=-1)
total_loss = fluid.layers.fill_constant(
shape=[1], value=0.0, dtype='float64')
for index, probs in enumerate(self.outputs):
ce_loss = fluid.layers.cross_entropy(
input=probs, label=label_split[index])
total_loss += fluid.layers.reduce_sum(ce_loss)
loss = fluid.layers.mean(x=total_loss)
return loss
def _add_metrics(self):
label_split = fluid.layers.split(
self.labels[0], self.num_classes, dim=-1)
# metrics change to auc of every class
eval_list = []
for index, probs in enumerate(self.outputs):
current_auc, _, _ = fluid.layers.auc(
input=probs, label=label_split[index])
eval_list.append(current_auc)
return eval_list
def _calculate_metrics(self, run_states):
loss_sum = acc_sum = run_examples = 0
run_step = run_time_used = 0
for run_state in run_states:
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
auc_list = run_states[-1].run_results[:-1]
run_time_used = time.time() - run_states[0].run_time_begin
avg_loss = loss_sum / (run_examples * self.num_classes)
run_speed = run_step / run_time_used
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
for metric in self.metrics_choices:
if metric == "auc":
scores["auc"] = np.mean(auc_list)
# NOTE: for MultiLabelClassifierTask, the metrics will be used to evaluate all the labels
# and their mean value will also be reported.
for index, auc in enumerate(auc_list):
scores["auc_" + self.class_name[index]] = auc_list[index][0]
else:
raise ValueError("Not Support Metric: \"%s\"" % metric)
return scores, avg_loss, run_speed
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [metric.name for metric in self.metrics] + [self.loss.name]
return self.outputs
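# A hedged end-to-end sketch of driving TextClassifierTask with explicit
# metrics_choices. Module, dataset and input names follow the PaddleHub 1.x
# text-classification demo conventions and may need adapting locally; the
# first metric in metrics_choices drives best-model selection.
import paddlehub as hub

module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

dataset = hub.dataset.ChnSentiCorp()
reader = hub.reader.ClassifyReader(
    dataset=dataset, vocab_path=module.get_vocab_path(), max_seq_len=128)

feed_list = [
    inputs["input_ids"].name, inputs["position_ids"].name,
    inputs["segment_ids"].name, inputs["input_mask"].name
]

cls_task = hub.TextClassifierTask(
    data_reader=reader,
    feature=outputs["pooled_output"],
    feed_list=feed_list,
    num_classes=dataset.num_labels,
    config=hub.RunConfig(use_cuda=False, num_epoch=1, batch_size=32),
    metrics_choices=["acc", "f1"])

cls_task.finetune_and_eval()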
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle.fluid as fluid
from .basic_task import BasicTask
class ReadingComprehensionTask(BasicTask):
def __init__(self,
feature,
feed_list,
data_reader,
startup_program=None,
config=None,
metrics_choices=None):
main_program = feature.block.program
super(ReadingComprehensionTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config,
metrics_choices=metrics_choices)
self.feature = feature
def _build_net(self):
if self.is_predict_phase:
self.unique_id = fluid.layers.data(
name="start_positions",
shape=[-1, 1],
lod_level=0,
dtype="int64")
logits = fluid.layers.fc(
input=self.feature,
size=2,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name="cls_seq_label_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_seq_label_out_b",
initializer=fluid.initializer.Constant(0.)))
logits = fluid.layers.transpose(x=logits, perm=[2, 0, 1])
start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)
batch_ones = fluid.layers.fill_constant_batch_size_like(
input=start_logits, dtype='int64', shape=[1], value=1)
num_seqs = fluid.layers.reduce_sum(input=batch_ones)
return [start_logits, end_logits, num_seqs]
def _add_label(self):
start_positions = fluid.layers.data(
name="start_positions", shape=[-1, 1], lod_level=0, dtype="int64")
end_positions = fluid.layers.data(
name="end_positions", shape=[-1, 1], lod_level=0, dtype="int64")
return [start_positions, end_positions]
def _add_loss(self):
start_positions = self.labels[0]
end_positions = self.labels[1]
start_logits = self.outputs[0]
end_logits = self.outputs[1]
start_loss = fluid.layers.softmax_with_cross_entropy(
logits=start_logits, label=start_positions)
start_loss = fluid.layers.mean(x=start_loss)
end_loss = fluid.layers.softmax_with_cross_entropy(
logits=end_logits, label=end_positions)
end_loss = fluid.layers.mean(x=end_loss)
total_loss = (start_loss + end_loss) / 2.0
return total_loss
def _add_metrics(self):
return []
@property
def feed_list(self):
feed_list = [varname for varname in self._base_feed_list]
if self.is_train_phase:
feed_list += [self.labels[0].name, self.labels[1].name]
elif self.is_predict_phase:
feed_list += [self.unique_id.name]
return feed_list
@property
def fetch_list(self):
if self.is_train_phase:
return [metric.name for metric in self.metrics
] + [self.loss.name, self.outputs[-1].name]
elif self.is_predict_phase:
return [self.unique_id.name
] + [output.name for output in self.outputs]
def _calculate_metrics(self, run_states):
total_cost, total_num_seqs = [], []
run_step = run_time_used = run_examples = 0
for run_state in run_states:
np_loss = run_state.run_results[0]
np_num_seqs = run_state.run_results[1]
total_cost.extend(np_loss * np_num_seqs)
total_num_seqs.extend(np_num_seqs)
run_examples += run_state.run_examples
run_step += run_state.run_step
run_time_used = time.time() - run_states[0].run_time_begin
run_speed = run_step / run_time_used
avg_loss = np.sum(total_cost) / np.sum(total_num_seqs)
scores = OrderedDict()
# If none of the metrics has been implemented, loss will be used to evaluate.
return scores, avg_loss, run_speed
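# Toy illustration of the loss defined above: softmax cross-entropy over the
# start and end position logits, averaged. Single example, four positions,
# made-up logits and gold positions.
import numpy as np

def softmax_ce(logits, label):
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()
    return -np.log(probs[label])

start_logits = np.array([2.0, 0.1, 0.3, 0.2])
end_logits = np.array([0.1, 0.2, 2.5, 0.3])
total_loss = (softmax_ce(start_logits, 0) + softmax_ce(end_logits, 2)) / 2.0
print(total_loss)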
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle.fluid as fluid
from scipy.stats import spearmanr
from .basic_task import BasicTask
class RegressionTask(BasicTask):
def __init__(self,
feature,
feed_list,
data_reader,
startup_program=None,
config=None,
hidden_units=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["spearman"]
main_program = feature.block.program
super(RegressionTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config,
metrics_choices=metrics_choices)
self.feature = feature
self.hidden_units = hidden_units
def _build_net(self):
cls_feats = fluid.layers.dropout(
x=self.feature,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
if self.hidden_units is not None:
for n_hidden in self.hidden_units:
cls_feats = fluid.layers.fc(
input=cls_feats, size=n_hidden, act="relu")
logits = fluid.layers.fc(
input=cls_feats,
size=1,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)),
act=None)
return [logits]
def _add_label(self):
return [fluid.layers.data(name="label", dtype="float32", shape=[1])]
def _add_loss(self):
cost = fluid.layers.square_error_cost(
input=self.outputs[0], label=self.labels[0])
return fluid.layers.mean(x=cost)
def _add_metrics(self):
return []
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [self.labels[0].name, self.outputs[0].name
] + [metric.name
for metric in self.metrics] + [self.loss.name]
return [output.name for output in self.outputs]
def _calculate_metrics(self, run_states):
loss_sum = run_examples = 0
run_step = run_time_used = 0
all_labels = np.array([])
all_infers = np.array([])
for run_state in run_states:
run_examples += run_state.run_examples
run_step += run_state.run_step
loss_sum += np.mean(
run_state.run_results[-1]) * run_state.run_examples
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
all_labels = np.hstack((all_labels, np_labels.reshape([-1])))
all_infers = np.hstack((all_infers, np_infers.reshape([-1])))
run_time_used = time.time() - run_states[0].run_time_begin
avg_loss = loss_sum / run_examples
run_speed = run_step / run_time_used
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
for metric in self.metrics_choices:
if metric == "spearman":
spearman_correlations = spearmanr(all_labels, all_infers)[0]
scores["spearman"] = spearman_correlations
else:
raise ValueError("Not Support Metric: \"%s\"" % metric)
return scores, avg_loss, run_speed
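# Quick check of the metric used above: Spearman rank correlation between
# labels and predictions, computed on toy values.
import numpy as np
from scipy.stats import spearmanr

labels = np.array([1.0, 2.0, 3.0, 4.0])
preds = np.array([1.2, 1.9, 3.5, 3.8])
print(spearmanr(labels, preds)[0])  # 1.0: the ranking is perfectly consistent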
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from collections import OrderedDict
import numpy as np
import paddle.fluid as fluid
from paddlehub.finetune.evaluate import chunk_eval, calculate_f1
from .basic_task import BasicTask
class SequenceLabelTask(BasicTask):
def __init__(self,
feature,
max_seq_len,
num_classes,
feed_list,
data_reader,
startup_program=None,
config=None,
metrics_choices="default"):
if metrics_choices == "default":
metrics_choices = ["f1", "precision", "recall"]
main_program = feature.block.program
super(SequenceLabelTask, self).__init__(
data_reader=data_reader,
main_program=main_program,
feed_list=feed_list,
startup_program=startup_program,
config=config,
metrics_choices=metrics_choices)
self.feature = feature
self.max_seq_len = max_seq_len
self.num_classes = num_classes
def _build_net(self):
self.logits = fluid.layers.fc(
input=self.feature,
size=self.num_classes,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name="cls_seq_label_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_seq_label_out_b",
initializer=fluid.initializer.Constant(0.)))
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
ret_infers = fluid.layers.assign(self.ret_infers)
self.seq_len = fluid.layers.data(
name="seq_len", shape=[1], dtype='int64')
seq_len = fluid.layers.assign(self.seq_len)
logits = self.logits
logits = fluid.layers.flatten(logits, axis=2)
logits = fluid.layers.softmax(logits)
self.num_labels = logits.shape[1]
return [logits]
def _add_label(self):
label = fluid.layers.data(
name="label", shape=[self.max_seq_len, 1], dtype='int64')
return [label]
def _add_loss(self):
labels = fluid.layers.flatten(self.labels[0], axis=2)
ce_loss = fluid.layers.cross_entropy(
input=self.outputs[0], label=labels)
loss = fluid.layers.mean(x=ce_loss)
return loss
def _add_metrics(self):
self.ret_labels = fluid.layers.reshape(x=self.labels[0], shape=[-1, 1])
return [self.ret_labels, self.ret_infers, self.seq_len]
def _calculate_metrics(self, run_states):
total_infer = total_label = total_correct = loss_sum = 0
run_step = run_time_used = run_examples = 0
for run_state in run_states:
loss_sum += np.mean(run_state.run_results[-1])
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
np_lens = run_state.run_results[2]
label_num, infer_num, correct_num = chunk_eval(
np_labels, np_infers, np_lens, self.num_labels,
self.device_count)
total_infer += infer_num
total_label += label_num
total_correct += correct_num
run_examples += run_state.run_examples
run_step += run_state.run_step
run_time_used = time.time() - run_states[0].run_time_begin
run_speed = run_step / run_time_used
avg_loss = loss_sum / run_examples
precision, recall, f1 = calculate_f1(total_label, total_infer,
total_correct)
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
for metric in self.metrics_choices:
if metric == "precision":
scores["precision"] = precision
elif metric == "recall":
scores["recall"] = recall
elif metric == "f1":
scores["f1"] = f1
else:
raise ValueError("Not Support Metric: \"%s\"" % metric)
return scores, avg_loss, run_speed
@property
def feed_list(self):
feed_list = [varname for varname in self._base_feed_list]
if self.is_train_phase or self.is_test_phase:
feed_list += [self.labels[0].name, self.seq_len.name]
else:
feed_list += [self.seq_len.name]
return feed_list
@property
def fetch_list(self):
if self.is_train_phase or self.is_test_phase:
return [metric.name for metric in self.metrics] + [self.loss.name]
elif self.is_predict_phase:
return [self.ret_infers.name] + [self.seq_len.name]
return [output.name for output in self.outputs]
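For orientation, a rough usage sketch of SequenceLabelTask in the style of the PaddleHub 1.x demos; the module and dataset names (ernie, MSRA_NER) are assumptions for illustration, not part of this commit. Precision, recall and F1 are derived from the chunk counts via calculate_f1, and F1 is listed first so it drives best-model selection.
import paddlehub as hub

# Assumed pretrained module and dataset; swap in whatever is installed locally.
module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

reader = hub.reader.SequenceLabelReader(
    dataset=hub.dataset.MSRA_NER(),
    vocab_path=module.get_vocab_path(),
    max_seq_len=128)

feed_list = [
    inputs["input_ids"].name, inputs["position_ids"].name,
    inputs["segment_ids"].name, inputs["input_mask"].name
]

task = hub.SequenceLabelTask(
    data_reader=reader,
    feature=outputs["sequence_output"],   # one feature vector per token position
    feed_list=feed_list,
    max_seq_len=128,
    num_classes=reader.dataset.num_labels,
    config=hub.RunConfig(num_epoch=1, batch_size=16))

task.finetune_and_eval()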
...@@ -26,6 +26,7 @@ from paddlehub.common.downloader import default_downloader ...@@ -26,6 +26,7 @@ from paddlehub.common.downloader import default_downloader
from paddlehub.common.dir import MODULE_HOME from paddlehub.common.dir import MODULE_HOME
from paddlehub.module import module_desc_pb2 from paddlehub.module import module_desc_pb2
import paddlehub as hub import paddlehub as hub
from paddlehub.common.logger import logger
class LocalModuleManager(object): class LocalModuleManager(object):
...@@ -35,23 +36,26 @@ class LocalModuleManager(object): ...@@ -35,23 +36,26 @@ class LocalModuleManager(object):
if not os.path.exists(self.local_modules_dir): if not os.path.exists(self.local_modules_dir):
utils.mkdir(self.local_modules_dir) utils.mkdir(self.local_modules_dir)
elif os.path.isfile(self.local_modules_dir): elif os.path.isfile(self.local_modules_dir):
#TODO(wuzewu): give wanring raise ValueError("Module home should be a folder, not a file")
pass
def check_module_valid(self, module_path): def check_module_valid(self, module_path):
#TODO(wuzewu): code
info = {}
try: try:
desc_pb_path = os.path.join(module_path, 'module_desc.pb') desc_pb_path = os.path.join(module_path, 'module_desc.pb')
if os.path.exists(desc_pb_path) and os.path.isfile(desc_pb_path): if os.path.exists(desc_pb_path) and os.path.isfile(desc_pb_path):
info = {}
desc = module_desc_pb2.ModuleDesc() desc = module_desc_pb2.ModuleDesc()
with open(desc_pb_path, "rb") as fp: with open(desc_pb_path, "rb") as fp:
desc.ParseFromString(fp.read()) desc.ParseFromString(fp.read())
info['version'] = desc.attr.map.data["module_info"].map.data[ info['version'] = desc.attr.map.data["module_info"].map.data[
"version"].s "version"].s
return True, info
else:
logger.warning(
"%s does not exist, the module will be reinstalled" %
desc_pb_path)
except: except:
return False, None pass
return True, info return False, None
def all_modules(self, update=False): def all_modules(self, update=False):
if not update and self.modules_dict: if not update and self.modules_dict:
...@@ -60,7 +64,6 @@ class LocalModuleManager(object): ...@@ -60,7 +64,6 @@ class LocalModuleManager(object):
for sub_dir_name in os.listdir(self.local_modules_dir): for sub_dir_name in os.listdir(self.local_modules_dir):
sub_dir_path = os.path.join(self.local_modules_dir, sub_dir_name) sub_dir_path = os.path.join(self.local_modules_dir, sub_dir_name)
if os.path.isdir(sub_dir_path): if os.path.isdir(sub_dir_path):
#TODO(wuzewu): get module name
valid, info = self.check_module_valid(sub_dir_path) valid, info = self.check_module_valid(sub_dir_path)
if valid: if valid:
module_name = sub_dir_name module_name = sub_dir_name
...@@ -92,7 +95,6 @@ class LocalModuleManager(object): ...@@ -92,7 +95,6 @@ class LocalModuleManager(object):
url = search_result.get('url', None) url = search_result.get('url', None)
md5_value = search_result.get('md5', None) md5_value = search_result.get('md5', None)
installed_module_version = search_result.get('version', None) installed_module_version = search_result.get('version', None)
#TODO(wuzewu): add compatibility check
if not url or (module_version is not None and installed_module_version if not url or (module_version is not None and installed_module_version
!= module_version) or (name != module_name): != module_version) or (name != module_name):
tips = "Can't find module %s" % module_name tips = "Can't find module %s" % module_name
......
...@@ -117,7 +117,6 @@ class Module(object): ...@@ -117,7 +117,6 @@ class Module(object):
self.cache_fetch_dict = None self.cache_fetch_dict = None
self.cache_program = None self.cache_program = None
# TODO(wuzewu): print more module loading info log
if name: if name:
self._init_with_name(name=name, version=version) self._init_with_name(name=name, version=version)
elif module_dir: elif module_dir:
...@@ -458,7 +457,6 @@ class Module(object): ...@@ -458,7 +457,6 @@ class Module(object):
fetch_dict = self.cache_fetch_dict fetch_dict = self.cache_fetch_dict
program = self.cache_program program = self.cache_program
#TODO(wuzewu): more option
fetch_list = list(set([value for key, value in fetch_dict.items()])) fetch_list = list(set([value for key, value in fetch_dict.items()]))
with fluid.program_guard(program): with fluid.program_guard(program):
result = [] result = []
...@@ -554,7 +552,6 @@ class Module(object): ...@@ -554,7 +552,6 @@ class Module(object):
self._recover_variable_info(program) self._recover_variable_info(program)
paddle_helper.set_op_attr(program, is_test=for_test) paddle_helper.set_op_attr(program, is_test=for_test)
#TODO(wuzewu): return feed_list and fetch_list directly
feed_dict = {} feed_dict = {}
fetch_dict = {} fetch_dict = {}
for index, var in enumerate(signature.inputs): for index, var in enumerate(signature.inputs):
...@@ -569,7 +566,6 @@ class Module(object): ...@@ -569,7 +566,6 @@ class Module(object):
if key: if key:
fetch_dict[key] = program.global_block().var(var.name) fetch_dict[key] = program.global_block().var(var.name)
# TODO(ZeyuChen) encapsulate into a funtion
# update BERT/ERNIE's input tensor's sequence length to max_seq_len # update BERT/ERNIE's input tensor's sequence length to max_seq_len
if self.name.startswith("bert") or self.name.startswith("ernie"): if self.name.startswith("bert") or self.name.startswith("ernie"):
MAX_SEQ_LENGTH = 512 MAX_SEQ_LENGTH = 512
......
...@@ -17,4 +17,6 @@ from .nlp_reader import ClassifyReader ...@@ -17,4 +17,6 @@ from .nlp_reader import ClassifyReader
from .nlp_reader import SequenceLabelReader from .nlp_reader import SequenceLabelReader
from .nlp_reader import LACClassifyReader from .nlp_reader import LACClassifyReader
from .nlp_reader import MultiLabelClassifyReader from .nlp_reader import MultiLabelClassifyReader
from .nlp_reader import ReadingComprehensionReader
from .nlp_reader import RegressionReader
from .cv_reader import ImageClassificationReader from .cv_reader import ImageClassificationReader
...@@ -17,7 +17,7 @@ from __future__ import absolute_import ...@@ -17,7 +17,7 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import csv import collections
import json import json
import numpy as np import numpy as np
import platform import platform
...@@ -31,7 +31,7 @@ from paddlehub.reader import tokenization ...@@ -31,7 +31,7 @@ from paddlehub.reader import tokenization
from paddlehub.common.logger import logger from paddlehub.common.logger import logger
from paddlehub.common.utils import sys_stdout_encoding from paddlehub.common.utils import sys_stdout_encoding
from paddlehub.dataset.dataset import InputExample from paddlehub.dataset.dataset import InputExample
from .batching import pad_batch_data from .batching import pad_batch_data, prepare_batch_data
import paddlehub as hub import paddlehub as hub
...@@ -43,7 +43,8 @@ class BaseReader(object): ...@@ -43,7 +43,8 @@ class BaseReader(object):
max_seq_len=512, max_seq_len=512,
do_lower_case=True, do_lower_case=True,
random_seed=None, random_seed=None,
use_task_id=False): use_task_id=False,
in_tokens=False):
self.max_seq_len = max_seq_len self.max_seq_len = max_seq_len
self.tokenizer = tokenization.FullTokenizer( self.tokenizer = tokenization.FullTokenizer(
vocab_file=vocab_path, do_lower_case=do_lower_case) vocab_file=vocab_path, do_lower_case=do_lower_case)
...@@ -52,7 +53,7 @@ class BaseReader(object): ...@@ -52,7 +53,7 @@ class BaseReader(object):
self.pad_id = self.vocab["[PAD]"] self.pad_id = self.vocab["[PAD]"]
self.cls_id = self.vocab["[CLS]"] self.cls_id = self.vocab["[CLS]"]
self.sep_id = self.vocab["[SEP]"] self.sep_id = self.vocab["[SEP]"]
self.in_tokens = False self.in_tokens = in_tokens
self.use_task_id = use_task_id self.use_task_id = use_task_id
if self.use_task_id: if self.use_task_id:
...@@ -202,6 +203,9 @@ class BaseReader(object): ...@@ -202,6 +203,9 @@ class BaseReader(object):
return record return record
def _pad_batch_records(self, batch_records, phase):
raise NotImplementedError
def _prepare_batch_data(self, examples, batch_size, phase=None): def _prepare_batch_data(self, examples, batch_size, phase=None):
"""generate batch records""" """generate batch records"""
batch_records, max_len = [], 0 batch_records, max_len = [], 0
...@@ -494,7 +498,7 @@ class SequenceLabelReader(BaseReader): ...@@ -494,7 +498,7 @@ class SequenceLabelReader(BaseReader):
class LACClassifyReader(object): class LACClassifyReader(object):
def __init__(self, dataset, vocab_path): def __init__(self, dataset, vocab_path, in_tokens=False):
self.dataset = dataset self.dataset = dataset
self.lac = hub.Module(name="lac") self.lac = hub.Module(name="lac")
self.tokenizer = tokenization.FullTokenizer( self.tokenizer = tokenization.FullTokenizer(
...@@ -505,6 +509,7 @@ class LACClassifyReader(object): ...@@ -505,6 +509,7 @@ class LACClassifyReader(object):
sign_name="lexical_analysis").keys())[0] sign_name="lexical_analysis").keys())[0]
self.num_examples = {'train': -1, 'dev': -1, 'test': -1} self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
self.in_tokens = in_tokens
def get_num_examples(self, phase): def get_num_examples(self, phase):
"""Get number of examples for train, dev or test.""" """Get number of examples for train, dev or test."""
...@@ -719,5 +724,536 @@ class MultiLabelClassifyReader(BaseReader): ...@@ -719,5 +724,536 @@ class MultiLabelClassifyReader(BaseReader):
return record return record
class SquadInputFeatures(object):
"""A single set of features of squad_data."""
def __init__(self,
unique_id,
example_index,
doc_span_index,
tokens,
token_to_orig_map,
token_is_max_context,
input_ids,
input_mask,
segment_ids,
start_position=None,
end_position=None,
is_impossible=None):
self.unique_id = unique_id
self.example_index = example_index
self.doc_span_index = doc_span_index
self.tokens = tokens
self.token_to_orig_map = token_to_orig_map
self.token_is_max_context = token_is_max_context
self.input_ids = input_ids
self.input_mask = input_mask
self.segment_ids = segment_ids
self.start_position = start_position
self.end_position = end_position
self.is_impossible = is_impossible
class RegressionReader(BaseReader):
def __init__(self,
dataset,
vocab_path,
label_map_config=None,
max_seq_len=128,
do_lower_case=True,
random_seed=None):
self.max_seq_len = max_seq_len
self.tokenizer = tokenization.FullTokenizer(
vocab_file=vocab_path, do_lower_case=do_lower_case)
self.vocab = self.tokenizer.vocab
self.dataset = dataset
self.pad_id = self.vocab["[PAD]"]
self.cls_id = self.vocab["[CLS]"]
self.sep_id = self.vocab["[SEP]"]
self.in_tokens = False
np.random.seed(random_seed)
# generate label map
self.label_map = {} # Unlike BaseReader, it's not filled
self.current_example = 0
self.current_epoch = 0
self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
def _pad_batch_records(self, batch_records, phase=None):
batch_token_ids = [record.token_ids for record in batch_records]
batch_text_type_ids = [record.text_type_ids for record in batch_records]
batch_position_ids = [record.position_ids for record in batch_records]
padded_token_ids, input_mask = pad_batch_data(
batch_token_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id,
return_input_mask=True)
padded_text_type_ids = pad_batch_data(
batch_text_type_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
padded_position_ids = pad_batch_data(
batch_position_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
if phase != "predict":
batch_labels = [record.label_id for record in batch_records]
# the only diff with ClassifyReader: astype("float32")
batch_labels = np.array(batch_labels).astype("float32").reshape(
[-1, 1])
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask, batch_labels
]
else:
return_list = [
padded_token_ids, padded_position_ids, padded_text_type_ids,
input_mask
]
return return_list
def data_generator(self,
batch_size=1,
phase='train',
shuffle=True,
data=None):
if phase == 'train':
shuffle = True
examples = self.get_train_examples()
self.num_examples['train'] = len(examples)
elif phase == 'val' or phase == 'dev':
shuffle = False
examples = self.get_dev_examples()
self.num_examples['dev'] = len(examples)
elif phase == 'test':
shuffle = False
examples = self.get_test_examples()
self.num_examples['test'] = len(examples)
elif phase == 'predict':
shuffle = False
examples = []
seq_id = 0
for item in data:
# set label in order to run the program
label = -1 # different from BaseReader
if len(item) == 1:
item_i = InputExample(
guid=seq_id, text_a=item[0], label=label)
elif len(item) == 2:
item_i = InputExample(
guid=seq_id,
text_a=item[0],
text_b=item[1],
label=label)
else:
raise ValueError(
"The length of input_text is out of handling, which must be 1 or 2!"
)
examples.append(item_i)
seq_id += 1
else:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'test', 'predict']."
)
def wrapper():
if shuffle:
np.random.shuffle(examples)
for batch_data in self._prepare_batch_data(
examples, batch_size, phase=phase):
yield [batch_data]
return wrapper
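A brief sketch (illustrative only, not in this commit) of driving RegressionReader in predict mode with raw sentence pairs; the GLUE("STS-B") constructor and the ernie module name are assumptions, and any dataset exposing get_train/dev/test_examples() would work the same way.
import paddlehub as hub

module = hub.Module(name="ernie")        # assumed module, only used here for its vocab file
dataset = hub.dataset.GLUE("STS-B")      # assumed regression dataset constructor
reader = RegressionReader(
    dataset=dataset,
    vocab_path=module.get_vocab_path(),
    max_seq_len=128)

data = [["A man is playing a guitar.", "A person plays guitar."]]
for batch in reader.data_generator(batch_size=1, phase="predict", data=data)():
    # Predict batches carry padded token/position/segment ids plus the input mask;
    # no label column is appended in this phase (see _pad_batch_records above).
    padded_token_ids = batch[0][0]
    print(padded_token_ids.shape)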
class ReadingComprehensionReader(object):
def __init__(self,
dataset,
vocab_path,
do_lower_case=True,
max_seq_length=512,
doc_stride=128,
max_query_length=64,
random_seed=None):
self.dataset = dataset
self._tokenizer = tokenization.FullTokenizer(
vocab_file=vocab_path, do_lower_case=do_lower_case)
self._max_seq_length = max_seq_length
self._doc_stride = doc_stride
self._max_query_length = max_query_length
self._in_tokens = False
np.random.seed(random_seed)
self.vocab = self._tokenizer.vocab
self.vocab_size = len(self.vocab)
self.pad_id = self.vocab["[PAD]"]
self.cls_id = self.vocab["[CLS]"]
self.sep_id = self.vocab["[SEP]"]
self.mask_id = self.vocab["[MASK]"]
self.current_train_example = 0
self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
def get_train_progress(self):
"""Gets progress for training phase."""
return self.current_train_example
def get_train_examples(self):
"""Gets a collection of `SquadExample`s for the train set."""
return self.dataset.get_train_examples()
def get_dev_examples(self):
"""Gets a collection of `SquadExample`s for the dev set."""
return self.dataset.get_dev_examples()
def get_test_examples(self):
"""Gets a collection of `SquadExample`s for prediction."""
return self.dataset.get_test_examples()
def get_num_examples(self, phase):
if phase not in ['train', 'dev', 'test']:
raise ValueError(
"Unknown phase, which should be in ['train', 'predict'].")
return self.num_examples[phase]
def data_generator(self,
batch_size=1,
phase='train',
shuffle=False,
data=None):
if phase == 'train':
shuffle = True
examples = self.get_train_examples()
self.num_examples['train'] = len(examples)
elif phase == 'dev':
shuffle = False
examples = self.get_dev_examples()
self.num_examples['dev'] = len(examples)
elif phase == 'test':
shuffle = False
examples = self.get_test_examples()
self.num_examples['test'] = len(examples)
elif phase == 'predict':
shuffle = False
examples = data
else:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'test', 'predict']."
)
def batch_reader(features, batch_size, in_tokens):
batch, total_token_num, max_len = [], 0, 0
for (index, feature) in enumerate(features):
if phase == 'train':
self.current_train_example = index + 1
seq_len = len(feature.input_ids)
labels = [feature.unique_id] if feature.start_position is None else [feature.start_position, feature.end_position]
example = [
feature.input_ids, feature.segment_ids,
range(seq_len)
] + labels
max_len = max(max_len, seq_len)
#max_len = max(max_len, len(token_ids))
if in_tokens:
to_append = (len(batch) + 1) * max_len <= batch_size
else:
to_append = len(batch) < batch_size
if to_append:
batch.append(example)
total_token_num += seq_len
else:
yield batch, total_token_num
batch, total_token_num, max_len = [example], seq_len, seq_len
if len(batch) > 0:
yield batch, total_token_num
def wrapper():
if shuffle:
np.random.shuffle(examples)
if phase == "train":
features = self.convert_examples_to_features(
examples, is_training=True)
else:
features = self.convert_examples_to_features(
examples, is_training=False)
for batch_data, total_token_num in batch_reader(
features, batch_size, self._in_tokens):
batch_data = prepare_batch_data(
batch_data,
total_token_num,
self._max_seq_length,
pad_id=self.pad_id,
cls_id=self.cls_id,
sep_id=self.sep_id,
return_input_mask=True,
return_max_len=False,
return_num_token=False)
yield [batch_data]
return wrapper
def convert_examples_to_features(self, examples, is_training):
"""Loads a data file into a list of `InputBatch`s."""
unique_id = 1000000000
for (example_index, example) in enumerate(examples):
query_tokens = self._tokenizer.tokenize(example.question_text)
if len(query_tokens) > self._max_query_length:
query_tokens = query_tokens[0:self._max_query_length]
tok_to_orig_index = []
orig_to_tok_index = []
all_doc_tokens = []
for (i, token) in enumerate(example.doc_tokens):
orig_to_tok_index.append(len(all_doc_tokens))
sub_tokens = self._tokenizer.tokenize(token)
for sub_token in sub_tokens:
tok_to_orig_index.append(i)
all_doc_tokens.append(sub_token)
tok_start_position = None
tok_end_position = None
if is_training and example.is_impossible:
tok_start_position = -1
tok_end_position = -1
if is_training and not example.is_impossible:
tok_start_position = orig_to_tok_index[example.start_position]
if example.end_position < len(example.doc_tokens) - 1:
tok_end_position = orig_to_tok_index[example.end_position +
1] - 1
else:
tok_end_position = len(all_doc_tokens) - 1
(tok_start_position,
tok_end_position) = self.improve_answer_span(
all_doc_tokens, tok_start_position, tok_end_position,
self._tokenizer, example.orig_answer_text)
# The -3 accounts for [CLS], [SEP] and [SEP]
max_tokens_for_doc = self._max_seq_length - len(query_tokens) - 3
# We can have documents that are longer than the maximum sequence length.
# To deal with this we do a sliding window approach, where we take chunks
# of the up to our max length with a stride of `doc_stride`.
_DocSpan = collections.namedtuple("DocSpan", ["start", "length"])
doc_spans = []
start_offset = 0
while start_offset < len(all_doc_tokens):
length = len(all_doc_tokens) - start_offset
if length > max_tokens_for_doc:
length = max_tokens_for_doc
doc_spans.append(_DocSpan(start=start_offset, length=length))
if start_offset + length == len(all_doc_tokens):
break
start_offset += min(length, self._doc_stride)
for (doc_span_index, doc_span) in enumerate(doc_spans):
tokens = []
token_to_orig_map = {}
token_is_max_context = {}
segment_ids = []
tokens.append("[CLS]")
segment_ids.append(0)
for token in query_tokens:
tokens.append(token)
segment_ids.append(0)
tokens.append("[SEP]")
segment_ids.append(0)
for i in range(doc_span.length):
split_token_index = doc_span.start + i
token_to_orig_map[len(
tokens)] = tok_to_orig_index[split_token_index]
is_max_context = self.check_is_max_context(
doc_spans, doc_span_index, split_token_index)
token_is_max_context[len(tokens)] = is_max_context
tokens.append(all_doc_tokens[split_token_index])
segment_ids.append(1)
tokens.append("[SEP]")
segment_ids.append(1)
input_ids = self._tokenizer.convert_tokens_to_ids(tokens)
# The mask has 1 for real tokens and 0 for padding tokens. Only real
# tokens are attended to.
input_mask = [1] * len(input_ids)
# Zero-pad up to the sequence length.
#while len(input_ids) < max_seq_length:
# input_ids.append(0)
# input_mask.append(0)
# segment_ids.append(0)
#assert len(input_ids) == max_seq_length
#assert len(input_mask) == max_seq_length
#assert len(segment_ids) == max_seq_length
start_position = None
end_position = None
if is_training and not example.is_impossible:
# For training, if our document chunk does not contain an annotation
# we throw it out, since there is nothing to predict.
doc_start = doc_span.start
doc_end = doc_span.start + doc_span.length - 1
out_of_span = False
if not (tok_start_position >= doc_start
and tok_end_position <= doc_end):
out_of_span = True
if out_of_span:
start_position = 0
end_position = 0
else:
doc_offset = len(query_tokens) + 2
start_position = tok_start_position - doc_start + doc_offset
end_position = tok_end_position - doc_start + doc_offset
if is_training and example.is_impossible:
start_position = 0
end_position = 0
if example_index < 3:
logger.debug("*** Example ***")
logger.debug("unique_id: %s" % (unique_id))
logger.debug("example_index: %s" % (example_index))
logger.debug("doc_span_index: %s" % (doc_span_index))
logger.debug("tokens: %s" % " ".join(
[tokenization.printable_text(x) for x in tokens]))
logger.debug("token_to_orig_map: %s" % " ".join([
"%d:%d" % (x, y)
for (x, y) in six.iteritems(token_to_orig_map)
]))
logger.debug("token_is_max_context: %s" % " ".join([
"%d:%s" % (x, y)
for (x, y) in six.iteritems(token_is_max_context)
]))
logger.debug(
"input_ids: %s" % " ".join([str(x) for x in input_ids]))
logger.debug("input_mask: %s" % " ".join(
[str(x) for x in input_mask]))
logger.debug("segment_ids: %s" % " ".join(
[str(x) for x in segment_ids]))
if is_training and example.is_impossible:
logger.debug("impossible example")
if is_training and not example.is_impossible:
answer_text = " ".join(
tokens[start_position:(end_position + 1)])
logger.debug("start_position: %d" % (start_position))
logger.debug("end_position: %d" % (end_position))
logger.debug("answer: %s" %
(tokenization.printable_text(answer_text)))
feature = SquadInputFeatures(
unique_id=unique_id,
example_index=example_index,
doc_span_index=doc_span_index,
tokens=tokens,
token_to_orig_map=token_to_orig_map,
token_is_max_context=token_is_max_context,
input_ids=input_ids,
input_mask=input_mask,
segment_ids=segment_ids,
start_position=start_position,
end_position=end_position,
is_impossible=example.is_impossible)
unique_id += 1
yield feature
def improve_answer_span(self, doc_tokens, input_start, input_end, tokenizer,
orig_answer_text):
"""Returns tokenized answer spans that better match the annotated answer."""
# The SQuAD annotations are character based. We first project them to
# whitespace-tokenized words. But then after WordPiece tokenization, we can
# often find a "better match". For example:
#
# Question: What year was John Smith born?
# Context: The leader was John Smith (1895-1943).
# Answer: 1895
#
# The original whitespace-tokenized answer will be "(1895-1943).". However
# after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
# the exact answer, 1895.
#
# However, this is not always possible. Consider the following:
#
# Question: What country is the top exporter of electronics?
# Context: The Japanese electronics industry is the largest in the world.
# Answer: Japan
#
# In this case, the annotator chose "Japan" as a character sub-span of
# the word "Japanese". Since our WordPiece tokenizer does not split
# "Japanese", we just use "Japanese" as the annotation. This is fairly rare
# in SQuAD, but does happen.
tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))
for new_start in range(input_start, input_end + 1):
for new_end in range(input_end, new_start - 1, -1):
text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
if text_span == tok_answer_text:
return (new_start, new_end)
return (input_start, input_end)
def check_is_max_context(self, doc_spans, cur_span_index, position):
"""Check if this is the 'max context' doc span for the token."""
# Because of the sliding window approach taken to scoring documents, a single
# token can appear in multiple documents. E.g.
# Doc: the man went to the store and bought a gallon of milk
# Span A: the man went to the
# Span B: to the store and bought
# Span C: and bought a gallon of
# ...
#
# Now the word 'bought' will have two scores from spans B and C. We only
# want to consider the score with "maximum context", which we define as
# the *minimum* of its left and right context (the *sum* of left and
# right context will always be the same, of course).
#
# In the example the maximum context for 'bought' would be span C since
# it has 1 left context and 3 right context, while span B has 4 left context
# and 0 right context.
best_score = None
best_span_index = None
for (span_index, doc_span) in enumerate(doc_spans):
end = doc_span.start + doc_span.length - 1
if position < doc_span.start:
continue
if position > end:
continue
num_left_context = position - doc_span.start
num_right_context = end - position
score = min(num_left_context,
num_right_context) + 0.01 * doc_span.length
if best_score is None or score > best_score:
best_score = score
best_span_index = span_index
return cur_span_index == best_span_index
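To make the scoring above concrete, here is a standalone re-statement (not part of the commit) of the max-context rule on the "bought" example from the comment: score = min(left context, right context) + 0.01 * span length.
import collections

_DocSpan = collections.namedtuple("DocSpan", ["start", "length"])

# Doc: "the man went to the store and bought a gallon of milk" (token indices 0..11).
# Span B = tokens 3..7 ("to the store and bought"), Span C = tokens 6..10.
span_b = _DocSpan(start=3, length=5)
span_c = _DocSpan(start=6, length=5)
position = 7  # the token "bought"

for name, span in (("B", span_b), ("C", span_c)):
    end = span.start + span.length - 1
    score = min(position - span.start, end - position) + 0.01 * span.length
    print(name, score)
# B -> 0.05 (4 left, 0 right), C -> 1.05 (1 left, 3 right): span C wins,
# matching what check_is_max_context() returns for that token.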
if __name__ == '__main__': if __name__ == '__main__':
pass pass
...@@ -21,7 +21,6 @@ from __future__ import print_function ...@@ -21,7 +21,6 @@ from __future__ import print_function
import collections import collections
import io import io
import unicodedata import unicodedata
import six import six
......
visualdl >= 1.3.0
pre-commit pre-commit
protobuf >= 3.1.0 protobuf >= 3.1.0
yapf == 0.26.0 yapf == 0.26.0
...@@ -12,3 +11,5 @@ requests ...@@ -12,3 +11,5 @@ requests
pandas pandas
#[py2]pandas == 0.24.0 #[py2]pandas == 0.24.0
flake8 flake8
tb-paddle
cma == 2.7.0
...@@ -32,7 +32,7 @@ max_version, mid_version, min_version = python_version() ...@@ -32,7 +32,7 @@ max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0', 'pyyaml', 'Pillow', 'requests', 'six >= 1.10.0', 'protobuf >= 3.1.0', 'pyyaml', 'Pillow', 'requests',
"visualdl >= 1.3.0" 'visualdl >= 1.3.0', 'cma == 2.7.0'
] ]
if max_version < 3: if max_version < 3:
......