Make Transformer, seq2seq, sequence_tagging adapt to paddle.incubate.hapi

386f2d13 · guosheng · b337063c · 386f2d13 · 386f2d13 · 386f2d13
14 changed files
--- a/examples/seq2seq/seq2seq_attn.py
+++ b/examples/seq2seq/seq2seq_attn.py
@@ -19,7 +19,8 @@ from paddle.fluid.initializer import UniformInitializer
 from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
-from paddle.incubate.hapi.model import Model, Loss
+from paddle.incubate.hapi.model import Model
+from paddle.incubate.hapi.loss import Loss
 from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
 from seq2seq_base import Encoder

--- a/examples/seq2seq/seq2seq_base.py
+++ b/examples/seq2seq/seq2seq_base.py
@@ -19,7 +19,8 @@ from paddle.fluid.initializer import UniformInitializer
 from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
-from paddle.incubate.hapi.model import Model, Loss
+from paddle.incubate.hapi.model import Model
+from paddle.incubate.hapi.loss import Loss
 from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell

--- a/examples/seq2seq/train.py
+++ b/examples/seq2seq/train.py
@@ -26,7 +26,7 @@ from args import parse_args
 from seq2seq_base import BaseModel, CrossEntropyCriterion
 from seq2seq_attn import AttentionModel
 from reader import create_data_loader
-from utility import PPL, TrainCallback
+from utility import PPL, TrainCallback, get_model_cls
 def do_train(args):
@@ -56,7 +56,8 @@ def do_train(args):
    # def dataloader
    train_loader, eval_loader = create_data_loader(args, device)
-    model_maker = AttentionModel if args.attention else BaseModel
+    model_maker = get_model_cls(
+        AttentionModel) if args.attention else get_model_cls(BaseModel)
    model = model_maker(args.src_vocab_size, args.tar_vocab_size,
                        args.hidden_size, args.hidden_size, args.num_layers,
                        args.dropout)

--- a/examples/seq2seq/utility.py
+++ b/examples/seq2seq/utility.py
@@ -18,6 +18,7 @@ import paddle.fluid as fluid
 from paddle.incubate.hapi.metrics import Metric
 from paddle.incubate.hapi.callbacks import ProgBarLogger
+from paddle.incubate.hapi.text import BasicLSTMCell
 class TrainCallback(ProgBarLogger):
@@ -78,3 +79,21 @@ class PPL(Metric):
        self.total_loss += batch_loss * batch_size
        ppl = math.exp(self.total_loss / self.word_count)
        return ppl
+def get_model_cls(model_cls):
+    """
+    Patch `model_cls.__init__` in place so that every BasicLSTMCell sublayer
+    gets `_forget_bias.stop_gradient = False` right after construction.
+    Remove this workaround when BasicLSTMCell or recurrent_op is fixed.
+    NOTE(review): the previous summary said `stop_gradient=True` while the
+    code has always assigned False; the assignment is kept as is -- confirm
+    which one is intended.
+    Args:
+        model_cls: class whose instances provide `sublayers()`.
+    Returns:
+        The same class object with `__init__` wrapped. The unpatched
+        constructor stays reachable as `model_cls._raw_init`.
+    """
+    # Patching twice would make the wrapper call itself through `_raw_init`
+    # forever, so an already patched constructor (own or inherited) is left
+    # untouched.
+    if getattr(model_cls.__init__, "_lstm_patched", False):
+        return model_cls
+    # Capture the original constructor in the closure instead of resolving
+    # `self._raw_init` at call time: attribute lookup on the instance would
+    # pick up a patched subclass' value and recurse.
+    raw_init = model_cls.__init__
+    def __lstm_patch__(self, *args, **kwargs):
+        raw_init(self, *args, **kwargs)
+        for layer in self.sublayers(include_sublayers=True):
+            if isinstance(layer, BasicLSTMCell):
+                layer._forget_bias.stop_gradient = False
+    __lstm_patch__._lstm_patched = True
+    model_cls._raw_init = raw_init
+    model_cls.__init__ = __lstm_patch__
+    return model_cls
--- a/examples/sequence_tagging/eval.py
+++ b/examples/sequence_tagging/eval.py
@@ -18,24 +18,14 @@ SequenceTagging eval structure
 from __future__ import division
 from __future__ import print_function
-import io
-import os
-import sys
-import math
-import argparse
-import numpy as np
-work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(os.path.join(work_dir, "../"))
-from paddle.incubate.hapi.model import set_device, Input
-from paddle.incubate.hapi.text.sequence_tagging import SeqTagging, ChunkEval, LacLoss
-from paddle.incubate.hapi.text.sequence_tagging import LacDataset, LacDataLoader
-from paddle.incubate.hapi.text.sequence_tagging import check_gpu, check_version
-from paddle.incubate.hapi.text.sequence_tagging import PDConfig
 import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
+from paddle.incubate.hapi.model import Input, set_device
+from sequence_tagging import SeqTagging, LacLoss, ChunkEval
+from reader import LacDataset, LacDataLoader
+from utils.check import check_gpu, check_version
+from utils.configure import PDConfig
 def main(args):
@@ -79,5 +69,6 @@ if __name__ == '__main__':
    use_gpu = True if args.device == "gpu" else False
    check_gpu(use_gpu)
-    check_version()
+    # TODO: re-enable the version check for 2.0.0-alpha0 once fluid.require_version supports it
+    # check_version()
    main(args)
--- a/examples/sequence_tagging/predict.py
+++ b/examples/sequence_tagging/predict.py
@@ -18,25 +18,16 @@ SequenceTagging predict structure
 from __future__ import division
 from __future__ import print_function
-import io
-import os
-import sys
 import six
-import math
-import argparse
-import numpy as np
-work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(os.path.join(work_dir, "../"))
-from paddle.incubate.hapi.text.sequence_tagging import SeqTagging
-from paddle.incubate.hapi.model import Input, set_device
-from paddle.incubate.hapi.text.sequence_tagging import LacDataset, LacDataLoader
-from paddle.incubate.hapi.text.sequence_tagging import check_gpu, check_version
-from paddle.incubate.hapi.text.sequence_tagging import PDConfig
 import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
+from paddle.incubate.hapi.model import Input, set_device
+from sequence_tagging import SeqTagging, LacLoss, ChunkEval
+from reader import LacDataset, LacDataLoader
+from utils.check import check_gpu, check_version
+from utils.configure import PDConfig
 def main(args):
@@ -45,8 +36,9 @@ def main(args):
    inputs = [
        Input(
-            [None, None], 'int64', name='words'), Input(
+            [None, None], 'int64', name='words'),
-                [None], 'int64', name='length')
+        Input(
+            [None], 'int64', name='length'),
    ]
    dataset = LacDataset(args)
@@ -87,5 +79,6 @@ if __name__ == '__main__':
    use_gpu = True if args.device == "gpu" else False
    check_gpu(use_gpu)
-    check_version()
+    # TODO: re-enable the version check for 2.0.0-alpha0 once fluid.require_version supports it
+    # check_version()
    main(args)
--- a/examples/sequence_tagging/reader.py
+++ b/examples/sequence_tagging/reader.py
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+SequenceTagging dataset
+"""
+from __future__ import division
+from __future__ import print_function
+import io
+import os
+import numpy as np
+import shutil
+from functools import partial
+import paddle
+from paddle.io import BatchSampler, DataLoader, Dataset
+from paddle.fluid.dygraph.parallel import ParallelEnv
+from paddle.incubate.hapi.distributed import DistributedBatchSampler
+class LacDataset(Dataset):
+    """
+    Load lexical analysis dataset
+    The word / label / word-replacement dictionaries are read at construction
+    time; examples are only loaded once `file_reader` is called.
+    """
+    def __init__(self, args):
+        self.word_dict_path = args.word_dict_path
+        self.label_dict_path = args.label_dict_path
+        self.word_rep_dict_path = args.word_rep_dict_path
+        self._load_dict()
+        self.examples = []
+    def _load_dict(self):
+        """Build the word/label lookup tables in both directions."""
+        self.word2id_dict = self.load_kv_dict(
+            self.word_dict_path, reverse=True, value_func=np.int64)
+        self.id2word_dict = self.load_kv_dict(self.word_dict_path)
+        self.label2id_dict = self.load_kv_dict(
+            self.label_dict_path, reverse=True, value_func=np.int64)
+        self.id2label_dict = self.load_kv_dict(self.label_dict_path)
+        if self.word_rep_dict_path is None:
+            self.word_replace_dict = dict()
+        else:
+            self.word_replace_dict = self.load_kv_dict(self.word_rep_dict_path)
+    def load_kv_dict(self,
+                     dict_path,
+                     reverse=False,
+                     delimiter="\t",
+                     key_func=None,
+                     value_func=None):
+        """
+        Load key-value dict from file
+        Each line is split on `delimiter`; lines that do not yield exactly
+        two fields are skipped. With `reverse=True` the second field is the
+        key. `key_func` / `value_func`, when given, convert the raw strings.
+        Raises:
+            KeyError: if the same (unconverted) key appears twice.
+        """
+        result_dict = {}
+        # Context manager so the file handle is closed deterministically.
+        with io.open(dict_path, "r", encoding='utf8') as dict_file:
+            for line in dict_file:
+                terms = line.strip("\n").split(delimiter)
+                if len(terms) != 2:
+                    continue
+                if reverse:
+                    value, key = terms
+                else:
+                    key, value = terms
+                # The duplicate check runs on the raw key, before key_func.
+                if key in result_dict:
+                    raise KeyError("key duplicated with [%s]" % (key))
+                if key_func:
+                    key = key_func(key)
+                if value_func:
+                    value = value_func(value)
+                result_dict[key] = value
+        return result_dict
+    @property
+    def vocab_size(self):
+        """Largest word id plus one."""
+        return max(self.word2id_dict.values()) + 1
+    @property
+    def num_labels(self):
+        """Largest label id plus one."""
+        return max(self.label2id_dict.values()) + 1
+    def get_num_examples(self, filename):
+        """num of line of file"""
+        with io.open(filename, "r", encoding='utf8') as fr:
+            return sum(1 for _ in fr)
+    def word_to_ids(self, words):
+        """convert word to word index"""
+        word_ids = []
+        for word in words:
+            word = self.word_replace_dict.get(word, word)
+            # Unknown words map to the "OOV" entry of the word dictionary.
+            if word not in self.word2id_dict:
+                word = "OOV"
+            word_id = self.word2id_dict[word]
+            word_ids.append(word_id)
+        return word_ids
+    def label_to_ids(self, labels):
+        """convert label to label index"""
+        label_ids = []
+        for label in labels:
+            # Unknown labels map to the "O" entry of the label dictionary.
+            if label not in self.label2id_dict:
+                label = "O"
+            label_id = self.label2id_dict[label]
+            label_ids.append(label_id)
+        return label_ids
+    def file_reader(self, filename, phase="train"):
+        """
+        Append the examples found in `filename` to `self.examples` and
+        remember `phase` for `__getitem__`.
+        In "train"/"test" phase the first line must be the header
+        "text_a<TAB>label"; every following line is stored whole. Any other
+        phase stores only the first tab-separated field of each line.
+        """
+        self.phase = phase
+        with io.open(filename, "r", encoding="utf8") as fr:
+            if phase in ["train", "test"]:
+                headline = next(fr)
+                headline = headline.strip().split('\t')
+                assert len(headline) == 2 and headline[
+                    0] == "text_a" and headline[1] == "label"
+                for line in fr:
+                    line_str = line.strip("\n")
+                    # Skip empty lines AND lines without a tab-separated
+                    # label column; the latter would otherwise fail to unpack
+                    # in __getitem__.
+                    if len(line_str) < 1 or len(line_str.split('\t')) < 2:
+                        continue
+                    self.examples.append(line_str)
+            else:
+                for line in fr:
+                    words = line.strip("\n").split("\t")[0]
+                    self.examples.append(words)
+    def __getitem__(self, idx):
+        line_str = self.examples[idx]
+        if self.phase in ["train", "test"]:
+            words, labels = line_str.split('\t')
+            # Tokens and labels inside each column are separated by "\002".
+            word_ids = self.word_to_ids(words.split("\002"))
+            label_ids = self.label_to_ids(labels.split("\002"))
+            assert len(word_ids) == len(label_ids)
+            return word_ids, label_ids
+        else:
+            # Outside train/test every character of the line is one token.
+            word_ids = self.word_to_ids(list(line_str))
+            return word_ids
+    def __len__(self):
+        return len(self.examples)
+def create_lexnet_data_generator(args, insts, phase="train"):
+    """
+    Collate a list of samples into zero-padded numpy batches.
+    For "train" every sequence is cut/padded to `args.max_seq_len`; for
+    "test" and any other phase the longest word sequence of the batch is
+    used. "train"/"test" return
+    [words, lengths, labels, labels]; other phases return [words, lengths].
+    """
+    def padding_data(max_len, batch_data, if_len=False):
+        # Truncate each row to max_len, then right-pad it with zeros.
+        padded_rows = []
+        row_lens = []
+        for data in batch_data:
+            row = data[:max_len]
+            if if_len:
+                row_lens.append(np.int64(len(row)))
+            padded_rows.append(row + [0] * (max_len - len(row)))
+        if not if_len:
+            return np.array(padded_rows)
+        return np.array(padded_rows), np.array(row_lens)
+    if phase == "train" or phase == "test":
+        batch_words = [inst[0] for inst in insts]
+        if phase == "train":
+            target_len = args.max_seq_len
+        else:
+            target_len = max([len(inst[0]) for inst in insts])
+        batch_labels = [inst[1] for inst in insts]
+        padded_words, padded_lens = padding_data(
+            target_len, batch_words, if_len=True)
+        padded_labels = padding_data(target_len, batch_labels)
+        # The label batch is returned twice, as in the original layout.
+        return [padded_words, padded_lens, padded_labels, padded_labels]
+    # Remaining phases: every sample is the bare word-id list.
+    target_len = max([len(inst) for inst in insts])
+    padded_words, padded_lens = padding_data(target_len, insts, if_len=True)
+    return [padded_words, padded_lens]
+class LacDataLoader(object):
+    """
+    Bundle a LacDataset, a batch sampler and a DataLoader for one phase.
+    The "train" phase uses DistributedBatchSampler, "test" and "predict" use
+    BatchSampler; batches are collated by create_lexnet_data_generator.
+    """
+    def __init__(self,
+                 args,
+                 place,
+                 phase="train",
+                 shuffle=False,
+                 num_workers=0,
+                 drop_last=False):
+        valid_phases = ["train", "test", "predict"]
+        assert phase in valid_phases, \
+            "phase should be in [train, test, predict], but get %s" % phase
+        is_train = phase == "train"
+        # Each phase reads its own input file from the configuration.
+        if is_train:
+            file_name = args.train_file
+        elif phase == "test":
+            file_name = args.test_file
+        elif phase == "predict":
+            file_name = args.predict_file
+        self.dataset = LacDataset(args)
+        self.dataset.file_reader(file_name, phase=phase)
+        sampler_cls = DistributedBatchSampler if is_train else BatchSampler
+        self.sampler = sampler_cls(
+            dataset=self.dataset,
+            batch_size=args.batch_size,
+            shuffle=shuffle,
+            drop_last=drop_last)
+        collate = partial(create_lexnet_data_generator, args, phase=phase)
+        self.dataloader = DataLoader(
+            dataset=self.dataset,
+            batch_sampler=self.sampler,
+            places=place,
+            collate_fn=collate,
+            num_workers=num_workers,
+            return_list=True)
--- a/examples/sequence_tagging/sequence_tagging.py
+++ b/examples/sequence_tagging/sequence_tagging.py
+#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+SequenceTagging network structure
+"""
+from __future__ import division
+from __future__ import print_function
+import io
+import os
+import sys
+import math
+import argparse
+import numpy as np
+import paddle.fluid as fluid
+from paddle.incubate.hapi.metrics import Metric
+from paddle.incubate.hapi.model import Model
+from paddle.incubate.hapi.loss import Loss
+from paddle.incubate.hapi.text import SequenceTagging
+from utils.check import check_gpu, check_version
+from utils.configure import PDConfig
+class SeqTagging(Model):
+    """
+    Lexical analysis model: a thin Model wrapper around the hapi
+    SequenceTagging network.
+    """
+    def __init__(self, args, vocab_size, num_labels, length=None,
+                 mode="train"):
+        """
+        Args:
+            args: configuration providing `word_emb_dim`, `grnn_hidden_dim`,
+                `bigru_num` and `batch_size`; `emb_learning_rate` and
+                `crf_learning_rate` are used when listed by `dir(args)`,
+                otherwise 1.0 is used.
+            vocab_size: passed to SequenceTagging as `vocab_size`.
+            num_labels: passed to SequenceTagging as `num_labels`.
+            length: stored on the instance as `self.length`.
+            mode: "train" and "test" make `forward` pass the target to the
+                network; any other value runs it without a target.
+        """
+        super(SeqTagging, self).__init__()
+        self.mode_type = mode
+        self.word_emb_dim = args.word_emb_dim
+        self.vocab_size = vocab_size
+        self.num_labels = num_labels
+        self.grnn_hidden_dim = args.grnn_hidden_dim
+        # `in dir(args)` is kept on purpose (rather than getattr with a
+        # default) so a config object with a custom __getattr__ behaves as
+        # before.
+        self.emb_lr = args.emb_learning_rate if 'emb_learning_rate' in dir(
+            args) else 1.0
+        # Read the CRF learning rate from its own option; it used to read
+        # `emb_learning_rate` by mistake.
+        self.crf_lr = args.crf_learning_rate if 'crf_learning_rate' in dir(
+            args) else 1.0
+        self.bigru_num = args.bigru_num
+        self.batch_size = args.batch_size
+        self.init_bound = 0.1
+        self.length = length
+        self.sequence_tagging = SequenceTagging(
+            vocab_size=self.vocab_size,
+            num_labels=self.num_labels,
+            word_emb_dim=self.word_emb_dim,
+            grnn_hidden_dim=self.grnn_hidden_dim,
+            emb_learning_rate=self.emb_lr,
+            crf_learning_rate=self.crf_lr,
+            bigru_num=self.bigru_num,
+            init_bound=self.init_bound)
+    def forward(self, *inputs):
+        """
+        Run the network on `inputs[0]` (words) and `inputs[1]` (lengths);
+        in "train"/"test" mode `inputs[2]` (target) is passed as well.
+        """
+        word = inputs[0]
+        lengths = inputs[1]
+        if self.mode_type == "train" or self.mode_type == "test":
+            target = inputs[2]
+            outputs = self.sequence_tagging(word, lengths, target)
+        else:
+            outputs = self.sequence_tagging(word, lengths)
+        return outputs
+class Chunk_eval(fluid.dygraph.Layer):
+    """
+    Layer that appends a `chunk_eval` op and returns its chunk counters.
+    """
+    def __init__(self,
+                 num_chunk_types,
+                 chunk_scheme,
+                 excluded_chunk_types=None):
+        super(Chunk_eval, self).__init__()
+        self.num_chunk_types = num_chunk_types
+        self.chunk_scheme = chunk_scheme
+        self.excluded_chunk_types = excluded_chunk_types
+    def forward(self, input, label, seq_length=None):
+        """
+        Append the op for `input`/`label` (plus `seq_length` when given) and
+        return (num_infer_chunks, num_label_chunks, num_correct_chunks).
+        """
+        new_var = self._helper.create_variable_for_type_inference
+        # Output variables are created in the same order as before:
+        # three float32 scores first, then three int64 counters.
+        precision, recall, f1_score = [
+            new_var(dtype="float32") for _ in range(3)
+        ]
+        num_infer_chunks, num_label_chunks, num_correct_chunks = [
+            new_var(dtype="int64") for _ in range(3)
+        ]
+        op_inputs = {"Inference": input, "Label": label}
+        if seq_length is not None:
+            op_inputs["SeqLength"] = seq_length
+        op_outputs = {
+            "Precision": [precision],
+            "Recall": [recall],
+            "F1-Score": [f1_score],
+            "NumInferChunks": [num_infer_chunks],
+            "NumLabelChunks": [num_label_chunks],
+            "NumCorrectChunks": [num_correct_chunks]
+        }
+        op_attrs = {
+            "num_chunk_types": self.num_chunk_types,
+            "chunk_scheme": self.chunk_scheme,
+            "excluded_chunk_types": self.excluded_chunk_types or []
+        }
+        self._helper.append_op(
+            type='chunk_eval',
+            inputs=op_inputs,
+            outputs=op_outputs,
+            attrs=op_attrs)
+        return (num_infer_chunks, num_label_chunks, num_correct_chunks)
+class LacLoss(Loss):
+    """Loss that forwards the cost already present in the model outputs."""
+    def __init__(self):
+        super(LacLoss, self).__init__()
+    def forward(self, outputs, labels):
+        """Return `outputs[1]`; `labels` is not used."""
+        return outputs[1]
+class ChunkEval(Metric):
+    """
+    Chunk-level precision / recall / F1 metric built on Chunk_eval with the
+    "IOB" scheme. Counters are accumulated across `update` calls until
+    `reset` is called.
+    """
+    def __init__(self, num_labels, name=None, *args, **kwargs):
+        super(ChunkEval, self).__init__(*args, **kwargs)
+        self._init_name(name)
+        num_chunk_types = int(math.ceil((num_labels - 1) / 2.0))
+        self.chunk_eval = Chunk_eval(num_chunk_types, "IOB")
+        self.reset()
+    def add_metric_op(self, *args):
+        """
+        Build the chunk counters from args[0] (decoded tags), args[2]
+        (lengths) and args[3] (labels); args[1] is not used.
+        """
+        crf_decode, lengths, label = args[0], args[2], args[3]
+        counters = self.chunk_eval(
+            input=crf_decode, label=label, seq_length=lengths)
+        num_infer_chunks, num_label_chunks, num_correct_chunks = counters
+        return [num_infer_chunks, num_label_chunks, num_correct_chunks]
+    def update(self, num_infer_chunks, num_label_chunks, num_correct_chunks,
+               *args, **kwargs):
+        """
+        Add the batch counters to the running totals and return the
+        [precision, recall, f1] of this batch alone.
+        """
+        self.infer_chunks_total += num_infer_chunks
+        self.label_chunks_total += num_label_chunks
+        self.correct_chunks_total += num_correct_chunks
+        if num_infer_chunks:
+            precision = float(num_correct_chunks) / num_infer_chunks
+        else:
+            precision = 0
+        if num_label_chunks:
+            recall = float(num_correct_chunks) / num_label_chunks
+        else:
+            recall = 0
+        if num_correct_chunks:
+            f1_score = float(2 * precision * recall) / (precision + recall)
+        else:
+            f1_score = 0
+        return [precision, recall, f1_score]
+    def reset(self):
+        """Zero the three running totals."""
+        self.infer_chunks_total = 0
+        self.label_chunks_total = 0
+        self.correct_chunks_total = 0
+    def accumulate(self):
+        """Return [precision, recall, f1] computed from the running totals."""
+        correct = self.correct_chunks_total
+        if self.infer_chunks_total:
+            precision = float(correct) / self.infer_chunks_total
+        else:
+            precision = 0
+        if self.label_chunks_total:
+            recall = float(correct) / self.label_chunks_total
+        else:
+            recall = 0
+        if self.correct_chunks_total:
+            f1_score = float(2 * precision * recall) / (precision + recall)
+        else:
+            f1_score = 0
+        return [precision, recall, f1_score]
+    def _init_name(self, name):
+        # The `name` argument does not influence the reported names.
+        self._name = ['precision', 'recall', 'F1']
+    def name(self):
+        return self._name
--- a/examples/sequence_tagging/train.py
+++ b/examples/sequence_tagging/train.py
@@ -18,24 +18,14 @@ SequenceTagging network structure
 from __future__ import division
 from __future__ import print_function
-import io
-import os
-import sys
-import math
-import argparse
-import numpy as np
-work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(os.path.join(work_dir, "../"))
-from paddle.incubate.hapi.model import Input, set_device
-from paddle.incubate.hapi.text.sequence_tagging import SeqTagging, LacLoss, ChunkEval
-from paddle.incubate.hapi.text.sequence_tagging import LacDataset, LacDataLoader
-from paddle.incubate.hapi.text.sequence_tagging import check_gpu, check_version
-from paddle.incubate.hapi.text.sequence_tagging import PDConfig
 import paddle.fluid as fluid
 from paddle.fluid.optimizer import AdamOptimizer
+from paddle.incubate.hapi.model import Input, set_device
+from sequence_tagging import SeqTagging, LacLoss, ChunkEval
+from reader import LacDataset, LacDataLoader
+from utils.check import check_gpu, check_version
+from utils.configure import PDConfig
 def main(args):
@@ -44,17 +34,15 @@ def main(args):
    inputs = [
        Input(
-            [None, None], 'int64', name='words'), Input(
+            [None, None], 'int64', name='words'),
-                [None], 'int64', name='length'), Input(
+        Input(
-                    [None, None], 'int64', name='target')
+            [None], 'int64', name='length'),
+        Input(
+            [None, None], 'int64', name='target'),
    ]
    labels = [Input([None, None], 'int64', name='labels')]
-    feed_list = None if args.dynamic else [
-        x.forward() for x in inputs + labels
-    ]
    dataset = LacDataset(args)
    train_dataset = LacDataLoader(args, place, phase="train")
@@ -95,6 +83,7 @@ if __name__ == '__main__':
    use_gpu = True if args.device == "gpu" else False
    check_gpu(use_gpu)
-    check_version()
+    # TODO: re-enable the version check for 2.0.0-alpha0 once fluid.require_version supports it
+    # check_version()
    main(args)
--- a/examples/sequence_tagging/utils/__init__.py
+++ b/examples/sequence_tagging/utils/__init__.py
--- a/examples/sequence_tagging/utils/check.py
+++ b/examples/sequence_tagging/utils/check.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import sys
+import paddle.fluid as fluid
+__all__ = ['check_gpu', 'check_version']
+def check_gpu(use_gpu):
+    """
+    Print an error and exit with status 1 when `use_gpu` is true but
+    paddlepaddle was not compiled with CUDA.
+    Any Exception raised while checking or printing is ignored.
+    """
+    message = ("Config use_gpu cannot be set as true while you are "
+               "using paddlepaddle cpu version ! \nPlease try: \n"
+               "\t1. Install paddlepaddle-gpu to run model on GPU \n"
+               "\t2. Set use_gpu as false in config file to run "
+               "model on CPU")
+    try:
+        if not use_gpu or fluid.is_compiled_with_cuda():
+            return
+        print(message)
+        # SystemExit is not an Exception subclass, so it is not swallowed
+        # by the handler below.
+        sys.exit(1)
+    except Exception:
+        pass
+def check_version():
+    """
+    Print an error and exit with status 1 when
+    `fluid.require_version('2.0.0')` raises, i.e. when the installed
+    paddlepaddle version is not satisfied.
+    """
+    # The last fragment must not end with a line-continuation backslash:
+    # it would join the following `try:` onto this statement.
+    err = "PaddlePaddle version 2.0 or higher is required, " \
+          "or a suitable develop version is satisfied as well. \n" \
+          "Please make sure the version is good with your code."
+    try:
+        fluid.require_version('2.0.0')
+    except Exception:
+        print(err)
+        sys.exit(1)
--- a/examples/sequence_tagging/utils/configure.py
+++ b/examples/sequence_tagging/utils/configure.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+import argparse
+import json
+import yaml
+import six
+import logging
+logging_only_message = "%(message)s"
+logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
+class JsonConfig(object):
+    """
+    A high-level api for handling json configure file.
+    """
+    def __init__(self, config_path):
+        self._config_dict = self._parse(config_path)
+    def _parse(self, config_path):
+        """
+        Load `config_path` as JSON and return the parsed object.
+        Raises:
+            IOError: if the file cannot be opened or parsed.
+        """
+        try:
+            with open(config_path) as json_file:
+                config_dict = json.load(json_file)
+        # `except Exception` instead of a bare `except` so KeyboardInterrupt
+        # and SystemExit are not turned into an IOError.
+        except Exception:
+            raise IOError("Error in parsing bert model config file '%s'" %
+                          config_path)
+        else:
+            return config_dict
+    def __getitem__(self, key):
+        return self._config_dict[key]
+    def print_config(self):
+        """Print every top-level entry as `key: value`, sorted by key."""
+        for arg, value in sorted(six.iteritems(self._config_dict)):
+            print('%s: %s' % (arg, value))
+        print('------------------------------------------------')
+class ArgumentGroup(object):
+    """Thin wrapper around an argparse argument group."""
+    def __init__(self, parser, title, des):
+        self._group = parser.add_argument_group(title=title, description=des)
+    def add_arg(self, name, type, default, help, **kwargs):
+        """
+        Register `--<name>` on the group. `bool` is replaced by `str2bool`
+        as the converter, and the default value is appended to the help text.
+        """
+        if type == bool:
+            type = str2bool
+        option = "--" + name
+        help_text = help + ' Default: %(default)s.'
+        self._group.add_argument(
+            option, default=default, type=type, help=help_text, **kwargs)
+class ArgConfig(object):
+    """
+    A high-level api for handling argument configs.
+    """
+    def __init__(self):
+        # Build one parser with four option groups: training, logging,
+        # run_type and a "customize" group that callers extend via add_arg.
+        parser = argparse.ArgumentParser()
+        train_g = ArgumentGroup(parser, "training", "training options.")
+        train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
+        train_g.add_arg("learning_rate", float, 5e-5,
+                        "Learning rate used to train with warmup.")
+        train_g.add_arg(
+            "lr_scheduler",
+            str,
+            "linear_warmup_decay",
+            "scheduler of learning rate.",
+            choices=['linear_warmup_decay', 'noam_decay'])
+        train_g.add_arg("weight_decay", float, 0.01,
+                        "Weight decay rate for L2 regularizer.")
+        train_g.add_arg(
+            "warmup_proportion", float, 0.1,
+            "Proportion of training steps to perform linear learning rate warmup for."
+        )
+        train_g.add_arg("save_steps", int, 1000,
+                        "The steps interval to save checkpoints.")
+        train_g.add_arg("use_fp16", bool, False,
+                        "Whether to use fp16 mixed precision training.")
+        train_g.add_arg(
+            "loss_scaling", float, 1.0,
+            "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
+        )
+        train_g.add_arg("pred_dir", str, None,
+                        "Path to save the prediction results")
+        log_g = ArgumentGroup(parser, "logging", "logging related.")
+        log_g.add_arg("skip_steps", int, 10,
+                      "The steps interval to print loss.")
+        log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")
+        run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
+        run_type_g.add_arg("use_cuda", bool, True,
+                           "If set, use GPU for training.")
+        run_type_g.add_arg(
+            "use_fast_executor", bool, False,
+            "If set, use fast parallel executor (in experiment).")
+        run_type_g.add_arg(
+            "num_iteration_per_drop_scope", int, 1,
+            "Ihe iteration intervals to clean up temporary variables.")
+        run_type_g.add_arg("do_train", bool, True,
+                           "Whether to perform training.")
+        run_type_g.add_arg("do_predict", bool, True,
+                           "Whether to perform prediction.")
+        custom_g = ArgumentGroup(parser, "customize", "customized options.")
+        # Kept on the instance so add_arg / build_conf can reach them later.
+        self.custom_g = custom_g
+        self.parser = parser
+    def add_arg(self, name, dtype, default, descrip):
+        """Register `--<name>` in the "customize" group."""
+        self.custom_g.add_arg(name, dtype, default, descrip)
+    def build_conf(self):
+        """Parse the command line and return the resulting namespace."""
+        return self.parser.parse_args()
+def str2bool(v):
+    """
+    Return True when `v`, lower-cased, is "true", "t" or "1"; False
+    otherwise. Used as an argparse converter because argparse cannot parse
+    strings such as "true" / "False" into python booleans directly.
+    """
+    truthy_spellings = ("true", "t", "1")
+    lowered = v.lower()
+    return lowered in truthy_spellings
+def print_arguments(args, log=None):
+    """
+    Write every attribute of `args` as `name: value`, sorted by name,
+    between a header and a footer line. Output goes through `log.info` when
+    `log` is truthy, otherwise through `print`.
+    """
+    emit = log.info if log else print
+    emit('-----------  Configuration Arguments -----------')
+    for arg, value in sorted(six.iteritems(vars(args))):
+        emit('%s: %s' % (arg, value))
+    emit('------------------------------------------------')
+class _UndefinedArgument(AttributeError, Warning):
+    """Raised by PDConfig.__getattr__ for an unknown option name.
+    It derives from AttributeError so that hasattr(), getattr() with a default,
+    copy and pickle behave normally, and from Warning so that existing
+    `except Warning` handlers keep working.
+    """
+class PDConfig(object):
+    """
+    A high-level API for managing configuration files in PaddlePaddle.
+    Can jointly work with command-line arguments, json files and yaml files.
+    Options are looked up as attributes, first in the parsed command-line
+    arguments, then in the json config and finally in the yaml config.
+    """
+    def __init__(self, json_file="", yaml_file="", fuse_args=True):
+        """
+            Init function for PDConfig.
+            json_file: the path to the json configure file.
+            yaml_file: the path to the yaml configure file.
+            fuse_args: if fuse the json/yaml configs with argparse.
+            Raises Warning when both json_file and yaml_file are given.
+        """
+        assert isinstance(json_file, str)
+        assert isinstance(yaml_file, str)
+        if json_file != "" and yaml_file != "":
+            raise Warning(
+                "json_file and yaml_file can not co-exist for now. please only use one configure file type."
+            )
+        # `args` stays None until build() is called; __add__ relies on that.
+        self.args = None
+        self.arg_config = {}
+        self.json_config = {}
+        self.yaml_config = {}
+        parser = argparse.ArgumentParser()
+        self.default_g = ArgumentGroup(parser, "default", "default options.")
+        self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
+        self.json_g = ArgumentGroup(parser, "json", "options from json.")
+        self.com_g = ArgumentGroup(parser, "custom", "customized options.")
+        self.default_g.add_arg("do_train", bool, False,
+                               "Whether to perform training.")
+        self.default_g.add_arg("do_predict", bool, False,
+                               "Whether to perform predicting.")
+        self.default_g.add_arg("do_eval", bool, False,
+                               "Whether to perform evaluating.")
+        self.default_g.add_arg(
+            "do_save_inference_model", bool, False,
+            "Whether to perform model saving for inference.")
+        # NOTE: args for profiler
+        self.default_g.add_arg(
+            "is_profiler", int, 0,
+            "the switch of profiler tools. (used for benchmark)")
+        self.default_g.add_arg(
+            "profiler_path", str, './',
+            "the profiler output file path. (used for benchmark)")
+        self.default_g.add_arg("max_iter", int, 0,
+                               "the max train batch num.(used for benchmark)")
+        self.parser = parser
+        if json_file != "":
+            self.load_json(json_file, fuse_args=fuse_args)
+        if yaml_file != "":
+            self.load_yaml(yaml_file, fuse_args=fuse_args)
+    @staticmethod
+    def _fuse_config(group, config, file_path):
+        """Register the scalar and list entries of `config` as options of `group`.
+        Non-empty lists use the type of their first element and nargs equal to
+        their length. Empty lists and values that are not int/float/str/bool
+        are not registered; they remain reachable through attribute lookup.
+        """
+        desc = "This is from %s" % file_path
+        for name in config:
+            value = config[name]
+            if isinstance(value, list):
+                # An empty list has no element to take the type from.
+                if value:
+                    group.add_arg(
+                        name, type(value[0]), value, desc, nargs=len(value))
+                continue
+            if isinstance(value, (int, float, str, bool)):
+                group.add_arg(name, type(value), value, desc)
+    def load_json(self, file_path, fuse_args=True):
+        """Load `file_path` into `self.json_config`.
+        When `fuse_args` is true the loaded entries are also registered in
+        the "json" argument group. Raises Warning if the file does not exist.
+        """
+        if not os.path.exists(file_path):
+            raise Warning("the json file %s does not exist." % file_path)
+        with open(file_path, "r") as fin:
+            # A file holding only `null` yields None; treat it as no options.
+            self.json_config = json.load(fin) or {}
+        if fuse_args:
+            self._fuse_config(self.json_g, self.json_config, file_path)
+    def load_yaml(self, file_path, fuse_args=True):
+        """Load `file_path` into `self.yaml_config` using yaml.SafeLoader.
+        When `fuse_args` is true the loaded entries are also registered in
+        the "yaml" argument group. Raises Warning if the file does not exist.
+        """
+        if not os.path.exists(file_path):
+            raise Warning("the yaml file %s does not exist." % file_path)
+        with open(file_path, "r") as fin:
+            # An empty yaml document yields None; treat it as no options.
+            self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader) or {}
+        if fuse_args:
+            self._fuse_config(self.yaml_g, self.yaml_config, file_path)
+    def build(self):
+        """Parse the command line and cache the result in `self.arg_config`."""
+        self.args = self.parser.parse_args()
+        self.arg_config = vars(self.args)
+    def __add__(self, new_arg):
+        """Register a custom option given as (name, dtype, default[, description]).
+        Must be used before build(). Returns self, so `config += (...)` works.
+        """
+        assert isinstance(new_arg, (list, tuple))
+        assert len(new_arg) >= 3
+        assert self.args is None
+        name, dtype, dvalue = new_arg[0], new_arg[1], new_arg[2]
+        # Use the 4th item whenever it is present, even if more items follow.
+        desc = new_arg[3] if len(
+            new_arg) >= 4 else "Description is not provided."
+        self.com_g.add_arg(name, dtype, dvalue, desc)
+        return self
+    def __getattr__(self, name):
+        """Resolve `name` from the parsed arguments, then the json config, then the yaml config."""
+        # This hook only runs when normal lookup fails. Going through __dict__
+        # keeps a lookup on a partially initialised instance (e.g. during copy
+        # or unpickling) from re-entering __getattr__ forever.
+        state = self.__dict__
+        for store in ("arg_config", "json_config", "yaml_config"):
+            config = state.get(store) or {}
+            if name in config:
+                return config[name]
+        raise _UndefinedArgument("The argument %s is not defined." % name)
+    def Print(self):
+        """Print all known options; command-line values win over file values."""
+        print("-" * 70)
+        for name in self.arg_config:
+            print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
+        for file_config in (self.json_config, self.yaml_config):
+            for name in file_config:
+                if name not in self.arg_config:
+                    print("%s:\t\t\t\t%s" %
+                          (str(name), str(file_config[name])))
+        print("-" * 70)
+if __name__ == "__main__":
+    # Manual smoke test. The string below only records alternative ways of
+    # driving PDConfig and is never executed.
+    """
+    pd_config = PDConfig(json_file = "./test/bert_config.json")
+    pd_config.build()
+    print(pd_config.do_train)
+    print(pd_config.hidden_size)
+    pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
+    pd_config.build()
+    print(pd_config.do_train)
+    print(pd_config.hidden_size)
+    """
+    pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
+    # `+=` falls back to PDConfig.__add__, which registers a custom option.
+    pd_config += ("my_age", int, 18, "I am forever 18.")
+    pd_config.build()
+    for option in ("do_train", "hidden_size", "my_age"):
+        print(getattr(pd_config, option))
--- a/examples/sequence_tagging/utils/metrics.py
+++ b/examples/sequence_tagging/utils/metrics.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import sys
+import paddle.fluid as fluid
+__all__ = ['chunk_count', "build_chunk"]
+def build_chunk(data_list, id2label_dict):
+    """
+    Assemble the chunks (entities) of one tag-id sequence.
+    Args:
+        data_list: sequence of tag ids; each id is looked up as str(id).
+        id2label_dict: mapping from str(id) to a tag such as "O" or a tag
+            whose suffix is "B" / "I" and whose part before '-' is the type.
+    Returns:
+        dict mapping "start_end" (inclusive positions) to the chunk type.
+    An "O" tag or a "...B" tag always opens a new chunk; an "...I" tag opens
+    one only when its type differs from the current chunk's type. The chunk
+    that is still open when the sequence ends is recorded as well.
+    """
+    # NOTE(review): an id missing from id2label_dict yields None and fails
+    # on `.endswith` below, as before - confirm whether that can occur.
+    tag_list = [id2label_dict.get(str(tag_id)) for tag_id in data_list]
+    ner_dict = {}
+    ner_str = ""
+    ner_start = 0
+    for i, tag in enumerate(tag_list):
+        if tag == u"O":
+            opens_chunk, chunk_type = True, tag
+        elif tag.endswith(u"B"):
+            opens_chunk, chunk_type = True, tag.split('-')[0]
+        elif tag.endswith(u"I"):
+            chunk_type = tag.split('-')[0]
+            opens_chunk = chunk_type != ner_str
+        else:
+            # Any other tag just extends the current chunk.
+            opens_chunk = False
+        if opens_chunk:
+            # Close the previous chunk; nothing precedes position 0.
+            if i != 0:
+                ner_dict["%d_%d" % (ner_start, i - 1)] = ner_str
+            ner_start = i
+            ner_str = chunk_type
+    # Flush the trailing chunk, which no later boundary would ever close.
+    if tag_list:
+        ner_dict["%d_%d" % (ner_start, len(tag_list) - 1)] = ner_str
+    return ner_dict
+def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
+    """
+    Count predicted, labelled and matching chunks over a batch.
+    Row i of `infer_numpy` and `label_numpy` is cut to its first seq_len[i]
+    items and turned into chunks with build_chunk. A predicted chunk counts
+    as correct when the labels contain the same span with the same type.
+    Returns:
+        (num_infer_chunks, num_label_chunks, num_correct_chunks)
+    """
+    assert infer_numpy.shape[0] == label_numpy.shape[0]
+    infer_total, label_total, correct_total = 0, 0, 0
+    for row in range(infer_numpy.shape[0]):
+        predicted_ids = infer_numpy[row][:seq_len[row]]
+        gold_ids = label_numpy[row][:seq_len[row]]
+        predicted = build_chunk(predicted_ids, id2label_dict)
+        infer_total += len(predicted)
+        gold = build_chunk(gold_ids, id2label_dict)
+        label_total += len(gold)
+        correct_total += sum(
+            1 for span in predicted
+            if span in gold and gold[span] == predicted[span])
+    return infer_total, label_total, correct_total
--- a/examples/transformer/transformer.py
+++ b/examples/transformer/transformer.py
@@ -18,9 +18,9 @@ import numpy as np
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable
+from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
-from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
+from paddle.incubate.hapi.model import Model
-from paddle.incubate.hapi.model import Model, CrossEntropy, Loss
+from paddle.incubate.hapi.loss import Loss
 from paddle.incubate.hapi.text import TransformerBeamSearchDecoder, DynamicDecode
@@ -43,31 +43,6 @@ def position_encoding_init(n_position, d_pos_vec):
    return position_enc.astype("float32")
-class NoamDecay(LearningRateDecay):
-    """
-    learning rate scheduler
-    """
-    def __init__(self,
-                 d_model,
-                 warmup_steps,
-                 static_lr=2.0,
-                 begin=1,
-                 step=1,
-                 dtype='float32'):
-        super(NoamDecay, self).__init__(begin, step, dtype)
-        self.d_model = d_model
-        self.warmup_steps = warmup_steps
-        self.static_lr = static_lr
-    def step(self):
-        a = self.create_lr_var(self.step_num**-0.5)
-        b = self.create_lr_var((self.warmup_steps**-1.5) * self.step_num)
-        lr_value = (self.d_model**-0.5) * layers.elementwise_min(
-            a, b) * self.static_lr
-        return lr_value
 class PrePostProcessLayer(Layer):
    """
    PrePostProcessLayer