Commit d5fbe650 authored by 0YuanZhang0, committed by pkpk

update Dgu and ade module (#2971)

* dgu_and_ade

* fix_comment
Parent dc9116d0
# this file is only used for continuous evaluation test!
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""this file is only used for continuous evaluation test!"""
import os
import sys
......
"""
Evaluation for auto dialogue evaluation
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Evaluation for auto dialogue evaluation"""
import sys
import numpy as np
......
#!/bin/bash
#check data directory
cd ..
echo "Start download data and models.............."
if [ ! -d "data" ]; then
echo "Directory data does not exist, make new data directory"
mkdir data
fi
cd data
#check configure file
if [ ! -d "config" ]; then
echo "config directory not exist........"
exit 255
else
if [ ! -f "config/ade.yaml" ]; then
echo "config file dgu.yaml has been lost........"
exit 255
fi
fi
#check and download input data
if [ ! -d "input" ]; then
echo "Directory input does not exist, make new input directory"
mkdir input
fi
cd input
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz
tar -zxvf auto_dialogue_evaluation_dataset-1.0.0.tar.gz
rm auto_dialogue_evaluation_dataset-1.0.0.tar.gz
cd ..
#check and download pretrain model
if [ ! -d "pretrain_model" ]; then
echo "Directory pretrain_model does not exist, make new pretrain_model directory"
mkdir pretrain_model
fi
#check and download inference model
if [ ! -d "inference_models" ]; then
    echo "Directory inference_models does not exist, make new inference_models directory"
mkdir inference_models
fi
#check output
if [ ! -d "output" ]; then
echo "Directory output does not exist, make new output directory"
mkdir output
fi
#check saved model
if [ ! -d "saved_models" ]; then
echo "Directory saved_models does not exist, make new saved_models directory"
mkdir saved_models
fi
cd saved_models
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_models.2.0.0.tar.gz
tar -xvf auto_dialogue_evaluation_models.2.0.0.tar.gz
rm auto_dialogue_evaluation_models.2.0.0.tar.gz
echo "Finish.............."
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Reader for auto dialogue evaluation"""
import sys
import time
import random
import numpy as np
import paddle
import paddle.fluid as fluid
class DataProcessor(object):
def __init__(self, data_path, max_seq_length, batch_size):
"""init"""
self.data_file = data_path
self.max_seq_len = max_seq_length
self.batch_size = batch_size
self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
def get_examples(self):
"""load examples"""
examples = []
with open(self.data_file, 'r') as fr:
for line in fr:
examples.append(line.strip())
return examples
def get_num_examples(self, phase):
"""Get number of examples for train, dev or test."""
if phase not in ['train', 'dev', 'test']:
raise ValueError(
"Unknown phase, which should be in ['train', 'dev', 'test'].")
        with open(self.data_file, 'r') as fr:
            count = len(fr.readlines())
self.num_examples[phase] = count
return self.num_examples[phase]
def data_generator(self,
place,
phase="train",
shuffle=True,
sample_pro=1):
"""
Generate data for train, dev or test.
Args:
phase: string. The phase for which to generate data.
shuffle: bool. Whether to shuffle examples.
sample_pro: sample data ratio
"""
examples = self.get_examples()
if shuffle:
np.random.shuffle(examples)
def batch_reader():
"""read batch data"""
batch = []
for example in examples:
if sample_pro < 1:
if random.random() > sample_pro:
continue
tokens = example.strip().split('\t')
assert len(tokens) == 3
context = [int(x) for x in tokens[0].split()[: self.max_seq_len]]
response = [int(x) for x in tokens[1].split()[: self.max_seq_len]]
label = [int(tokens[2])]
instance = (context, response, label)
if len(batch) < self.batch_size:
batch.append(instance)
else:
if len(batch) == self.batch_size:
yield batch
batch = [instance]
if len(batch) > 0:
yield batch
def create_lodtensor(data_ids, place):
"""create LodTensor for input ids"""
cur_len = 0
lod = [cur_len]
seq_lens = [len(ids) for ids in data_ids]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data_ids, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def wrapper():
"""yield batch data to network"""
for batch_data in batch_reader():
context_ids = [batch[0] for batch in batch_data]
response_ids = [batch[1] for batch in batch_data]
label_ids = [batch[2] for batch in batch_data]
context_res = create_lodtensor(context_ids, place)
response_res = create_lodtensor(response_ids, place)
label_ids = np.array(label_ids).astype("int64").reshape([-1, 1])
input_batch = [context_res, response_res, label_ids]
yield input_batch
return wrapper
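

# Minimal usage sketch for the DataProcessor above; the data path is an
# assumption, and any ids file with "context \t response \t label" lines works.
if __name__ == "__main__":
    demo_place = fluid.CPUPlace()
    processor = DataProcessor(
        data_path="./data/input/data/unlabel_data/train.ids",  # assumed path
        max_seq_length=50,
        batch_size=2)
    generator = processor.data_generator(
        place=demo_place, phase="train", shuffle=False, sample_pro=1)
    for context_lod, response_lod, labels in generator():
        print(context_lod.lod(), labels.shape)  # offsets and (batch, 1) labels
        break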
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
"""
    A high-level api for handling a json configure file.
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
def _parse(self, config_path):
try:
with open(config_path) as json_file:
config_dict = json.load(json_file)
        except Exception:
            raise IOError("Error in parsing the json config file '%s'" %
                          config_path)
else:
return config_dict
def __getitem__(self, key):
return self._config_dict[key]
def print_config(self):
for arg, value in sorted(six.iteritems(self._config_dict)):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
class ArgumentGroup(object):
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
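    # Note: bool-typed options are converted through str2bool (defined below),
    # so flags like "--use_cuda false" parse as real booleans.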
class ArgConfig(object):
"""
A high-level api for handling argument configs.
"""
def __init__(self):
parser = argparse.ArgumentParser()
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
train_g.add_arg("learning_rate", float, 5e-5,
"Learning rate used to train with warmup.")
train_g.add_arg(
"lr_scheduler",
str,
"linear_warmup_decay",
"scheduler of learning rate.",
choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg("weight_decay", float, 0.01,
"Weight decay rate for L2 regularizer.")
train_g.add_arg(
"warmup_proportion", float, 0.1,
"Proportion of training steps to perform linear learning rate warmup for."
)
train_g.add_arg("save_steps", int, 1000,
"The steps interval to save checkpoints.")
train_g.add_arg("use_fp16", bool, False,
"Whether to use fp16 mixed precision training.")
train_g.add_arg(
"loss_scaling", float, 1.0,
"Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
)
train_g.add_arg("pred_dir", str, None,
"Path to save the prediction results")
log_g = ArgumentGroup(parser, "logging", "logging related.")
log_g.add_arg("skip_steps", int, 10,
"The steps interval to print loss.")
log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg("use_cuda", bool, True,
"If set, use GPU for training.")
run_type_g.add_arg(
"use_fast_executor", bool, False,
"If set, use fast parallel executor (in experiment).")
run_type_g.add_arg(
"num_iteration_per_drop_scope", int, 1,
"Ihe iteration intervals to clean up temporary variables.")
run_type_g.add_arg("do_train", bool, True,
"Whether to perform training.")
run_type_g.add_arg("do_predict", bool, True,
"Whether to perform prediction.")
custom_g = ArgumentGroup(parser, "customize", "customized options.")
self.custom_g = custom_g
self.parser = parser
def add_arg(self, name, dtype, default, descrip):
self.custom_g.add_arg(name, dtype, default, descrip)
def build_conf(self):
return self.parser.parse_args()
def str2bool(v):
    # argparse cannot parse strings like "True"/"False" into Python booleans
    # directly, so bool-typed options are routed through this helper
return v.lower() in ("true", "t", "1")
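# e.g. str2bool("True") -> True, str2bool("0") -> False; any other string,
# including "no", also maps to False.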
def print_arguments(args, log=None):
if not log:
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
else:
log.info('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
log.info('%s: %s' % (arg, value))
log.info('------------------------------------------------')
class PDConfig(object):
"""
A high-level API for managing configuration files in PaddlePaddle.
    It works jointly with command-line arguments, json files and yaml files.
"""
def __init__(self, json_file="", yaml_file="", fuse_args=True):
"""
        Init function for PDConfig.
json_file: the path to the json configure file.
yaml_file: the path to the yaml configure file.
fuse_args: if fuse the json/yaml configs with argparse.
"""
assert isinstance(json_file, str)
assert isinstance(yaml_file, str)
if json_file != "" and yaml_file != "":
raise Warning(
"json_file and yaml_file can not co-exist for now. please only use one configure file type."
)
return
self.args = None
self.arg_config = {}
self.json_config = {}
self.yaml_config = {}
parser = argparse.ArgumentParser()
self.default_g = ArgumentGroup(parser, "default", "default options.")
self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
self.json_g = ArgumentGroup(parser, "json", "options from json.")
self.com_g = ArgumentGroup(parser, "custom", "customized options.")
self.default_g.add_arg("epoch", int, 2,
"Number of epoches for training.")
self.default_g.add_arg("learning_rate", float, 1e-2,
"Learning rate used to train.")
self.default_g.add_arg("do_train", bool, False,
"Whether to perform training.")
self.default_g.add_arg("do_predict", bool, False,
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.parser = parser
if json_file != "":
self.load_json(json_file, fuse_args=fuse_args)
if yaml_file:
self.load_yaml(yaml_file, fuse_args=fuse_args)
def load_json(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise Warning("the json file %s does not exist." % file_path)
        with open(file_path, "r") as fin:
            self.json_config = json.loads(fin.read())
if fuse_args:
for name in self.json_config:
if not isinstance(self.json_config[name], int) \
and not isinstance(self.json_config[name], float) \
and not isinstance(self.json_config[name], str) \
and not isinstance(self.json_config[name], bool):
continue
self.json_g.add_arg(name,
type(self.json_config[name]),
self.json_config[name],
"This is from %s" % file_path)
def load_yaml(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise Warning("the yaml file %s does not exist." % file_path)
        with open(file_path, "r") as fin:
            self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
if fuse_args:
for name in self.yaml_config:
if not isinstance(self.yaml_config[name], int) \
and not isinstance(self.yaml_config[name], float) \
and not isinstance(self.yaml_config[name], str) \
and not isinstance(self.yaml_config[name], bool):
continue
self.yaml_g.add_arg(name,
type(self.yaml_config[name]),
self.yaml_config[name],
"This is from %s" % file_path)
def build(self):
self.args = self.parser.parse_args()
self.arg_config = vars(self.args)
def __add__(self, new_arg):
assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
assert len(new_arg) >= 3
assert self.args is None
name = new_arg[0]
dtype = new_arg[1]
dvalue = new_arg[2]
desc = new_arg[3] if len(
new_arg) == 4 else "Description is not provided."
self.com_g.add_arg(name, dtype, dvalue, desc)
return self
def __getattr__(self, name):
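        # Lookup order: command-line args first, then the json config, then
        # the yaml config; the first source that defines the name wins.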
if name in self.arg_config:
return self.arg_config[name]
if name in self.json_config:
return self.json_config[name]
if name in self.yaml_config:
return self.yaml_config[name]
raise Warning("The argument %s is not defined." % name)
def Print(self):
print("-" * 70)
for name in self.arg_config:
print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
for name in self.json_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.json_config[name])))
for name in self.yaml_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.yaml_config[name])))
print("-" * 70)
if __name__ == "__main__":
"""
pd_config = PDConfig(json_file = "./test/bert_config.json")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
"""
pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
pd_config += ("my_age", int, 18, "I am forever 18.")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
print(pd_config.my_age)
"""
Init for pretrained para
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -16,6 +12,8 @@ Init for pretrained para
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import os
......@@ -27,25 +25,9 @@ import numpy as np
import paddle.fluid as fluid
def init_pretraining_params(exe, pretraining_params_path, main_program):
"""
Init pretraining params
"""
    assert os.path.exists(pretraining_params_path), \
        "[%s] can't be found." % pretraining_params_path
def existed_params(var):
"""
        Check that var is a Parameter whose file exists under the pretraining dir
"""
if not isinstance(var, fluid.framework.Parameter):
return False
return os.path.exists(os.path.join(pretraining_params_path, var.name))
fluid.io.load_vars(
exe,
pretraining_params_path,
main_program=main_program,
predicate=existed_params)
print("Load pretraining parameters from {}.".format(
pretraining_params_path))
class InputField(object):
def __init__(self, input_field):
"""init inpit field"""
self.context_wordseq = input_field[0]
self.response_wordseq = input_field[1]
self.labels = input_field[2]
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
import paddle.fluid as fluid
def check_cuda(use_cuda, err = \
    "\nYou cannot set use_cuda = True in the model because you are using paddlepaddle-cpu.\n \
    Please: 1. Install paddlepaddle-gpu to run your models on GPU or 2. Set use_cuda = False to run models on CPU.\n"
    ):
    try:
        if use_cuda and not fluid.is_compiled_with_cuda():
            print(err)
            sys.exit(1)
    except Exception:
        pass
if __name__ == "__main__":
check_cuda(True)
check_cuda(False)
check_cuda(True, "This is only for testing.")
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""save or load model api"""
import os
import sys
import paddle
import paddle.fluid as fluid
def init_from_pretrain_model(args, exe, program):
assert isinstance(args.init_from_pretrain_model, str)
    if not os.path.exists(args.init_from_pretrain_model):
        raise Warning("The pretrained params do not exist.")
def existed_params(var):
if not isinstance(var, fluid.framework.Parameter):
return False
return os.path.exists(
os.path.join(args.init_from_pretrain_model, var.name))
fluid.io.load_vars(
exe,
args.init_from_pretrain_model,
main_program=program,
predicate=existed_params)
print("finish initing model from pretrained params from %s" %
(args.init_from_pretrain_model))
return True
def init_from_checkpoint(args, exe, program):
assert isinstance(args.init_from_checkpoint, str)
    if not os.path.exists(args.init_from_checkpoint):
        raise Warning("the checkpoint path does not exist.")
fluid.io.load_persistables(
executor=exe,
dirname=args.init_from_checkpoint,
main_program=program,
filename="checkpoint.pdckpt")
print("finish initing model from checkpoint from %s" %
(args.init_from_checkpoint))
return True
def init_from_params(args, exe, program):
assert isinstance(args.init_from_params, str)
    if not os.path.exists(args.init_from_params):
        raise Warning("the params path does not exist.")
fluid.io.load_params(
executor=exe,
dirname=args.init_from_params,
main_program=program,
filename="params.pdparams")
print("finish init model from params from %s" % (args.init_from_params))
return True
def save_checkpoint(args, exe, program, dirname):
assert isinstance(args.save_model_path, str)
checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
fluid.io.save_persistables(
exe,
os.path.join(checkpoint_dir, dirname),
main_program=program,
filename="checkpoint.pdckpt")
print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname)))
return True
def save_param(args, exe, program, dirname):
assert isinstance(args.save_model_path, str)
param_dir = os.path.join(args.save_model_path, args.save_param)
    if not os.path.exists(param_dir):
        os.makedirs(param_dir)
fluid.io.save_params(
exe,
os.path.join(param_dir, dirname),
main_program=program,
filename="params.pdparams")
print("save parameters at %s" % (os.path.join(param_dir, dirname)))
return True
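

# Illustrative round trip, a sketch assuming an args namespace as above and
# an existing executor/program; paths are assumptions:
#
#   save_checkpoint(args, exe, train_prog, "step_100")
#   args.init_from_checkpoint = os.path.join(
#       args.save_model_path, args.save_checkpoint, "step_100")
#   init_from_checkpoint(args, exe, train_prog)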
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Network for auto dialogue evaluation"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
def create_net(
is_training,
model_input,
args,
clip_value=10.0,
word_emb_name="shared_word_emb",
lstm_W_name="shared_lstm_W",
lstm_bias_name="shared_lstm_bias"):
context_wordseq = model_input.context_wordseq
response_wordseq = model_input.response_wordseq
label = model_input.labels
#emb
context_emb = fluid.layers.embedding(
input=context_wordseq,
size=[args.vocab_size, args.emb_size],
is_sparse=True,
param_attr=fluid.ParamAttr(
name=word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1)))
response_emb = fluid.layers.embedding(
input=response_wordseq,
size=[args.vocab_size, args.emb_size],
is_sparse=True,
param_attr=fluid.ParamAttr(
name=word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1)))
#fc to fit dynamic LSTM
context_fc = fluid.layers.fc(
input=context_emb,
size=args.hidden_size * 4,
param_attr=fluid.ParamAttr(name='fc_weight'),
bias_attr=fluid.ParamAttr(name='fc_bias'))
response_fc = fluid.layers.fc(
input=response_emb,
size=args.hidden_size * 4,
param_attr=fluid.ParamAttr(name='fc_weight'),
bias_attr=fluid.ParamAttr(name='fc_bias'))
#LSTM
context_rep, _ = fluid.layers.dynamic_lstm(
input=context_fc,
size=args.hidden_size * 4,
param_attr=fluid.ParamAttr(name=lstm_W_name),
bias_attr=fluid.ParamAttr(name=lstm_bias_name))
context_rep = fluid.layers.sequence_last_step(context_rep)
response_rep, _ = fluid.layers.dynamic_lstm(
input=response_fc,
size=args.hidden_size * 4,
param_attr=fluid.ParamAttr(name=lstm_W_name),
bias_attr=fluid.ParamAttr(name=lstm_bias_name))
response_rep = fluid.layers.sequence_last_step(input=response_rep)
logits = fluid.layers.bilinear_tensor_product(
context_rep, response_rep, size=1)
if args.loss_type == 'CLS':
label = fluid.layers.cast(x=label, dtype='float32')
loss = fluid.layers.sigmoid_cross_entropy_with_logits(logits, label)
loss = fluid.layers.reduce_mean(
fluid.layers.clip(
loss, min=-clip_value, max=clip_value))
elif args.loss_type == 'L2':
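        # 2*sigmoid squashes logits into (0, 2), and dividing the squared
        # error by 4 normalizes by the largest possible gap of that range
        # (human relevance scores are assumed to lie in [0, 2]).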
norm_score = 2 * fluid.layers.sigmoid(logits)
label = fluid.layers.cast(x=label, dtype='float32')
loss = fluid.layers.square_error_cost(norm_score, label) / 4
loss = fluid.layers.reduce_mean(loss)
    else:
        raise ValueError("Unknown loss_type: %s" % args.loss_type)
if is_training:
return loss
else:
return logits
def set_word_embedding(word_emb, place, word_emb_name="shared_word_emb"):
"""
Set word embedding
"""
word_emb_param = fluid.global_scope().find_var(
word_emb_name).get_tensor()
word_emb_param.set(word_emb, place)
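

# Sketch of how set_word_embedding might be driven, mirroring the pickle
# loading in the training code; the embedding path is an assumption:
#
#   import pickle
#   import numpy as np
#   with open("./data/word_emb.pkl", "rb") as fin:
#       word_emb = np.array(pickle.load(fin)).astype("float32")
#   set_word_embedding(word_emb, place)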
"""
Auto Dialogue Evaluation.
"""
import argparse
import six
def parse_args():
"""
Auto Dialogue Evaluation Config
"""
parser = argparse.ArgumentParser('Automatic Dialogue Evaluation.')
parser.add_argument(
'--do_train', type=bool, default=False, help='Whether to perform training.')
parser.add_argument(
'--do_val', type=bool, default=False, help='Whether to perform evaluation.')
parser.add_argument(
'--do_infer', type=bool, default=False, help='Whether to perform inference.')
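    # Caveat: argparse's type=bool treats any non-empty string as True, so
    # pass these switches as "--do_train True" or omit them entirely.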
parser.add_argument(
'--loss_type', type=str, default='CLS', help='Loss type, CLS or L2.')
#data path
parser.add_argument(
'--train_path', type=str, default=None, help='Path of training data')
parser.add_argument(
'--val_path', type=str, default=None, help='Path of validation data')
parser.add_argument(
'--test_path', type=str, default=None, help='Path of test data')
parser.add_argument(
'--save_path', type=str, default='tmp', help='Save path')
#step fit for data size
parser.add_argument(
'--print_step', type=int, default=50, help='Print step')
parser.add_argument(
'--save_step', type=int, default=400, help='Save step')
parser.add_argument(
        '--num_scan_data', type=int, default=20, help='Number of passes over the training data')
parser.add_argument(
'--word_emb_init', type=str, default=None, help='Path to the initial word embedding')
parser.add_argument(
'--init_model', type=str, default=None, help='Path to the init model')
parser.add_argument(
'--use_cuda',
action='store_true',
help='If set, use cuda for training.')
parser.add_argument(
'--batch_size', type=int, default=256, help='Batch size')
parser.add_argument(
'--hidden_size', type=int, default=256, help='Hidden size')
parser.add_argument(
'--emb_size', type=int, default=256, help='Embedding size')
parser.add_argument(
'--vocab_size', type=int, default=484016, help='Vocabulary size')
parser.add_argument(
'--learning_rate', type=float, default=0.001, help='Learning rate')
parser.add_argument(
'--sample_pro', type=float, default=1, help='Sample probability for training data')
parser.add_argument(
'--max_len', type=int, default=50, help='Max length for sentences')
args = parser.parse_args()
return args
def print_arguments(args):
"""
Print Config
"""
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
loss_type: "CLS"
training_file: ""
val_file: ""
predict_file: ""
print_steps: 10
save_steps: 10
num_scan_data: ""
word_emb_init: ""
init_model: ""
use_cuda: ""
batch_size: 256
hidden_size: 256
emb_size: 256
vocab_size: 484016
sample_pro: 1.0
output_prediction_file: ""
init_from_checkpoint: ""
init_from_params: ""
init_from_pretrain_model: ""
inference_model_dir: ""
save_model_path: ""
save_checkpoint: ""
save_param: ""
evaluation_file: ""
vocab_path: ""
max_seq_len: 128
random_seed: 110
do_save_inference_model: False
enable_ce: "store_true"
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_dataset-1.0.0.tar.gz
tar -xzf auto_dialogue_evaluation_dataset-1.0.0.tar.gz
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""evaluation metrics"""
import os
import sys
import numpy as np
import ade.evaluate as evaluate
from ade.utils.configure import PDConfig
def do_eval(args):
"""evaluate metrics"""
labels = []
with open(args.evaluation_file, 'r') as fr:
for line in fr:
tokens = line.strip().split('\t')
assert len(tokens) == 3
label = int(tokens[2])
labels.append(label)
scores = []
with open(args.output_prediction_file, 'r') as fr:
for line in fr:
tokens = line.strip().split('\t')
assert len(tokens) == 2
score = tokens[1].strip("[]").split()
score = np.array(score)
score = score.astype(np.float64)
scores.append(score)
if args.loss_type == 'CLS':
recall_dict = evaluate.evaluate_Recall(list(zip(scores, labels)))
mean_score = sum(scores) / len(scores)
print('mean score: %.6f' % mean_score)
print('evaluation recall result:')
print('1_in_2: %.6f\t1_in_10: %.6f\t2_in_10: %.6f\t5_in_10: %.6f' %
(recall_dict['1_in_2'], recall_dict['1_in_10'],
recall_dict['2_in_10'], recall_dict['5_in_10']))
elif args.loss_type == 'L2':
scores = [x[0] for x in scores]
mean_score = sum(scores) / len(scores)
cor = evaluate.evaluate_cor(scores, labels)
print('mean score: %.6f\nevaluation cor results:%.6f' %
(mean_score, cor))
    else:
        raise ValueError("Unknown loss_type: %s" % args.loss_type)
if __name__ == "__main__":
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
do_eval(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""save inference model for auto dialogue evaluation"""
import os
import sys
import six
import numpy as np
import time
import multiprocessing
import paddle
import paddle.fluid as fluid
import ade.reader as reader
from ade_net import create_net
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
def do_save_inference_model(args):
test_prog = fluid.default_main_program()
startup_prog = fluid.default_startup_program()
with fluid.program_guard(test_prog, startup_prog):
test_prog.random_seed = args.random_seed
startup_prog.random_seed = args.random_seed
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(feed_list=input_inst,
capacity=4, iterable=False)
logits = create_net(
is_training=False,
model_input=input_field,
args=args
)
if args.use_cuda:
place = fluid.CUDAPlace(0)
else:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_params) or (args.init_from_pretrain_model)
if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog)
elif args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
# saving inference model
fluid.io.save_inference_model(
args.inference_model_dir,
feeded_var_names=[
input_field.context_wordseq.name,
input_field.response_wordseq.name,
],
target_vars=[
logits,
],
executor=exe,
main_program=test_prog,
model_filename="model.pdmodel",
params_filename="params.pdparams")
print("save inference model at %s" % (args.inference_model_dir))
if __name__ == "__main__":
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
check_cuda(args.use_cuda)
do_save_inference_model(args)
"""
Auto dialogue evaluation task
"""
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import six
import numpy as np
import time
import multiprocessing
import paddle
import paddle.fluid as fluid
import reader as reader
import evaluation as eva
import init as init
try:
import cPickle as pickle #python 2
except ImportError as e:
import pickle #python 3
sys.path.append('../../models/dialogue_model_toolkit/auto_dialogue_evaluation/')
sys.path.append('../../models/')
from net import Network
import config
from model_check import check_cuda
def train(args):
"""Train
"""
if not os.path.exists(args.save_path):
os.makedirs(args.save_path)
net = Network(args.vocab_size, args.emb_size, args.hidden_size)
train_program = fluid.Program()
train_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
train_program.random_seed = 110
train_startup.random_seed = 110
with fluid.program_guard(train_program, train_startup):
with fluid.unique_name.guard():
logits, loss = net.network(args.loss_type)
loss.persistable = True
logits.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
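            # Each gradient element is clipped to [-1, 1] before the Adam update.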
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer.minimize(loss)
print("begin memory optimization ...")
fluid.memory_optimize(train_program)
print("end memory optimization ...")
test_program = fluid.Program()
test_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
test_program.random_seed = 110
test_startup.random_seed = 110
with fluid.program_guard(test_program, test_startup):
with fluid.unique_name.guard():
logits, loss = net.network(args.loss_type)
loss.persistable = True
logits.persistable = True
test_program = test_program.clone(for_test=True)
if args.use_cuda:
place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
print("device count %d" % dev_count)
print("theoretical memory usage: ")
print(
fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size))
exe = fluid.Executor(place)
exe.run(train_startup)
exe.run(test_startup)
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda, loss_name=loss.name, main_program=train_program)
test_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda,
main_program=test_program,
share_vars_from=train_exe)
if args.word_emb_init is not None:
print("start loading word embedding init ...")
if six.PY2:
word_emb = np.array(pickle.load(open(args.word_emb_init,
'rb'))).astype('float32')
else:
word_emb = np.array(
pickle.load(
open(args.word_emb_init, 'rb'), encoding="bytes")).astype(
'float32')
net.set_word_embedding(word_emb, place)
print("finish init word embedding ...")
print("start loading data ...")
def train_with_feed(batch_data):
"""
Train on one batch
"""
        # TODO: get_feed_names
feed_dict = dict(zip(net.get_feed_names(), batch_data))
cost = train_exe.run(feed=feed_dict, fetch_list=[loss.name])
return cost[0]
def test_with_feed(batch_data):
"""
Test on one batch
"""
feed_dict = dict(zip(net.get_feed_names(), batch_data))
score = test_exe.run(feed=feed_dict, fetch_list=[logits.name])
return score[0]
def evaluate():
"""
Evaluate to choose model
"""
val_batches = reader.batch_reader(args.val_path, args.batch_size, place,
args.max_len, 1)
scores = []
labels = []
for batch in val_batches:
scores.extend(test_with_feed(batch))
labels.extend([x[0] for x in batch[2]])
return eva.evaluate_Recall(list(zip(scores, labels)))
def save_exe(step, best_recall):
"""
        Conditionally save the model when eval recall improves
"""
recall_dict = evaluate()
print('evaluation recall result:')
print('1_in_2: %s\t1_in_10: %s\t2_in_10: %s\t5_in_10: %s' %
(recall_dict['1_in_2'], recall_dict['1_in_10'],
recall_dict['2_in_10'], recall_dict['5_in_10']))
if recall_dict['1_in_10'] > best_recall and step != 0:
fluid.io.save_inference_model(
args.save_path,
net.get_feed_inference_names(),
logits,
exe,
main_program=train_program)
print("Save model at step %d ... " % step)
print(
time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
best_recall = recall_dict['1_in_10']
return best_recall
    # train over multiple epochs
global_step, train_time = 0, 0.0
best_recall = 0
for epoch in six.moves.xrange(args.num_scan_data):
train_batches = reader.batch_reader(args.train_path, args.batch_size,
place, args.max_len,
args.sample_pro)
begin_time = time.time()
sum_cost = 0
ce_cost = 0
for batch in train_batches:
if (args.save_path is not None) and (
global_step % args.save_step == 0):
best_recall = save_exe(global_step, best_recall)
cost = train_with_feed(batch)
global_step += 1
sum_cost += cost.mean()
ce_cost = cost.mean()
if global_step % args.print_step == 0:
print('training step %s avg loss %s' %
(global_step, sum_cost / args.print_step))
sum_cost = 0
pass_time_cost = time.time() - begin_time
train_time += pass_time_cost
print("Pass {0}, pass_time_cost {1}"
.format(epoch, "%2.2f sec" % pass_time_cost))
if "CE_MODE_X" in os.environ and epoch == args.num_scan_data - 1:
card_num = get_cards()
print("kpis\ttrain_duration_card%s\t%s" %
(card_num, pass_time_cost))
print("kpis\ttrain_loss_card%s\t%s" % (card_num, ce_cost))
def finetune(args):
"""
Finetune
"""
if not os.path.exists(args.save_path):
os.makedirs(args.save_path)
net = Network(args.vocab_size, args.emb_size, args.hidden_size)
train_program = fluid.Program()
train_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
train_program.random_seed = 110
train_startup.random_seed = 110
with fluid.program_guard(train_program, train_startup):
with fluid.unique_name.guard():
logits, loss = net.network(args.loss_type)
loss.persistable = True
logits.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=400,
decay_rate=0.9,
staircase=True))
optimizer.minimize(loss)
print("begin memory optimization ...")
fluid.memory_optimize(train_program)
print("end memory optimization ...")
test_program = fluid.Program()
test_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
test_program.random_seed = 110
test_startup.random_seed = 110
with fluid.program_guard(test_program, test_startup):
with fluid.unique_name.guard():
logits, loss = net.network(args.loss_type)
loss.persistable = True
logits.persistable = True
test_program = test_program.clone(for_test=True)
if args.use_cuda:
place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
print("device count %d" % dev_count)
print("theoretical memory usage: ")
print(
fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size))
exe = fluid.Executor(place)
exe.run(train_startup)
exe.run(test_startup)
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda, loss_name=loss.name, main_program=train_program)
test_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda,
main_program=test_program,
share_vars_from=train_exe)
if args.init_model:
init.init_pretraining_params(
exe, args.init_model, main_program=train_startup)
        print('successfully initialized from %s' % args.init_model)
print("start loading data ...")
def train_with_feed(batch_data):
"""
Train on one batch
"""
        # TODO: get_feed_names
feed_dict = dict(zip(net.get_feed_names(), batch_data))
cost = train_exe.run(feed=feed_dict, fetch_list=[loss.name])
return cost[0]
def test_with_feed(batch_data):
"""
Test on one batch
"""
feed_dict = dict(zip(net.get_feed_names(), batch_data))
score = test_exe.run(feed=feed_dict, fetch_list=[logits.name])
return score[0]
def evaluate():
"""
Evaluate to choose model
"""
val_batches = reader.batch_reader(args.val_path, args.batch_size, place,
args.max_len, 1)
scores = []
labels = []
for batch in val_batches:
scores.extend(test_with_feed(batch))
labels.extend([x[0] for x in batch[2]])
scores = [x[0] for x in scores]
return eva.evaluate_cor(scores, labels)
def save_exe(step, best_cor):
"""
        Conditionally save the model when eval correlation improves
"""
cor = evaluate()
print('evaluation cor relevance %s' % cor)
if cor > best_cor and step != 0:
fluid.io.save_inference_model(
args.save_path,
net.get_feed_inference_names(),
logits,
exe,
main_program=train_program)
print("Save model at step %d ... " % step)
print(
time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
best_cor = cor
return best_cor
    # train over multiple epochs
global_step, train_time = 0, 0.0
best_cor = 0.0
pre_index = -1
for epoch in six.moves.xrange(args.num_scan_data):
train_batches = reader.batch_reader(args.train_path, args.batch_size,
place, args.max_len,
args.sample_pro)
begin_time = time.time()
sum_cost = 0
for batch in train_batches:
if (args.save_path is not None) and (
global_step % args.save_step == 0):
best_cor = save_exe(global_step, best_cor)
cost = train_with_feed(batch)
global_step += 1
sum_cost += cost.mean()
if global_step % args.print_step == 0:
print('training step %s avg loss %s' %
(global_step, sum_cost / args.print_step))
sum_cost = 0
pass_time_cost = time.time() - begin_time
train_time += pass_time_cost
print("Pass {0}, pass_time_cost {1}"
.format(epoch, "%2.2f sec" % pass_time_cost))
def evaluate(args):
"""
Evaluate model for both pretrained and finetuned
"""
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
t0 = time.time()
with fluid.scope_guard(fluid.Scope()):
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(
args.init_model, exe)
print('init model %s' % args.init_model)
global_step, infer_time = 0, 0.0
test_batches = reader.batch_reader(args.test_path, args.batch_size,
place, args.max_len, 1)
scores = []
labels = []
for batch in test_batches:
logits = exe.run(infer_program,
feed={
'context_wordseq': batch[0],
'response_wordseq': batch[1]
},
fetch_list=fetch_vars)
logits = [x[0] for x in logits[0]]
scores.extend(logits)
labels.extend([x[0] for x in batch[2]])
print('len scores: %s len labels: %s' % (len(scores), len(labels)))
mean_score = sum(scores) / len(scores)
if args.loss_type == 'CLS':
recall_dict = eva.evaluate_Recall(list(zip(scores, labels)))
print('mean score: %s' % mean_score)
print('evaluation recall result:')
print('1_in_2: %s\t1_in_10: %s\t2_in_10: %s\t5_in_10: %s' %
(recall_dict['1_in_2'], recall_dict['1_in_10'],
recall_dict['2_in_10'], recall_dict['5_in_10']))
elif args.loss_type == 'L2':
cor = eva.evaluate_cor(scores, labels)
            print('mean score: %s\nevaluation cor results: %s' %
                  (mean_score, cor))
        else:
            raise ValueError("Unknown loss_type: %s" % args.loss_type)
t1 = time.time()
print("finish evaluate model:%s on data:%s time_cost(s):%.2f" %
(args.init_model, args.test_path, t1 - t0))
def infer(args):
"""
Inference function
"""
place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
t0 = time.time()
with fluid.scope_guard(fluid.Scope()):
infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model(
args.init_model, exe)
global_step, infer_time = 0, 0.0
test_batches = reader.batch_reader(args.test_path, args.batch_size,
place, args.max_len, 1)
scores = []
for batch in test_batches:
logits = exe.run(infer_program,
feed={
'context_wordseq': batch[0],
'response_wordseq': batch[1]
},
fetch_list=fetch_vars)
logits = [x[0] for x in logits[0]]
scores.extend(logits)
in_file = open(args.test_path, 'r')
out_path = args.test_path + '.infer'
out_file = open(out_path, 'w')
for line, s in zip(in_file, scores):
out_file.write('%s\t%s\n' % (line.strip(), s))
in_file.close()
out_file.close()
from eval import do_eval
from train import do_train
from predict import do_predict
from inference_model import do_save_inference_model
t1 = time.time()
print("finish infer model:%s out file: %s time_cost(s):%.2f" %
(args.init_model, out_path, t1 - t0))
from ade.utils.configure import PDConfig
def get_cards():
num = 0
cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cards != '':
num = len(cards.split(","))
return num
if __name__ == "__main__":
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
args.Print()
def main():
"""
main
"""
args = config.parse_args()
config.print_arguments(args)
if args.do_train:
do_train(args)
check_cuda(args.use_cuda)
if args.do_predict:
do_predict(args)
    if args.do_train:
        if args.loss_type == 'CLS':
            train(args)
        elif args.loss_type == 'L2':
            finetune(args)
        else:
            raise ValueError("Unknown loss_type: %s" % args.loss_type)
    elif args.do_val:
        evaluate(args)
    elif args.do_infer:
        infer(args)
    else:
        raise ValueError("One of do_train, do_val or do_infer must be set.")
if args.do_eval:
do_eval(args)
if args.do_save_inference_model:
do_save_inference_model(args)
if __name__ == '__main__':
main()
# vim: set ts=4 sw=4 sts=4 tw=100:
#matching pretrained
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_matching_pretrained-1.0.0.tar.gz
tar -xzf auto_dialogue_evaluation_matching_pretrained-1.0.0.tar.gz
#finetuned
for task in seq2seq_naive seq2seq_att keywords human
do
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/auto_dialogue_evaluation_${task}_finetuned-1.0.0.tar.gz
tar -xzf auto_dialogue_evaluation_${task}_finetuned-1.0.0.tar.gz
done
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""predict auto dialogue evaluation task"""
import os
import sys
import six
import time
import numpy as np
import multiprocessing
import paddle
import paddle.fluid as fluid
import ade.reader as reader
from ade_net import create_net
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
def do_predict(args):
"""
predict function
"""
test_prog = fluid.default_main_program()
startup_prog = fluid.default_startup_program()
with fluid.program_guard(test_prog, startup_prog):
test_prog.random_seed = args.random_seed
startup_prog.random_seed = args.random_seed
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(feed_list=input_inst,
capacity=4, iterable=False)
logits = create_net(
is_training=False,
model_input=input_field,
args=args
)
logits.persistable = True
fetch_list = [logits.name]
    # for_test=True switches the is_test attribute of operators to True
test_prog = test_prog.clone(for_test=True)
if args.use_cuda:
place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
else:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_params) or (args.init_from_pretrain_model)
if args.init_from_params:
save_load_io.init_from_params(args, exe, test_prog)
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, test_prog)
compiled_test_prog = fluid.CompiledProgram(test_prog)
processor = reader.DataProcessor(
data_path=args.predict_file,
max_seq_length=args.max_seq_len,
batch_size=args.batch_size)
batch_generator = processor.data_generator(
place=place,
phase="test",
shuffle=False,
sample_pro=1)
num_test_examples = processor.get_num_examples(phase='test')
data_reader.decorate_batch_generator(batch_generator)
data_reader.start()
scores = []
while True:
try:
results = exe.run(compiled_test_prog, fetch_list=fetch_list)
scores.extend(results[0])
except fluid.core.EOFException:
data_reader.reset()
break
scores = scores[: num_test_examples]
with open(args.output_prediction_file, 'w') as fw:
for index, score in enumerate(scores):
fw.write("%s\t%s\n" % (index, score))
if __name__ == "__main__":
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
args.Print()
check_cuda(args.use_cuda)
do_predict(args)
"""
Reader for auto dialogue evaluation
"""
import sys
import time
import numpy as np
import random
import paddle.fluid as fluid
import paddle
def to_lodtensor(data, place):
"""
    Convert to LoDTensor
"""
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def reshape_batch(batch, place):
"""
Reshape batch
"""
context_reshape = to_lodtensor([dat[0] for dat in batch], place)
response_reshape = to_lodtensor([dat[1] for dat in batch], place)
label_reshape = [dat[2] for dat in batch]
return (context_reshape, response_reshape, label_reshape)
def batch_reader(data_path,
batch_size,
place,
max_len=50,
sample_pro=1):
"""
Yield batch
"""
batch = []
with open(data_path, 'r') as f:
for line in f:
#sample for training data
if sample_pro < 1:
if random.random() > sample_pro:
continue
tokens = line.strip().split('\t')
assert len(tokens) == 3
context = [int(x) for x in tokens[0].split()[:max_len]]
response = [int(x) for x in tokens[1].split()[:max_len]]
label = [int(tokens[2])]
instance = (context, response, label)
if len(batch) < batch_size:
batch.append(instance)
else:
if len(batch) == batch_size:
yield reshape_batch(batch, place)
batch = [instance]
if len(batch) == batch_size:
yield reshape_batch(batch, place)
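    # Note: a trailing batch smaller than batch_size is dropped; only full
    # batches are ever yielded.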
export CUDA_VISIBLE_DEVICES=4
export FLAGS_eager_delete_tensor_gb=0.0
#pretrain
python -u main.py \
--do_train True \
--use_cuda \
--save_path model_files_tmp/matching_pretrained \
--train_path data/unlabel_data/train.ids \
--val_path data/unlabel_data/val.ids
#finetune based on one task
TASK=human
python -u main.py \
--do_train True \
--loss_type L2 \
--use_cuda \
--save_path model_files_tmp/${TASK}_finetuned \
--init_model model_files/matching_pretrained \
--train_path data/label_data/$TASK/train.ids \
--val_path data/label_data/$TASK/val.ids \
--print_step 1 \
--save_step 1 \
--num_scan_data 50
#evaluate pretrained model by Recall
python -u main.py \
--do_val True \
--use_cuda \
--test_path data/unlabel_data/test.ids \
--init_model model_files/matching_pretrained \
--loss_type CLS
#evaluate pretrained model by Cor
for task in seq2seq_naive seq2seq_att keywords human
do
echo $task
python -u main.py \
--do_val True \
--use_cuda \
--test_path data/label_data/$task/test.ids \
--init_model model_files/matching_pretrained \
--loss_type L2
done
#evaluate finetuned model by Cor
for task in seq2seq_naive seq2seq_att keywords human
do
echo $task
python -u main.py \
--do_val True \
--use_cuda \
--test_path data/label_data/$task/test.ids \
--init_model model_files/${task}_finetuned \
--loss_type L2
done
#infer
TASK=human
python -u main.py \
--do_infer True \
--use_cuda \
--test_path data/label_data/$TASK/test.ids \
--init_model model_files/${TASK}_finetuned
#!/bin/bash
export FLAGS_sync_nccl_allreduce=0
export FLAGS_eager_delete_tensor_gb=1.0
export CUDA_VISIBLE_DEVICES=0
if [ $# -ne 2 ]
then
echo "please input parameters: TRAIN_TYPE and TASK_TYPE"
echo "TRAIN_TYPE: [matching|seq2seq_naive|seq2seq_att|keywords|human]"
echo "TASK_TYPE: [train|predict|evaluate|inference]"
exit 255
fi
TRAIN_TYPE=$1
TASK_TYPE=$2
typeset -l TRAIN_TYPE
typeset -l TASK_TYPE
candi_train_type=("matching" "seq2seq_naive" "seq2seq_att" "keywords" "human")
candi_task_type=("train" "predict" "evaluate" "inference")
if [[ ! "${candi_train_type[@]}" =~ ${TRAIN_TYPE} ]]
then
echo "unknown parameter: ${TRAIN_TYPE}, just support [matching|seq2seq_naive|seq2seq_att|keywords|human]"
exit 255
fi
if [[ ! "${candi_task_type[@]}" =~ ${TASK_TYPE} ]]
then
echo "unknown parameter: ${TRAIN_TYPE}, just support [train|predict|evaluate|inference]"
exit 255
fi
INPUT_PATH="data/input/data"
OUTPUT_PATH="data/output"
SAVED_MODELS="data/saved_models"
INFERENCE_MODEL="data/inference_models"
PYTHON_PATH="python"
#decide whether to use cuda based on CUDA_VISIBLE_DEVICES
if [ ! "$CUDA_VISIBLE_DEVICES" ]
then
use_cuda=false
else
use_cuda=true
fi
#training
function pretrain_train()
{
pretrain_model_path="${SAVED_MODELS}/matching_pretrained"
if [ ! -d ${pretrain_model_path} ]
then
mkdir ${pretrain_model_path}
fi
${PYTHON_PATH} -u main.py \
--do_train=true \
--use_cuda=${1} \
--loss_type="CLS" \
--max_seq_len=50 \
--save_model_path=${pretrain_model_path} \
--save_param="params" \
--training_file="${INPUT_PATH}/unlabel_data/train.ids" \
--epoch=20 \
--print_step=1 \
--save_step=400 \
--batch_size=256 \
--hidden_size=256 \
--emb_size=256 \
--vocab_size=484016 \
--learning_rate=0.001 \
--sample_pro 0.1
}
function finetuning_train()
{
save_model_path="${SAVED_MODELS}/${2}_finetuned"
if [ ! -d ${save_model_path} ]
then
mkdir ${save_model_path}
fi
${PYTHON_PATH} -u main.py \
--do_train=true \
--use_cuda=${1} \
--loss_type="L2" \
--max_seq_len=50 \
--init_from_pretrain_model="${SAVED_MODELS}/matching_pretrained/params/step_final" \
--save_model_path=${save_model_path} \
--save_param="params" \
--training_file="${INPUT_PATH}/label_data/${2}/train.ids" \
--epoch=50 \
--print_step=1 \
--save_step=400 \
--batch_size=256 \
--hidden_size=256 \
--emb_size=256 \
--vocab_size=484016 \
--learning_rate=0.001 \
--sample_pro 0.1
}
#predict
function pretrain_predict()
{
${PYTHON_PATH} -u main.py \
--do_predict=true \
--use_cuda=${1} \
--predict_file="${INPUT_PATH}/unlabel_data/test.ids" \
--init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
--loss_type="CLS" \
--output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
--max_seq_len=50 \
--batch_size=256 \
--hidden_size=256 \
--emb_size=256 \
--vocab_size=484016
}
function finetuning_predict()
{
${PYTHON_PATH} -u main.py \
--do_predict=true \
--use_cuda=${1} \
--predict_file="${INPUT_PATH}/label_data/${2}/test.ids" \
--init_from_params=${SAVED_MODELS}/trained_models/${2}_finetuned/params \
--loss_type="L2" \
--output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
--max_seq_len=50 \
--batch_size=256 \
--hidden_size=256 \
--emb_size=256 \
--vocab_size=484016
}
#evaluate
function pretrain_eval()
{
${PYTHON_PATH} -u main.py \
--do_eval=true \
--use_cuda=${1} \
--evaluation_file="${INPUT_PATH}/unlabel_data/test.ids" \
--output_prediction_file="${OUTPUT_PATH}/pretrain_matching_predict" \
--loss_type="CLS"
}
function finetuning_eval()
{
${PYTHON_PATH} -u main.py \
--do_eval=true \
--use_cuda=${1} \
--evaluation_file="${INPUT_PATH}/label_data/${2}/test.ids" \
--output_prediction_file="${OUTPUT_PATH}/finetuning_${2}_predict" \
--loss_type="L2"
}
#inference model
function pretrain_infer()
{
${PYTHON_PATH} -u main.py \
--do_save_inference_model=true \
--use_cuda=${1} \
--init_from_params="${SAVED_MODELS}/trained_models/matching_pretrained/params" \
--inference_model_dir="${INFERENCE_MODEL}/matching_inference_model"
}
function finetuning_infer()
{
${PYTHON_PATH} -u main.py \
--do_save_inference_model=true \
--use_cuda=${1} \
--init_from_params="${SAVED_MODELS}/trained_models/${2}_finetuned/params" \
--inference_model_dir="${INFERENCE_MODEL}/${2}_inference_model"
}
if [ "${TASK_TYPE}" = "train" ]
then
echo "train ${TRAIN_TYPE} start.........."
if [ "${TRAIN_TYPE}" = "matching" ]
then
pretrain_train ${use_cuda};
else
finetuning_train ${use_cuda} ${TRAIN_TYPE};
fi
elif [ "${TASK_TYPE}" = "predict" ]
then
echo "predict ${TRAIN_TYPE} start.........."
if [ "${TRAIN_TYPE}" = "matching" ]
then
pretrain_predict ${use_cuda};
else
finetuning_predict ${use_cuda} ${TRAIN_TYPE};
fi
elif [ "${TASK_TYPE}" = "evaluate" ]
then
echo "evaluate ${TRAIN_TYPE} start.........."
if [ "${TRAIN_TYPE}" = "matching" ]
then
pretrain_eval ${use_cuda};
else
finetuning_eval ${use_cuda} ${TRAIN_TYPE};
fi
elif [ "${TASK_TYPE}" = "inference" ]
then
echo "save ${TRAIN_TYPE} inference model start.........."
if [ "${TRAIN_TYPE}" = "matching" ]
then
pretrain_infer ${use_cuda};
else
finetuning_infer ${use_cuda} ${TRAIN_TYPE};
fi
else
exit 255
fi
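#illustrative dispatch (assuming the caller exports these variables earlier in the wrapper; values are examples):
#  TASK_TYPE=train TRAIN_TYPE=matching use_cuda=true sh run.sh
#any TRAIN_TYPE other than "matching" (e.g. human, keywords) takes the finetuning path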
export FLAGS_eager_delete_tensor_gb=0.0
#pretrain
python -u main.py \
--do_train True \
--sample_pro 0.9 \
--batch_size 64 \
--save_path model_files_tmp/matching_pretrained \
--train_path data/unlabel_data/train.ids \
--val_path data/unlabel_data/val.ids
#finetune based on one task
TASK=human
python -u main.py \
--do_train True \
--loss_type L2 \
--save_path model_files_tmp/${TASK}_finetuned \
--init_model model_files/matching_pretrained \
--train_path data/label_data/$TASK/train.ids \
--val_path data/label_data/$TASK/val.ids \
--print_step 1 \
--save_step 1 \
--num_scan_data 50
#evaluate pretrained model by Recall
python -u main.py \
--do_val True \
--test_path data/unlabel_data/test.ids \
--init_model model_files/matching_pretrained \
--loss_type CLS
#evaluate pretrained model by Cor
for task in seq2seq_naive seq2seq_att keywords human
do
echo $task
python -u main.py \
--do_val True \
--test_path data/label_data/$task/test.ids \
--init_model model_files/matching_pretrained \
--loss_type L2
done
#evaluate finetuned model by Cor
for task in seq2seq_naive seq2seq_att keywords human
do
echo $task
python -u main.py \
--do_val True \
--test_path data/label_data/$task/test.ids \
--init_model model_files/${task}_finetuned \
--loss_type L2
done
#infer
TASK=human
python -u main.py \
--do_infer True \
--test_path data/label_data/$TASK/test.ids \
--init_model model_files/${TASK}_finetuned
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""train auto dialogue evaluation task"""
import os
import sys
import six
import time
import numpy as np
import multiprocessing
import paddle
import paddle.fluid as fluid
import ade.reader as reader
from ade_net import create_net, set_word_embedding
from ade.utils.configure import PDConfig
from ade.utils.input_field import InputField
from ade.utils.model_check import check_cuda
import ade.utils.save_load_io as save_load_io
try:
import cPickle as pickle #python 2
except ImportError as e:
import pickle #python 3
def do_train(args):
"""train function"""
train_prog = fluid.default_main_program()
startup_prog = fluid.default_startup_program()
with fluid.program_guard(train_prog, startup_prog):
train_prog.random_seed = args.random_seed
startup_prog.random_seed = args.random_seed
with fluid.unique_name.guard():
context_wordseq = fluid.layers.data(
name='context_wordseq', shape=[1], dtype='int64', lod_level=1)
response_wordseq = fluid.layers.data(
name='response_wordseq', shape=[1], dtype='int64', lod_level=1)
labels = fluid.layers.data(
name='labels', shape=[1], dtype='int64')
input_inst = [context_wordseq, response_wordseq, labels]
input_field = InputField(input_inst)
data_reader = fluid.io.PyReader(feed_list=input_inst,
capacity=4, iterable=False)
loss = create_net(
is_training=True,
model_input=input_field,
args=args
)
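# mark the loss persistable so memory-optimization passes keep it fetchable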
loss.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
optimizer.minimize(loss)
if args.use_cuda:
dev_count = fluid.core.get_cuda_device_count()
place = fluid.CUDAPlace(int(os.getenv('FLAGS_selected_gpus', '0')))
else:
dev_count = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
place = fluid.CPUPlace()
processor = reader.DataProcessor(
data_path=args.training_file,
max_seq_length=args.max_seq_len,
batch_size=args.batch_size)
batch_generator = processor.data_generator(
place=place,
phase="train",
shuffle=True,
sample_pro=args.sample_pro)
num_train_examples = processor.get_num_examples(phase='train')
max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size
print("Num train examples: %d" % num_train_examples)
print("Max train steps: %d" % max_train_steps)
data_reader.decorate_batch_generator(batch_generator)
exe = fluid.Executor(place)
exe.run(startup_prog)
assert (args.init_from_checkpoint == "") or (
args.init_from_pretrain_model == "")
#init from some checkpoint, to resume the previous training
if args.init_from_checkpoint:
save_load_io.init_from_checkpoint(args, exe, train_prog)
#init from some pretrain models, to better solve the current task
if args.init_from_pretrain_model:
save_load_io.init_from_pretrain_model(args, exe, train_prog)
if args.word_emb_init:
print("start loading word embedding init ...")
if six.PY2:
word_emb = np.array(pickle.load(open(args.word_emb_init, 'rb'))).astype('float32')
else:
word_emb = np.array(pickle.load(open(args.word_emb_init, 'rb'), encoding="bytes")).astype('float32')
set_word_embedding(word_emb, place)
print("finish init word embedding ...")
build_strategy = fluid.compiler.BuildStrategy()
build_strategy.enable_inplace = True
compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
steps = 0
begin_time = time.time()
for epoch_step in range(args.epoch):
data_reader.start()
sum_loss = 0.0
ce_loss = 0.0
while True:
try:
steps += 1
fetch_list = [loss.name]
outputs = exe.run(compiled_train_prog, fetch_list=fetch_list)
np_loss = outputs
sum_loss += np.array(np_loss).mean()
ce_loss = np.array(np_loss).mean()
if steps % args.print_steps == 0:
print('epoch: %d, step: %s, avg loss %s' % (epoch_step, steps, sum_loss / args.print_steps))
sum_loss = 0.0
if steps % args.save_steps == 0:
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog, "step_" + str(steps))
if args.save_param:
save_load_io.save_param(args, exe, train_prog, "step_" + str(steps))
except fluid.core.EOFException:
data_reader.reset()
break
if args.save_checkpoint:
save_load_io.save_checkpoint(args, exe, train_prog, "step_final")
if args.save_param:
save_load_io.save_param(args, exe, train_prog, "step_final")
def get_cards():
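"""count GPUs visible via CUDA_VISIBLE_DEVICES, e.g. "0,1" -> 2 (0 if unset)"""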
num = 0
cards = os.environ.get('CUDA_VISIBLE_DEVICES', '')
if cards != '':
num = len(cards.split(","))
return num
if args.enable_ce:
card_num = get_cards()
pass_time_cost = time.time() - begin_time
print("test_card_num", card_num)
print("kpis\ttrain_duration_card%s\t%s" % (card_num, pass_time_cost))
print("kpis\ttrain_loss_card%s\t%f" % (card_num, ce_loss))
if __name__ == '__main__':
args = PDConfig(yaml_file="./data/config/ade.yaml")
args.build()
args.Print()
check_cuda(args.use_cuda)
do_train(args)
# this file is only used for continuous evaluation test!
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""this file is only used for continuous evaluation test!"""
import os
import sys
......
task_name: ""
data_dir: ""
bert_config_path: ""
init_from_checkpoint: ""
init_from_params: ""
init_from_pretrain_model: ""
inference_model_dir: ""
save_model_path: ""
save_checkpoint: ""
save_param: ""
lr_scheduler: "linear_warmup_decay"
weight_decay: 0.01
warmup_proportion: 0.1
save_steps: 1000
use_fp16: False
loss_scaling: 1.0
print_steps: 20
evaluation_file: ""
output_prediction_file: ""
vocab_path: ""
max_seq_len: 128
batch_size: 2
verbose: False
do_lower_case: False
random_seed: 0
use_cuda: True
task_name: ""
in_tokens: False
do_save_inference_model: False
enable_ce: "store_true"
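These yaml keys are fused into argparse by PDConfig.load_yaml (shown later in this commit), so any of them can be overridden on the command line; an illustrative, non-prescriptive override:
python -u main.py --do_train true --max_seq_len 64 --batch_size 32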
pretrain_model directory: in this module, we use BERT as the pretrained model
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -74,7 +74,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
return batch_tokens, mask_label, mask_pos
def prepare_batch_data(insts,
def prepare_batch_data(task_name,
insts,
max_len,
total_token_num,
voc_size=0,
......@@ -90,7 +91,6 @@ def prepare_batch_data(insts,
2. generate Tensor of position
3. generate self attention mask, [shape: batch_size * max_len * max_len]
"""
batch_src_ids = [inst[0] for inst in insts]
batch_sent_ids = [inst[1] for inst in insts]
batch_pos_ids = [inst[2] for inst in insts]
......@@ -99,10 +99,10 @@ def prepare_batch_data(insts,
# or unique id
if isinstance(insts[0][3], list):
if max_len != -1:
if task_name == "atis_slot":
labels_list = [inst[3] + [0] * (max_len - len(inst[3])) for inst in insts]
labels_list = [np.array(labels_list).astype("int64").reshape([-1, max_len])]
else:
elif task_name == "dstc2":
labels_list = [inst[3] for inst in insts]
labels_list = [np.array(labels_list).astype("int64")]
else:
......
......@@ -24,9 +24,7 @@ import json
import numpy as np
import paddle.fluid as fluid
_WORK_DIR = os.path.split(os.path.realpath(__file__))[0]
sys.path.append(os.path.join(_WORK_DIR, "../../"))
from transformer_encoder import encoder, pre_process_layer
from dgu.transformer_encoder import encoder, pre_process_layer
class BertConfig(object):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -49,15 +49,9 @@ class Paradigm(object):
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
if params['is_prediction']:
if not params['is_training']:
probs = fluid.layers.softmax(logits)
feed_targets_name = [
params['src_ids'].name,
params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
results = {"probs": probs}
return results
ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
......@@ -67,11 +61,6 @@ class Paradigm(object):
accuracy = fluid.layers.accuracy(
input=probs, label=params['labels'], total=num_seqs)
loss.persistable = True
probs.persistable = True
accuracy.persistable = True
num_seqs.persistable = True
results = {
"loss": loss,
"probs": probs,
......@@ -105,22 +94,13 @@ class Paradigm(object):
loss = fluid.layers.mean(x=ce_loss)
probs = fluid.layers.sigmoid(logits)
if params['is_prediction']:
feed_targets_name = [
params['src_ids'].name,
params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
if not params['is_training']:
results = {"probs": probs}
return results
num_seqs = fluid.layers.tensor.fill_constant(
shape=[1], dtype='int64', value=1)
loss.persistable = True
probs.persistable = True
num_seqs.persistable = True
results = {"loss": loss, "probs": probs, "num_seqs": num_seqs}
return results
......@@ -138,14 +118,8 @@ class Paradigm(object):
fluid.layers.argmax(
logits, axis=1), dtype='int32')
if params['is_prediction']:
feed_targets_name = [
params['src_ids'].name,
params['pos_ids'].name,
params['sent_ids'].name,
params['input_mask'].name,
]
results = {"probs": probs, "feed_targets_name": feed_targets_name}
if not params['is_training']:
results = {"probs": probs}
return results
num_seqs = fluid.layers.tensor.fill_constant(
......@@ -160,10 +134,6 @@ class Paradigm(object):
label=fluid.layers.reshape(params['labels'], [-1, 1]))
loss = fluid.layers.mean(x=ce_loss)
loss.persistable = True
probs.persistable = True
accuracy.persistable = True
num_seqs.persistable = True
results = {
"loss": loss,
"probs": probs,
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,9 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
evaluate task metrics
"""
"""evaluate task metrics"""
import sys
......@@ -22,15 +20,12 @@ class EvalDA(object):
"""
evaluate da testset, swda|mrda
"""
def __init__(self, task_name, pred):
def __init__(self, task_name, pred, refer):
"""
predict file
"""
self.pred_file = pred
if task_name == 'swda':
self.refer_file = "./data/swda/test.txt"
elif task_name == "mrda":
self.refer_file = "./data/mrda/test.txt"
self.refer_file = refer
def load_data(self):
"""
......@@ -70,12 +65,12 @@ class EvalATISIntent(object):
"""
evaluate atis intent testset
"""
def __init__(self, pred):
def __init__(self, pred, refer):
"""
predict file
"""
self.pred_file = pred
self.refer_file = "./data/atis/atis_intent/test.txt"
self.refer_file = refer
def load_data(self):
"""
......@@ -115,12 +110,12 @@ class EvalATISSlot(object):
"""
evaluate atis slot
"""
def __init__(self, pred):
def __init__(self, pred, refer):
"""
pred file
"""
self.pred_file = pred
self.refer_file = "./data/atis/atis_slot/test.txt"
self.refer_file = refer
def load_data(self):
"""
......@@ -200,12 +195,12 @@ class EvalUDC(object):
"""
evaluate udc
"""
def __init__(self, pred):
def __init__(self, pred, refer):
"""
predict file
"""
self.pred_file = pred
self.refer_file = "./data/udc/test.txt"
self.refer_file = refer
def load_data(self):
"""
......@@ -272,13 +267,13 @@ class EvalDSTC2(object):
"""
evaluate dst testset, dstc2
"""
def __init__(self, task_name, pred):
def __init__(self, task_name, pred, refer):
"""
predict file
"""
self.task_name = task_name
self.pred_file = pred
self.refer_file = "./data/dstc2/%s/test.txt" % self.task_name
self.refer_file = refer
def load_data(self):
"""
......@@ -320,15 +315,10 @@ class EvalDSTC2(object):
return metrics_out
if __name__ == "__main__":
if len(sys.argv[1:]) < 2:
print("please input task_name predict_file")
task_name = sys.argv[1]
pred_file = sys.argv[2]
def evaluate(task_name, pred_file, refer_file):
"""evaluate task metrics"""
if task_name.lower() == 'udc':
eval_inst = EvalUDC(pred_file)
eval_inst = EvalUDC(pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("MATCHING TASK: %s metrics in testset: " % task_name)
print("R1@2: %s" % eval_metrics[0])
......@@ -337,29 +327,29 @@ if __name__ == "__main__":
print("R5@10: %s" % eval_metrics[3])
elif task_name.lower() in ['swda', 'mrda']:
eval_inst = EvalDA(task_name.lower(), pred_file)
eval_inst = EvalDA(task_name.lower(), pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("DA TASK: %s metrics in testset: " % task_name)
print("ACC: %s" % eval_metrics)
elif task_name.lower() == 'atis_intent':
eval_inst = EvalATISIntent(pred_file)
eval_inst = EvalATISIntent(pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("INTENTION TASK: %s metrics in testset: " % task_name)
print("ACC: %s" % eval_metrics)
elif task_name.lower() == 'atis_slot':
eval_inst = EvalATISSlot(pred_file)
eval_inst = EvalATISSlot(pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("SLOT FILLING TASK: %s metrics in testset: " % task_name)
print(eval_metrics)
elif task_name.lower() in ['dstc2', 'dstc2_asr']:
eval_inst = EvalDSTC2(task_name.lower(), pred_file)
eval_inst = EvalDSTC2(task_name.lower(), pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("DST TASK: %s metrics in testset: " % task_name)
print("JOINT ACC: %s" % eval_metrics[0])
elif task_name.lower() == "multi-woz":
eval_inst = EvalMultiWoz(pred_file)
eval_inst = EvalMultiWoz(pred_file, refer_file)
eval_metrics = eval_inst.evaluate()
print("DST TASK: %s metrics in testset: " % task_name)
print("JOINT ACC: %s" % eval_metrics[0])
......@@ -367,3 +357,14 @@ if __name__ == "__main__":
else:
print("task name not in [udc|swda|mrda|atis_intent|atis_slot|dstc2|dstc2_asr|multi-woz]")
if __name__ == "__main__":
if len(sys.argv[1:]) < 3:
print("please input task_name predict_file reference_file")
task_name = sys.argv[1]
pred_file = sys.argv[2]
refer_file = sys.argv[3]
evaluate(task_name, pred_file, refer_file)
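# illustrative usage (script name and paths are hypothetical):
#   python -u eval.py udc ./output/pred_udc.txt ./data/input/data/udc/test.txt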
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -19,7 +19,7 @@ from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from utils.fp16 import create_master_params_grads, master_param_to_train_param
from dgu.utils.fp16 import create_master_params_grads, master_param_to_train_param
def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
......
#!/bin/bash
#check data directory
cd ..
echo "Start download data and models.............."
if [ ! -d "data" ]; then
echo "Directory data does not exist, make new data directory"
mkdir data
fi
cd data
#check configure file
if [ ! -d "config" ]; then
echo "config directory not exist........"
exit 255
else
if [ ! -f "config/dgu.yaml" ]; then
echo "config file dgu.yaml has been lost........"
exit 255
fi
fi
#check and download input data
if [ ! -d "input" ]; then
echo "Directory input does not exist, make new input directory"
mkdir input
fi
cd input
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz
tar -xvf dmtk_data_1.0.0.tar.gz
rm dmtk_data_1.0.0.tar.gz
cd ..
#check and download pretrain model
if [ ! -d "pretrain_model" ]; then
echo "Directory pretrain_model does not exist, make new pretrain_model directory"
mkdir pretrain_model
fi
cd pretrain_model
wget --no-check-certificate https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz
tar -xvf uncased_L-12_H-768_A-12.tar.gz
rm uncased_L-12_H-768_A-12.tar.gz
cd ..
#check inference model directory
if [ ! -d "inference_models" ]; then
echo "Directory inference_models does not exist, make new inference_models directory"
mkdir inference_models
fi
#check output
if [ ! -d "output" ]; then
echo "Directory output does not exist, make new output directory"
mkdir output
fi
#check saved model
if [ ! -d "saved_models" ]; then
echo "Directory saved_models does not exist, make new saved_models directory"
mkdir saved_models
fi
cd saved_models
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/dgu_models_2.0.0.tar.gz
tar -xvf dgu_models_2.0.0.tar.gz
rm dgu_models_2.0.0.tar.gz
cd ..
echo "Finish.............."
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,9 +13,10 @@
# limitations under the License.
"""data reader"""
import os
import types
import csv
import types
import numpy as np
import tokenization
from batching import prepare_batch_data
......@@ -40,9 +41,7 @@ class DataProcessor(object):
np.random.seed(random_seed)
self.current_train_example = -1
self.num_examples = {'train': -1, 'dev': -1, 'test': -1}
self.current_train_epoch = -1
self.task_name = task_name
def get_train_examples(self, data_dir):
......@@ -57,7 +56,8 @@ class DataProcessor(object):
"""Gets a collection of `InputExample`s for prediction."""
raise NotImplementedError()
def get_labels(self):
@staticmethod
def get_labels():
"""Gets the list of labels for this data set."""
raise NotImplementedError()
......@@ -90,6 +90,7 @@ class DataProcessor(object):
return_num_token=False):
"""generate batch data"""
return prepare_batch_data(
self.task_name,
batch_data,
max_len,
total_token_num,
......@@ -119,18 +120,13 @@ class DataProcessor(object):
"Unknown phase, which should be in ['train', 'dev', 'test'].")
return self.num_examples[phase]
def get_train_progress(self):
"""Gets progress for training phase."""
return self.current_train_example, self.current_train_epoch
def data_generator(self, batch_size, phase='train', epoch=1, shuffle=False):
def data_generator(self, batch_size, phase='train', shuffle=False):
"""
Generate data for train, dev or test.
Args:
batch_size: int. The batch size of generated data.
phase: string. The phase for which to generate data.
epoch: int. Total epoches to generate data.
shuffle: bool. Whether to shuffle examples.
"""
if phase == 'train':
......@@ -148,25 +144,19 @@ class DataProcessor(object):
def instance_reader():
"""generate instance data"""
for epoch_index in range(epoch):
if shuffle:
np.random.shuffle(examples)
if phase == 'train':
self.current_train_epoch = epoch_index
for (index, example) in enumerate(examples):
if phase == 'train':
self.current_train_example = index + 1
feature = self.convert_example(
index, example,
self.get_labels(), self.max_seq_len, self.tokenizer)
instance = self.generate_instance(feature)
yield instance
if shuffle:
np.random.shuffle(examples)
for (index, example) in enumerate(examples):
feature = self.convert_example(
index, example,
self.get_labels(), self.max_seq_len, self.tokenizer)
instance = self.generate_instance(feature)
yield instance
def batch_reader(reader, batch_size, in_tokens):
"""read batch data"""
batch, total_token_num, max_len = [], 0, 0
for instance in reader():
for instance in reader():
token_ids, sent_ids, pos_ids, label = instance[:4]
max_len = max(max_len, len(token_ids))
if in_tokens:
......@@ -294,7 +284,8 @@ class UDCProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
return ["0", "1"]
......@@ -327,7 +318,8 @@ class SWDAProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(42)
labels = [str(label) for label in labels]
......@@ -362,7 +354,8 @@ class MRDAProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(42)
labels = [str(label) for label in labels]
......@@ -406,7 +399,8 @@ class ATISSlotProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(130)
labels = [str(label) for label in labels]
......@@ -449,7 +443,8 @@ class ATISIntentProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(26)
labels = [str(label) for label in labels]
......@@ -522,7 +517,8 @@ class DSTC2Processor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(217)
labels = [str(label) for label in labels]
......@@ -598,7 +594,8 @@ class MULTIWOZProcessor(DataProcessor):
examples = self._create_examples(lines, "test")
return examples
def get_labels(self):
@staticmethod
def get_labels():
"""See base class."""
labels = range(722)
labels = [str(label) for label in labels]
......@@ -666,8 +663,8 @@ def convert_tokens(tokens, sep_id, tokenizer):
tok_text = tokenizer.tokenize(text)
ids = tokenizer.convert_tokens_to_ids(tok_text)
tokens_ids.extend(ids)
if text != tokens[-1]:
tokens_ids.append(sep_id)
tokens_ids.append(sep_id)
tokens_ids = tokens_ids[: -1]
else:
tok_text = tokenizer.tokenize(tokens)
tokens_ids = tokenizer.convert_tokens_to_ids(tok_text)
......@@ -719,7 +716,8 @@ def convert_single_example(ex_index, example, label_list, max_seq_length,
if tokens_b_ids:
tokens_b_ids = tokens_b_ids[:min(limit_length, len(tokens_b_ids))]
else:
tokens_a_ids = tokens_a_ids[len(tokens_a_ids) - max_seq_length + 2:]
if len(tokens_a_ids) > max_seq_length - 2:
tokens_a_ids = tokens_a_ids[len(tokens_a_ids) - max_seq_length + 2:]
if not tokens_c_ids:
if len(tokens_a_ids) > max_seq_length - len(tokens_b_ids) - 3:
tokens_a_ids = tokens_a_ids[len(tokens_a_ids) - max_seq_length + len(tokens_b_ids) + 3:]
......@@ -727,13 +725,10 @@ def convert_single_example(ex_index, example, label_list, max_seq_length,
if len(tokens_a_ids) + len(tokens_b_ids) + len(tokens_c_ids) > max_seq_length - 4:
left_num = max_seq_length - len(tokens_b_ids) - 4
if len(tokens_a_ids) > len(tokens_c_ids):
if not tokens_c_ids:
tokens_a_ids = tokens_a_ids[max(0, len(tokens_a_ids) - left_num):]
else:
suffix_num = int(left_num / 2)
tokens_c_ids = tokens_c_ids[: min(len(tokens_c_ids), suffix_num)]
prefix_num = left_num - len(tokens_c_ids)
tokens_a_ids = tokens_a_ids[max(0, len(tokens_a_ids) - prefix_num):]
suffix_num = int(left_num / 2)
tokens_c_ids = tokens_c_ids[: min(len(tokens_c_ids), suffix_num)]
prefix_num = left_num - len(tokens_c_ids)
tokens_a_ids = tokens_a_ids[max(0, len(tokens_a_ids) - prefix_num):]
else:
if not tokens_a_ids:
tokens_c_ids = tokens_c_ids[max(0, len(tokens_c_ids) - left_num):]
......
scripts: directory of data-processing scripts that convert the official public datasets into the training data format required by the model
Run command:
sh run_build_data.sh [udc|swda|mrda|atis|dstc2]
1) To generate the train/dev/test sets for the MATCHING task:
sh run_build_data.sh udc
The generated data is placed in dialogue_general_understanding/data/input/data/udc
2) To generate the train/dev/test sets for the DA task:
sh run_build_data.sh swda
sh run_build_data.sh mrda
The generated data is placed in dialogue_general_understanding/data/input/data/swda and dialogue_general_understanding/data/input/data/mrda respectively
3) To generate the train/dev/test sets for the DST task:
sh run_build_data.sh dstc2
The generated data is placed in dialogue_general_understanding/data/input/data/dstc2
4) To generate the train/dev/test sets for the intent detection and slot filling tasks:
sh run_build_data.sh atis
The slot filling data is placed in dialogue_general_understanding/data/input/data/atis/atis_slot
The intent detection data is placed in dialogue_general_understanding/data/input/data/atis/atis_intent
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""build swda train dev test dataset"""
import json
......@@ -32,11 +33,11 @@ class ATIS(object):
self.slot_dict = {"PAD": 0, "O": 1}
self.intent_id = 0
self.intent_dict = dict()
self.src_dir = "../data/atis/source_data"
self.out_slot_dir = "../data/atis/atis_slot"
self.out_intent_dir = "../data/atis/atis_intent"
self.map_tag_slot = "../data/atis/atis_slot/map_tag_slot_id.txt"
self.map_tag_intent = "../data/atis/atis_intent/map_tag_intent_id.txt"
self.src_dir = "../../data/input/data/atis/source_data"
self.out_slot_dir = "../../data/input/data/atis/atis_slot"
self.out_intent_dir = "../../data/input/data/atis/atis_intent"
self.map_tag_slot = "../../data/input/data/atis/atis_slot/map_tag_slot_id.txt"
self.map_tag_intent = "../../data/input/data/atis/atis_intent/map_tag_intent_id.txt"
def _load_file(self, data_type):
"""
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -31,12 +31,12 @@ class DSTC2(object):
init instance
"""
self.map_tag_dict = {}
self.out_dir = "../data/dstc2/dstc2"
self.out_asr_dir = "../data/dstc2/dstc2_asr"
self.out_dir = "../../data/input/data/dstc2/dstc2"
self.out_asr_dir = "../../data/input/data/dstc2/dstc2_asr"
self.data_list = "./conf/dstc2.conf"
self.map_tag = "../data/dstc2/dstc2/map_tag_id.txt"
self.src_dir = "../data/dstc2/source_data"
self.onto_json = "../data/dstc2/source_data/ontology_dstc2.json"
self.map_tag = "../../data/input/data/dstc2/dstc2/map_tag_id.txt"
self.src_dir = "../../data/input/data/dstc2/source_data"
self.onto_json = "../../data/input/data/dstc2/source_data/ontology_dstc2.json"
self._load_file()
self._load_ontology()
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -31,11 +31,11 @@ class MRDA(object):
"""
self.tag_id = 0
self.map_tag_dict = dict()
self.out_dir = "../data/mrda"
self.out_dir = "../../data/input/data/mrda"
self.data_list = "./conf/mrda.conf"
self.map_tag = "../data/mrda/map_tag_id.txt"
self.voc_map_tag = "../data/mrda/source_data/icsi_mrda+hs_corpus_050512/classmaps/map_01b_expanded_w_split"
self.src_dir = "../data/mrda/source_data/icsi_mrda+hs_corpus_050512/data"
self.map_tag = "../../data/input/data/mrda/map_tag_id.txt"
self.voc_map_tag = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/classmaps/map_01b_expanded_w_split"
self.src_dir = "../../data/input/data/mrda/source_data/icsi_mrda+hs_corpus_050512/data"
self._load_file()
self.tag_dict = commonlib.load_voc(self.voc_map_tag)
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -31,10 +31,10 @@ class SWDA(object):
"""
self.tag_id = 0
self.map_tag_dict = dict()
self.out_dir = "../data/swda"
self.out_dir = "../../data/input/data/swda"
self.data_list = "./conf/swda.conf"
self.map_tag = "../data/swda/map_tag_id.txt"
self.src_dir = "../data/swda/source_data/swda"
self.map_tag = "../../data/input/data/swda/map_tag_id.txt"
self.src_dir = "../../data/input/data/swda/source_data/swda"
self._load_file()
def _load_file(self):
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
......@@ -15,8 +15,8 @@ then
elif [[ "${TASK_DATA}" =~ "atis" ]]
then
python build_atis_dataset.py
cat ../data/atis/atis_slot/test.txt > ../data/atis/atis_slot/dev.txt
cat ../data/atis/atis_intent/test.txt > ../data/atis/atis_intent/dev.txt
cat ../../data/input/data/atis/atis_slot/test.txt > ../../data/input/data/atis/atis_slot/dev.txt
cat ../../data/input/data/atis/atis_intent/test.txt > ../../data/input/data/atis/atis_intent/dev.txt
elif [ "${TASK_DATA}" = "dstc2" ]
then
python build_dstc2_dataset.py
......
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Transformer encoder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from functools import partial
import paddle.fluid as fluid
import paddle.fluid.layers as layers
def multi_head_attention(queries,
keys,
values,
attn_bias,
d_key,
d_value,
d_model,
n_head=1,
dropout_rate=0.,
cache=None,
param_initializer=None,
name='multi_head_att'):
"""
Multi-Head Attention. Note that attn_bias is added to the logit before
computing the softmax activation to mask certain selected positions so that
they will not be considered in attention weights.
"""
keys = queries if keys is None else keys
values = keys if values is None else values
if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
raise ValueError(
"Inputs: quries, keys and values should all be 3-D tensors.")
def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
"""
Add linear projection to queries, keys, and values.
"""
q = layers.fc(input=queries,
size=d_key * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_query_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_query_fc.b_0')
k = layers.fc(input=keys,
size=d_key * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_key_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_key_fc.b_0')
v = layers.fc(input=values,
size=d_value * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_value_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_value_fc.b_0')
return q, k, v
def __split_heads(x, n_head):
"""
Reshape the last dimension of input tensor x so that it becomes two
dimensions and then transpose. Specifically, input a tensor with shape
[bs, max_sequence_length, n_head * hidden_dim] then output a tensor
with shape [bs, n_head, max_sequence_length, hidden_dim].
"""
hidden_size = x.shape[-1]
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
reshaped = layers.reshape(
x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
# permute the dimensions into:
# [batch_size, n_head, max_sequence_len, hidden_size_per_head]
return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
def __combine_heads(x):
"""
Transpose and then reshape the last two dimensions of input tensor x
so that it becomes one dimension, which is the reverse of __split_heads.
"""
if len(x.shape) == 3:
return x
if len(x.shape) != 4:
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
return layers.reshape(
x=trans_x,
shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
inplace=True)
def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
"""
Scaled Dot-Product Attention
"""
scaled_q = layers.scale(x=q, scale=d_key ** -0.5)
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
if attn_bias:
product += attn_bias
weights = layers.softmax(product)
if dropout_rate:
weights = layers.dropout(
weights,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
out = layers.matmul(weights, v)
return out
q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
if cache is not None: # use cache and concat time steps
# Since the inplace reshape in __split_heads changes the shape of k and
# v, which is the cache input for next time step, reshape the cache
# input from the previous time step first.
k = cache["k"] = layers.concat(
[layers.reshape(
cache["k"], shape=[0, 0, d_model]), k], axis=1)
v = cache["v"] = layers.concat(
[layers.reshape(
cache["v"], shape=[0, 0, d_model]), v], axis=1)
q = __split_heads(q, n_head)
k = __split_heads(k, n_head)
v = __split_heads(v, n_head)
ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
dropout_rate)
out = __combine_heads(ctx_multiheads)
# Project back to the model size.
proj_out = layers.fc(input=out,
size=d_model,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_output_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_output_fc.b_0')
return proj_out
def positionwise_feed_forward(x,
d_inner_hid,
d_hid,
dropout_rate,
hidden_act,
param_initializer=None,
name='ffn'):
"""
Position-wise Feed-Forward Networks.
This module consists of two linear transformations with an activation (hidden_act)
in between, which is applied to each position separately and identically.
"""
hidden = layers.fc(input=x,
size=d_inner_hid,
num_flatten_dims=2,
act=hidden_act,
param_attr=fluid.ParamAttr(
name=name + '_fc_0.w_0',
initializer=param_initializer),
bias_attr=name + '_fc_0.b_0')
if dropout_rate:
hidden = layers.dropout(
hidden,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
out = layers.fc(input=hidden,
size=d_hid,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_fc_1.w_0', initializer=param_initializer),
bias_attr=name + '_fc_1.b_0')
return out
def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
name=''):
"""
Add residual connection, layer normalization and dropout to the out tensor
optionally according to the value of process_cmd.
This will be used before or after multi-head attention and position-wise
feed-forward networks.
"""
for cmd in process_cmd:
if cmd == "a": # add residual connection
out = out + prev_out if prev_out else out
elif cmd == "n": # add layer normalization
out_dtype = out.dtype
if out_dtype == fluid.core.VarDesc.VarType.FP16:
out = layers.cast(x=out, dtype="float32")
out = layers.layer_norm(
out,
begin_norm_axis=len(out.shape) - 1,
param_attr=fluid.ParamAttr(
name=name + '_layer_norm_scale',
initializer=fluid.initializer.Constant(1.)),
bias_attr=fluid.ParamAttr(
name=name + '_layer_norm_bias',
initializer=fluid.initializer.Constant(0.)))
if out_dtype == fluid.core.VarDesc.VarType.FP16:
out = layers.cast(x=out, dtype="float16")
elif cmd == "d": # add dropout
if dropout_rate:
out = layers.dropout(
out,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
return out
pre_process_layer = partial(pre_post_process_layer, None)
post_process_layer = pre_post_process_layer
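# pre_process_layer binds prev_out=None, so a residual "a" command is a no-op
# there; post_process_layer is called with the real residual input instead.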
def encoder_layer(enc_input,
attn_bias,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd="n",
postprocess_cmd="da",
param_initializer=None,
name=''):
"""The encoder layers that can be stacked to form a deep encoder.
This module consists of multi-head (self) attention followed by a
position-wise feed-forward network, each wrapped with post_process_layer
to add residual connection, layer normalization and dropout.
"""
attn_output = multi_head_attention(
pre_process_layer(
enc_input,
preprocess_cmd,
prepostprocess_dropout,
name=name + '_pre_att'),
None,
None,
attn_bias,
d_key,
d_value,
d_model,
n_head,
attention_dropout,
param_initializer=param_initializer,
name=name + '_multi_head_att')
attn_output = post_process_layer(
enc_input,
attn_output,
postprocess_cmd,
prepostprocess_dropout,
name=name + '_post_att')
ffd_output = positionwise_feed_forward(
pre_process_layer(
attn_output,
preprocess_cmd,
prepostprocess_dropout,
name=name + '_pre_ffn'),
d_inner_hid,
d_model,
relu_dropout,
hidden_act,
param_initializer=param_initializer,
name=name + '_ffn')
return post_process_layer(
attn_output,
ffd_output,
postprocess_cmd,
prepostprocess_dropout,
name=name + '_post_ffn')
def encoder(enc_input,
attn_bias,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd="n",
postprocess_cmd="da",
param_initializer=None,
name=''):
"""
The encoder is composed of a stack of identical layers returned by calling
encoder_layer.
"""
for i in range(n_layer):
enc_output = encoder_layer(
enc_input,
attn_bias,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd,
postprocess_cmd,
param_initializer=param_initializer,
name=name + '_layer_' + str(i))
enc_input = enc_output
enc_output = pre_process_layer(
enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder")
return enc_output
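A plain-numpy sketch of the attn_bias masking described in multi_head_attention above (illustrative only; toy_attention is an invented name, not part of this module):
import numpy as np

def toy_attention(q, k, v, attn_bias):
    """q, k, v: [batch, seq, d]; attn_bias: [batch, seq, seq] of 0 or -1e9."""
    d = q.shape[-1]
    logits = np.matmul(q, np.swapaxes(k, -1, -2)) / np.sqrt(d)
    logits = logits + attn_bias  # -1e9 at masked positions -> weight ~ 0
    weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
    weights = weights / weights.sum(axis=-1, keepdims=True)
    return np.matmul(weights, v)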
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import json
import yaml
import six
import logging
logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"
class JsonConfig(object):
"""
A high-level API for handling a json config file.
"""
def __init__(self, config_path):
self._config_dict = self._parse(config_path)
def _parse(self, config_path):
try:
with open(config_path) as json_file:
config_dict = json.load(json_file)
except Exception:
raise IOError("Error in parsing bert model config file '%s'" %
config_path)
else:
return config_dict
def __getitem__(self, key):
return self._config_dict[key]
def print_config(self):
for arg, value in sorted(six.iteritems(self._config_dict)):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
class ArgumentGroup(object):
def __init__(self, parser, title, des):
self._group = parser.add_argument_group(title=title, description=des)
def add_arg(self, name, type, default, help, **kwargs):
type = str2bool if type == bool else type
self._group.add_argument(
"--" + name,
default=default,
type=type,
help=help + ' Default: %(default)s.',
**kwargs)
class ArgConfig(object):
"""
A high-level API for handling argument configs.
"""
def __init__(self):
parser = argparse.ArgumentParser()
train_g = ArgumentGroup(parser, "training", "training options.")
train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
train_g.add_arg("learning_rate", float, 5e-5,
"Learning rate used to train with warmup.")
train_g.add_arg(
"lr_scheduler",
str,
"linear_warmup_decay",
"scheduler of learning rate.",
choices=['linear_warmup_decay', 'noam_decay'])
train_g.add_arg("weight_decay", float, 0.01,
"Weight decay rate for L2 regularizer.")
train_g.add_arg(
"warmup_proportion", float, 0.1,
"Proportion of training steps to perform linear learning rate warmup for."
)
train_g.add_arg("save_steps", int, 1000,
"The steps interval to save checkpoints.")
train_g.add_arg("use_fp16", bool, False,
"Whether to use fp16 mixed precision training.")
train_g.add_arg(
"loss_scaling", float, 1.0,
"Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled."
)
train_g.add_arg("pred_dir", str, None,
"Path to save the prediction results")
log_g = ArgumentGroup(parser, "logging", "logging related.")
log_g.add_arg("skip_steps", int, 10,
"The steps interval to print loss.")
log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")
run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
run_type_g.add_arg("use_cuda", bool, True,
"If set, use GPU for training.")
run_type_g.add_arg(
"use_fast_executor", bool, False,
"If set, use fast parallel executor (in experiment).")
run_type_g.add_arg(
"num_iteration_per_drop_scope", int, 1,
"Ihe iteration intervals to clean up temporary variables.")
run_type_g.add_arg("do_train", bool, True,
"Whether to perform training.")
run_type_g.add_arg("do_predict", bool, True,
"Whether to perform prediction.")
custom_g = ArgumentGroup(parser, "customize", "customized options.")
self.custom_g = custom_g
self.parser = parser
def add_arg(self, name, dtype, default, descrip):
self.custom_g.add_arg(name, dtype, default, descrip)
def build_conf(self):
return self.parser.parse_args()
def str2bool(v):
# because argparse does not support parsing "True"/"False" strings into
# python booleans directly
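# e.g. str2bool("True") -> True, str2bool("0") -> False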
return v.lower() in ("true", "t", "1")
def print_arguments(args, log=None):
if not log:
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
else:
log.info('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
log.info('%s: %s' % (arg, value))
log.info('------------------------------------------------')
class PDConfig(object):
"""
A high-level API for managing configuration files in PaddlePaddle.
Can jointly work with command-line arguments, json files and yaml files.
"""
def __init__(self, json_file="", yaml_file="", fuse_args=True):
"""
Init function for PDConfig.
json_file: the path to the json configure file.
yaml_file: the path to the yaml configure file.
fuse_args: if fuse the json/yaml configs with argparse.
"""
assert isinstance(json_file, str)
assert isinstance(yaml_file, str)
if json_file != "" and yaml_file != "":
raise Warning(
"json_file and yaml_file can not co-exist for now. please only use one configure file type."
)
return
self.args = None
self.arg_config = {}
self.json_config = {}
self.yaml_config = {}
parser = argparse.ArgumentParser()
self.default_g = ArgumentGroup(parser, "default", "default options.")
self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
self.json_g = ArgumentGroup(parser, "json", "options from json.")
self.com_g = ArgumentGroup(parser, "custom", "customized options.")
self.default_g.add_arg("epoch", int, 2,
"Number of epoches for training.")
self.default_g.add_arg("learning_rate", float, 1e-2,
"Learning rate used to train.")
self.default_g.add_arg("do_train", bool, False,
"Whether to perform training.")
self.default_g.add_arg("do_predict", bool, False,
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.parser = parser
if json_file != "":
self.load_json(json_file, fuse_args=fuse_args)
if yaml_file:
self.load_yaml(yaml_file, fuse_args=fuse_args)
def load_json(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the json file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.json_config = json.loads(fin.read())
fin.close()
if fuse_args:
for name in self.json_config:
if not isinstance(self.json_config[name], int) \
and not isinstance(self.json_config[name], float) \
and not isinstance(self.json_config[name], str) \
and not isinstance(self.json_config[name], bool):
continue
self.json_g.add_arg(name,
type(self.json_config[name]),
self.json_config[name],
"This is from %s" % file_path)
def load_yaml(self, file_path, fuse_args=True):
if not os.path.exists(file_path):
raise Warning("the yaml file %s does not exist." % file_path)
return
with open(file_path, "r") as fin:
self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
if fuse_args:
for name in self.yaml_config:
if not isinstance(self.yaml_config[name], int) \
and not isinstance(self.yaml_config[name], float) \
and not isinstance(self.yaml_config[name], str) \
and not isinstance(self.yaml_config[name], bool):
continue
self.yaml_g.add_arg(name,
type(self.yaml_config[name]),
self.yaml_config[name],
"This is from %s" % file_path)
def build(self):
self.args = self.parser.parse_args()
self.arg_config = vars(self.args)
def __add__(self, new_arg):
assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
assert len(new_arg) >= 3
assert self.args is None
name = new_arg[0]
dtype = new_arg[1]
dvalue = new_arg[2]
desc = new_arg[3] if len(
new_arg) == 4 else "Description is not provided."
self.com_g.add_arg(name, dtype, dvalue, desc)
return self
def __getattr__(self, name):
if name in self.arg_config:
return self.arg_config[name]
if name in self.json_config:
return self.json_config[name]
if name in self.yaml_config:
return self.yaml_config[name]
raise Warning("The argument %s is not defined." % name)
def Print(self):
print("-" * 70)
for name in self.arg_config:
print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
for name in self.json_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.json_config[name])))
for name in self.yaml_config:
if name not in self.arg_config:
print("%s:\t\t\t\t%s" %
(str(name), str(self.yaml_config[name])))
print("-" * 70)
if __name__ == "__main__":
"""
pd_config = PDConfig(json_file = "./test/bert_config.json")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
pd_config = PDConfig(yaml_file = "./test/bert_config.yaml")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
"""
pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
pd_config += ("my_age", int, 18, "I am forever 18.")
pd_config.build()
print(pd_config.do_train)
print(pd_config.hidden_size)
print(pd_config.my_age)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz
tar -xvf dmtk_data_1.0.0.tar.gz
rm dmtk_data_1.0.0.tar.gz
wget --no-check-certificate https://baidu-nlp.bj.bcebos.com/dmtk_models_1.0.0.tar.gz
tar -xvf dmtk_models_1.0.0.tar.gz
rm dmtk_models_1.0.0.tar.gz
wget --no-check-certificate https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz
tar -xvf uncased_L-12_H-768_A-12.tar.gz
rm uncased_L-12_H-768_A-12.tar.gz