Unverified commit 7d338bb0, authored by Yibing Liu, committed by GitHub

Remove dirs unarchived & legacy in release 1.6 (#3666)

Parent e3558f53
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
cudaid=${chinese_ner:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --num_passes 300 --device GPU --enable_ce | python _ce.py
cudaid=${chinese_ner_4:=0,1,2,3} # use cards 0-3 as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --num_passes 300 --device GPU --parallel True --enable_ce | python _ce.py
export CPU_NUM=1
export NUM_THREADS=1
FLAGS_benchmark=true python train.py --num_passes 300 --device CPU --enable_ce | python _ce.py
# Chinese Named Entity Recognition Example Using ParallelExecutor
Below is a brief directory structure and description for this example:
```text
.
├── data # data required by this example, obtained externally
├── reader.py # data reading interface, obtained externally
├── README.md # this document
├── train.py # training script
├── infer.py # inference script
```
## Data
Under the `data` directory there are two folders: `train_files` holds the training data and `test_files` holds the test data. As an example, we placed two files in each folder. For actual training, put your data into the corresponding directories as needed and, according to your data format, modify the data reading function in `reader.py`.
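For reference, here is a minimal sketch of the line format that the default `file_reader` in `reader.py` parses. Fields are separated by `;`: the first field is ignored by the reader, while the second, third, and fourth carry space-separated word ids, label ids, and mention ids; the values below are hypothetical:
```text
sample_text;152804 130048 38862;0 0 11;0 0 1
```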
## Training
Run
```
python train.py --help
```
to get help on the command-line arguments. After setting the correct data paths and other arguments, run `train.py` to start training.
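For example, a typical run (paths and pass count are illustrative) might be:
```
python train.py \
    --device GPU \
    --train_data_dir data/train_files \
    --test_data_dir data/test_files \
    --model_save_dir ./output \
    --num_passes 100
```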
Training logs look like
```txt
pass_id:0, time_cost:4.92960214615s
[Train] precision:0.000862136531076, recall:0.0059880239521, f1:0.00150726226363
[Test] precision:0.000796178343949, recall:0.00335758254057, f1:0.00128713933283
pass_id:1, time_cost:0.715255975723s
[Train] precision:0.00474094141551, recall:0.00762112139358, f1:0.00584551148225
[Test] precision:0.0228873239437, recall:0.00727476217124, f1:0.0110403397028
pass_id:2, time_cost:0.740842103958s
[Train] precision:0.0120967741935, recall:0.00163309744148, f1:0.00287769784173
[Test] precision:0, recall:0.0, f1:0
```
## Inference
Similar to training, for inference specify the path of the model to test, the test data, and the path of the prediction label file, then run `infer.py` to start inference.
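For example (paths are illustrative; `params_pass_0` is the model saved after the first pass):
```
python infer.py \
    --device GPU \
    --model_path output/params_pass_0 \
    --test_data_dir data/test_files \
    --test_label_file data/label_dict \
    --batch_size 6
```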
Inference results look like
```txt
152804 O O
130048 O O
38862 10-B O
784 O O
1540 O O
4145 O O
2255 O O
0 O O
1279 O O
7793 O O
373 O O
1621 O O
815 O O
2 O O
247 24-B O
401 24-I O
```
The output has three columns separated by `\t`: the first is the index of the input word, the second is the gold label, and the third is the predicted label. Multiple input sequences are separated by blank lines.
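As a minimal sketch, this output can be parsed back into per-sequence records like so (the file name is hypothetical):
```python
def read_predictions(path):
    # Parse the three-column, tab-separated prediction output into a list
    # of sequences; blank lines separate sequences.
    sequences, current = [], []
    with open(path) as f:
        for line in f:
            line = line.rstrip('\n')
            if not line:
                if current:
                    sequences.append(current)
                    current = []
                continue
            word_id, gold_tag, pred_tag = line.split('\t')
            current.append((int(word_id), gold_tag, pred_tag))
    if current:
        sequences.append(current)
    return sequences
```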
# This file is only used for the continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi
each_pass_duration_cpu1_thread1_kpi = DurationKpi(
'each_pass_duration_cpu1_thread1', 0.08, 0, actived=True)
train_recall_cpu1_thread1_kpi = AccKpi('train_recall_cpu1_thread1', 0.08, 0)
each_pass_duration_gpu1_kpi = DurationKpi(
'each_pass_duration_gpu1', 0.08, 0, actived=True)
train_recall_gpu1_kpi = AccKpi('train_recall_gpu1', 0.08, 0)
each_pass_duration_gpu4_kpi = DurationKpi(
'each_pass_duration_gpu4', 0.08, 0, actived=True)
train_recall_gpu4_kpi = AccKpi('train_recall_gpu4', 0.08, 0)
tracking_kpis = [
each_pass_duration_cpu1_thread1_kpi,
train_recall_cpu1_thread1_kpi,
each_pass_duration_gpu1_kpi,
train_recall_gpu1_kpi,
each_pass_duration_gpu4_kpi,
train_recall_gpu4_kpi,
]
def parse_log(log):
    '''
    This method should be implemented by model developers.
    The suggestion:
    each KPI line in the log should contain three tab-separated fields,
    "kpis", the KPI name, and its value, for example:
    "
    kpis\ttrain_cost\t1.0
    kpis\ttest_cost\t1.0
    kpis\ttrain_acc\t1.2
    "
    '''
for line in log.split('\n'):
fs = line.strip().split('\t')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
kpi_name = fs[1]
kpi_value = float(fs[2])
yield kpi_name, kpi_value
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
log_to_ce(log)
24-B
24-I
27-B
27-I
20-B
20-I
21-B
21-I
22-B
22-I
23-B
23-I
28-B
28-I
29-B
29-I
12-B
12-I
11-B
11-I
10-B
10-I
13-B
13-I
38-B
38-I
14-B
14-I
16-B
16-I
33-B
33-I
18-B
18-I
31-B
31-I
30-B
30-I
37-B
37-I
36-B
36-I
35-B
35-I
19-B
19-I
32-B
32-I
O
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import argparse
import time
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
import paddle
import reader
def parse_args():
parser = argparse.ArgumentParser("Run inference.")
parser.add_argument(
'--batch_size',
type=int,
default=6,
help='The size of a batch. (default: %(default)d)')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--model_path',
type=str,
default='output/params_pass_0',
help='A path to the model. (default: %(default)s)')
parser.add_argument(
'--test_data_dir',
type=str,
default='data/test_files',
help='A directory with test data files. (default: %(default)s)')
parser.add_argument(
'--test_label_file',
type=str,
default='data/label_dict',
help='A file with test labels. (default: %(default)s)')
parser.add_argument(
'--num_passes', type=int, default=1, help='The number of passes.')
parser.add_argument(
'--skip_pass_num',
type=int,
default=0,
        help='The number of initial passes to skip in statistics calculations.')
parser.add_argument(
'--profile', action='store_true', help='If set, do profiling.')
args = parser.parse_args()
return args
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def load_reverse_dict(dict_path):
    # Build an {index: label} mapping from a file with one tab-separated
    # dictionary entry per line.
    with open(dict_path, "r") as f:
        return dict((idx, line.strip().split("\t")[0])
                    for idx, line in enumerate(f))
def to_lodtensor(data, place):
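    # Convert a batch of variable-length sequences into a LoDTensor:
    # flatten the data and record cumulative sequence lengths as the
    # level-of-detail (LoD) info, e.g. lengths [3, 2] give lod [[0, 3, 5]].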
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def infer(args):
word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
mention = fluid.layers.data(
name='mention', shape=[1], dtype='int64', lod_level=1)
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1)
label_reverse_dict = load_reverse_dict(args.test_label_file)
test_data = paddle.batch(
reader.file_reader(args.test_data_dir), batch_size=args.batch_size)
place = fluid.CUDAPlace(0) if args.device == 'GPU' else fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=[word, mention, target], place=place)
exe = fluid.Executor(place)
inference_scope = fluid.Scope()
with fluid.scope_guard(inference_scope):
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(args.model_path, exe)
total_passes = args.num_passes + args.skip_pass_num
batch_times = [0] * total_passes
word_counts = [0] * total_passes
wpses = [0] * total_passes
all_iters = 0
for pass_id in range(total_passes):
if pass_id < args.skip_pass_num:
print("Warm-up pass")
if pass_id == args.skip_pass_num:
profiler.reset_profiler()
iters = 0
for data in test_data():
word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
start = time.time()
crf_decode = exe.run(inference_program,
feed={"word": word,
"mention": mention},
fetch_list=fetch_targets,
return_numpy=False)
batch_time = time.time() - start
lod_info = (crf_decode[0].lod())[0]
np_data = np.array(crf_decode[0])
word_count = 0
assert len(data) == len(lod_info) - 1
for sen_index in range(len(data)):
assert len(data[sen_index][0]) == lod_info[
sen_index + 1] - lod_info[sen_index]
word_index = 0
for tag_index in range(lod_info[sen_index],
lod_info[sen_index + 1]):
word = str(data[sen_index][0][word_index])
gold_tag = label_reverse_dict[data[sen_index][2][
word_index]]
tag = label_reverse_dict[np_data[tag_index][0]]
word_index += 1
word_count += word_index
batch_times[pass_id] += batch_time
word_counts[pass_id] += word_count
iters += 1
all_iters += 1
batch_times[pass_id] /= iters
word_counts[pass_id] /= iters
wps = word_counts[pass_id] / batch_times[pass_id]
wpses[pass_id] = wps
print(
"Pass: %d, iterations (total): %d (%d), latency: %.5f s, words: %d, wps: %f"
% (pass_id, iters, all_iters, batch_times[pass_id],
word_counts[pass_id], wps))
# Postprocess benchmark data
latencies = batch_times[args.skip_pass_num:]
latency_avg = np.average(latencies)
latency_std = np.std(latencies)
latency_pc99 = np.percentile(latencies, 99)
wps_avg = np.average(wpses)
wps_std = np.std(wpses)
wps_pc01 = np.percentile(wpses, 1)
# Benchmark output
print('\nTotal passes (incl. warm-up): %d' % (total_passes))
print('Total iterations (incl. warm-up): %d' % (all_iters))
print('Total examples (incl. warm-up): %d' % (all_iters * args.batch_size))
print('avg latency: %.5f, std latency: %.5f, 99pc latency: %.5f' %
(latency_avg, latency_std, latency_pc99))
print('avg wps: %.5f, std wps: %.5f, wps for 99pc latency: %.5f' %
(wps_avg, wps_std, wps_pc01))
if __name__ == "__main__":
args = parse_args()
print_arguments(args)
if args.profile:
if args.device == 'GPU':
with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
infer(args)
else:
with profiler.profiler('CPU', sorted_key='total') as cpuprof:
infer(args)
else:
infer(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
def file_reader(file_dir):
    def reader():
        files = os.listdir(file_dir)
        for fi in files:
            with open(os.path.join(file_dir, fi), 'r') as f:
                for line in f:
                    # Each line holds ';'-separated fields; fields 1-3 carry
                    # space-separated word ids, label ids and mention ids.
                    features = line.strip().split(";")
                    word_idx = [
                        int(item) for item in features[1].strip().split(" ")
                    ]
                    target_idx = []
                    for item in features[2].strip().split(" "):
                        label_index = int(item)
                        # Remap label id 0 to the last slot (48) and shift
                        # all other ids down by one.
                        if label_index == 0:
                            label_index = 48
                        else:
                            label_index -= 1
                        target_idx.append(label_index)
                    mention_idx = [
                        int(item) for item in features[3].strip().split(" ")
                    ]
                    yield word_idx, mention_idx, target_idx
    return reader
## Purpose of this directory
This directory provides example execution commands, packaged in the bash scripts described below.
## Preparation
To make the shell scripts executable, run the following in this directory:
`chmod +x *.sh`
## Performance tips
Set the environment flags below for best performance:
```
KMP_AFFINITY=granularity=fine,compact,1,0
OMP_NUM_THREADS=<num_of_physical_cores>
```
For example, you can export them in your shell, or set them inside the specific scripts.
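For instance, assuming a machine with 20 physical cores (a hypothetical value):
```
export KMP_AFFINITY=granularity=fine,compact,1,0
export OMP_NUM_THREADS=20
```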
## Training
### CPU with mkldnn
Run:
`./train.sh MKLDNN`
### CPU without mkldnn
Run:
`./train.sh CPU`
### GPU
Run:
`./train.sh GPU`
## Inference
### CPU with mkldnn
Run:
`./infer.sh MKLDNN`
### CPU without mkldnn
Run:
`./infer.sh CPU`
### GPU
Run:
`./infer.sh GPU`
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
mode=$1 # one of: CPU, GPU, MKLDNN
if [ "$mode" = "CPU" ]; then
device="CPU"
model_path="cpu_model"
elif [ "$mode" = "GPU" ]; then
device="GPU"
model_path="gpu_model"
elif [ "$mode" = "MKLDNN" ]; then
device="CPU"
model_path="mkldnn_model"
export FLAGS_use_mkldnn=1
else
echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}"
exit 1
fi
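# Detect threads per core (1 means hyper-threading is off).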
ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
if [ $ht -eq 1 ]; then # HT is OFF
if [ -z "$KMP_AFFINITY" ]; then
export KMP_AFFINITY="granularity=fine,compact,0,0"
fi
if [ -z "$OMP_DYNAMIC" ]; then
export OMP_DYNAMIC="FALSE"
fi
else # HT is ON
if [ -z "$KMP_AFFINITY" ]; then
export KMP_AFFINITY="granularity=fine,compact,1,0"
fi
fi
python ../infer.py \
--device $device \
--num_passes 1 \
--skip_pass_num 2 \
--profile \
--test_data_dir ../data/test_files \
--test_label_file ../data/label_dict \
--model_path $model_path/params_pass_0
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
mode=$1 # one of: CPU, GPU, MKLDNN
if [ "$mode" = "CPU" ]; then
device="CPU"
parallel="--parallel True"
save_model_dir="cpu_model"
elif [ "$mode" = "GPU" ]; then
device="GPU"
parallel="--parallel True"
save_model_dir="gpu_model"
elif [ "$mode" = "MKLDNN" ]; then
device="CPU"
parallel=""
save_model_dir="mkldnn_model"
export FLAGS_use_mkldnn=1
else
echo "Invalid mode provided. Please use one of {GPU, CPU, MKLDNN}"
exit 1
fi
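# Detect threads per core (1 means hyper-threading is off).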
ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
if [ $ht -eq 1 ]; then # HT is OFF
if [ -z "$KMP_AFFINITY" ]; then
export KMP_AFFINITY="granularity=fine,compact,0,0"
fi
if [ -z "$OMP_DYNAMIC" ]; then
export OMP_DYNAMIC="FALSE"
fi
else # HT is ON
if [ -z "$KMP_AFFINITY" ]; then
export KMP_AFFINITY="granularity=fine,compact,1,0"
fi
fi
python ../train.py \
--device $device \
$parallel \
--model_save_dir $save_model_dir \
--test_data_dir ../data/test_files \
--train_data_dir ../data/train_files \
--num_passes 1
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import time
import argparse
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import NormalInitializer
import reader
def parse_args():
parser = argparse.ArgumentParser("Run training.")
parser.add_argument(
'--batch_size',
type=int,
default=256,
help='The size of a batch. (default: %(default)d)')
parser.add_argument(
'--word_dict_len',
type=int,
default=1942563,
        help='The length of the word dictionary. (default: %(default)d)')
parser.add_argument(
'--label_dict_len',
type=int,
default=49,
        help='The length of the label dictionary. (default: %(default)d)')
parser.add_argument(
'--device',
type=str,
default='GPU',
choices=['CPU', 'GPU'],
help='The device type. (default: %(default)s)')
parser.add_argument(
'--train_data_dir',
type=str,
default='data/train_files',
help='A directory with train data files. (default: %(default)s)')
parser.add_argument(
'--parallel',
type=bool,
default=False,
help="Whether to use parallel training. (default: %(default)s)")
parser.add_argument(
'--test_data_dir',
type=str,
default='data/test_files',
help='A directory with test data files. (default: %(default)s)')
parser.add_argument(
'--model_save_dir',
type=str,
default='./output',
help='A directory for saving models. (default: %(default)s)')
parser.add_argument(
'--num_passes',
type=int,
default=1000,
help='The number of epochs. (default: %(default)d)')
parser.add_argument(
'--enable_ce',
action='store_true',
help='If set, run the task with continuous evaluation logs.')
args = parser.parse_args()
return args
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def load_reverse_dict(dict_path):
    # Build an {index: label} mapping from a file with one tab-separated
    # dictionary entry per line.
    with open(dict_path, "r") as f:
        return dict((idx, line.strip().split("\t")[0])
                    for idx, line in enumerate(f))
def to_lodtensor(data, place):
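    # Convert a batch of variable-length sequences into a LoDTensor:
    # flatten the data and record cumulative sequence lengths as the
    # level-of-detail (LoD) info, e.g. lengths [3, 2] give lod [[0, 3, 5]].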
seq_lens = [len(seq) for seq in data]
cur_len = 0
lod = [cur_len]
for l in seq_lens:
cur_len += l
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int64")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def ner_net(word_dict_len, label_dict_len):
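    # Bi-directional GRU-CRF tagger: word and mention embeddings are
    # concatenated, run through forward and reverse dynamic GRU branches,
    # and the merged hidden states feed a linear-chain CRF.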
IS_SPARSE = False
word_dim = 32
mention_dict_len = 57
mention_dim = 20
grnn_hidden = 36
emb_lr = 5
init_bound = 0.1
    def _net_conf(word, mention, target):
word_embedding = fluid.layers.embedding(
input=word,
size=[word_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=emb_lr,
name="word_emb",
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound)))
mention_embedding = fluid.layers.embedding(
input=mention,
size=[mention_dict_len, mention_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=emb_lr,
name="mention_emb",
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound)))
word_embedding_r = fluid.layers.embedding(
input=word,
size=[word_dict_len, word_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=emb_lr,
name="word_emb_r",
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound)))
mention_embedding_r = fluid.layers.embedding(
input=mention,
size=[mention_dict_len, mention_dim],
dtype='float32',
is_sparse=IS_SPARSE,
param_attr=fluid.ParamAttr(
learning_rate=emb_lr,
name="mention_emb_r",
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound)))
word_mention_vector = fluid.layers.concat(
input=[word_embedding, mention_embedding], axis=1)
word_mention_vector_r = fluid.layers.concat(
input=[word_embedding_r, mention_embedding_r], axis=1)
pre_gru = fluid.layers.fc(
input=word_mention_vector,
size=grnn_hidden * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru = fluid.layers.dynamic_gru(
input=pre_gru,
size=grnn_hidden,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
pre_gru_r = fluid.layers.fc(
input=word_mention_vector_r,
size=grnn_hidden * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru_r = fluid.layers.dynamic_gru(
input=pre_gru_r,
size=grnn_hidden,
is_reverse=True,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
gru_merged = fluid.layers.concat(input=[gru, gru_r], axis=1)
emission = fluid.layers.fc(
size=label_dict_len,
input=gru_merged,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-init_bound, high=init_bound),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
crf_cost = fluid.layers.linear_chain_crf(
input=emission,
label=target,
param_attr=fluid.ParamAttr(
name='crfw',
learning_rate=0.2, ))
avg_cost = fluid.layers.mean(x=crf_cost)
return avg_cost, emission
word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
mention = fluid.layers.data(
name='mention', shape=[1], dtype='int64', lod_level=1)
target = fluid.layers.data(
name="target", shape=[1], dtype='int64', lod_level=1)
avg_cost, emission = _net_conf(word, mention, target)
return avg_cost, emission, word, mention, target
def test2(exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list):
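    # Evaluation with a plain Executor: each fetched chunk count comes back
    # as a one-element array, so take element 0 before updating the evaluator.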
chunk_evaluator.reset()
for data in test_data():
word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
target = to_lodtensor(list(map(lambda x: x[2], data)), place)
result_list = exe.run(
inference_program,
feed={"word": word,
"mention": mention,
"target": target},
fetch_list=cur_fetch_list)
number_infer = np.array(result_list[0])
number_label = np.array(result_list[1])
number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer[0].astype('int64'),
number_label[0].astype('int64'),
number_correct[0].astype('int64'))
return chunk_evaluator.eval()
def test(test_exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list):
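    # Evaluation with a ParallelExecutor: fetched chunk counts are gathered
    # per device, so sum them across devices before updating the evaluator.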
chunk_evaluator.reset()
for data in test_data():
word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
target = to_lodtensor(list(map(lambda x: x[2], data)), place)
result_list = test_exe.run(
fetch_list=cur_fetch_list,
feed={"word": word,
"mention": mention,
"target": target})
number_infer = np.array(result_list[0])
number_label = np.array(result_list[1])
number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer.sum().astype('int64'),
number_label.sum().astype('int64'),
number_correct.sum().astype('int64'))
return chunk_evaluator.eval()
def main(args):
if not os.path.exists(args.model_save_dir):
os.makedirs(args.model_save_dir)
    # Use a distinct name to avoid shadowing the enclosing main() function.
    main_program = fluid.Program()
    startup = fluid.Program()
    if args.enable_ce:
        SEED = 102
        main_program.random_seed = SEED
        startup.random_seed = SEED
    with fluid.program_guard(main_program, startup):
avg_cost, feature_out, word, mention, target = ner_net(
args.word_dict_len, args.label_dict_len)
crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
(precision, recall, f1_score, num_infer_chunks, num_label_chunks,
num_correct_chunks) = fluid.layers.chunk_eval(
input=crf_decode,
label=target,
chunk_scheme="IOB",
num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0)))
inference_program = fluid.default_main_program().clone(for_test=True)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
chunk_evaluator = fluid.metrics.ChunkEvaluator()
train_reader = paddle.batch(
paddle.reader.shuffle(
reader.file_reader(args.train_data_dir), buf_size=2000000),
batch_size=args.batch_size)
test_reader = paddle.batch(
paddle.reader.shuffle(
reader.file_reader(args.test_data_dir), buf_size=2000000),
batch_size=args.batch_size)
place = fluid.CUDAPlace(0) if args.device == 'GPU' else fluid.CPUPlace()
feeder = fluid.DataFeeder(
feed_list=[word, mention, target], place=place)
exe = fluid.Executor(place)
exe.run(startup)
if args.parallel:
train_exe = fluid.ParallelExecutor(
loss_name=avg_cost.name, use_cuda=(args.device == 'GPU'))
test_exe = fluid.ParallelExecutor(
use_cuda=(args.device == 'GPU'),
main_program=inference_program,
share_vars_from=train_exe)
else:
train_exe = exe
test_exe = exe
total_time = 0
ce_info = []
batch_id = 0
for pass_id in range(args.num_passes):
chunk_evaluator.reset()
train_reader_iter = train_reader()
start_time = time.time()
while True:
try:
cur_batch = next(train_reader_iter)
cost, nums_infer, nums_label, nums_correct = train_exe.run(
fetch_list=[
avg_cost.name, num_infer_chunks.name,
num_label_chunks.name, num_correct_chunks.name
],
feed=feeder.feed(cur_batch))
chunk_evaluator.update(
np.array(nums_infer).sum().astype("int64"),
np.array(nums_label).sum().astype("int64"),
np.array(nums_correct).sum().astype("int64"))
cost_list = np.array(cost)
batch_id += 1
except StopIteration:
break
end_time = time.time()
total_time += end_time - start_time
print("pass_id:" + str(pass_id) + ", time_cost:" + str(
end_time - start_time) + "s")
precision, recall, f1_score = chunk_evaluator.eval()
print("[Train] precision:" + str(precision) + ", recall:" + str(
recall) + ", f1:" + str(f1_score))
ce_info.append(recall)
p, r, f1 = test2(
exe, chunk_evaluator, inference_program, test_reader, place,
[num_infer_chunks, num_label_chunks, num_correct_chunks])
print("[Test] precision:" + str(p) + ", recall:" + str(r) + ", f1:"
+ str(f1))
save_dirname = os.path.join(args.model_save_dir,
"params_pass_%d" % pass_id)
fluid.io.save_inference_model(save_dirname, ['word', 'mention'],
[crf_decode], exe)
        # Continuous evaluation (CE) metrics only
if args.enable_ce:
ce_recall = 0
        try:
            ce_recall = ce_info[-2]
        except IndexError:
            print("ce info error")
epoch_idx = args.num_passes
device = get_device(args)
if args.device == "GPU":
gpu_num = device[1]
print("kpis\teach_pass_duration_gpu%s\t%s" %
(gpu_num, total_time / epoch_idx))
print("kpis\ttrain_recall_gpu%s\t%s" % (gpu_num, ce_recall))
else:
cpu_num = device[1]
threads_num = device[2]
print("kpis\teach_pass_duration_cpu%s_thread%s\t%s" %
(cpu_num, threads_num, total_time / epoch_idx))
print("kpis\ttrain_recall_cpu%s_thread%s\t%s" %
(cpu_num, threads_num, ce_recall))
def get_device(args):
if args.device == "GPU":
gpus = os.environ.get("CUDA_VISIBLE_DEVICES", "")
gpu_num = len(gpus.split(','))
return "gpu", gpu_num
else:
threads_num = os.environ.get('NUM_THREADS', 1)
cpu_num = os.environ.get('CPU_NUM', 1)
return "cpu", int(cpu_num), int(threads_num)
if __name__ == "__main__":
args = parse_args()
print_arguments(args)
main(args)
#!/bin/bash
# This file is only used for continuous evaluation.
export CE_MODE_X=1
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
if [ ! -e data_small.pkl ]; then
wget -c http://dam-data.bj.bcebos.com/data_small.pkl
fi
python train_and_evaluate.py --data_path data_small.pkl \
--use_cuda \
--use_pyreader \
--num_scan_data 1 \
--batch_size 100 | python _ce.py
# __Deep Attention Matching Network__
This is the source code of the Deep Attention Matching network (DAM), proposed for multi-turn response selection in retrieval-based chatbots.
DAM is a neural matching network based entirely on the attention mechanism. Its motivation is to capture semantic dependencies among dialogue elements at different granularities in multi-turn conversation as matching evidence, in order to better match a response candidate with its multi-turn context. DAM appeared at ACL 2018; please find our paper at [http://aclweb.org/anthology/P18-1103](http://aclweb.org/anthology/P18-1103).
## __Network__
DAM is inspired by the Transformer in machine translation (Vaswani et al., 2017). We extend the Transformer's key attention mechanism in two directions and introduce both kinds of attention in one uniform neural network:
- **self-attention** gradually captures semantic representations at different granularities by stacking attention over word-level embeddings. These multi-grained semantic representations facilitate exploring segmental dependencies between context and response.
- **cross-attention** attends across context and response; it generally captures the relevance in dependency between segment pairs, providing complementary information to textual relevance for matching a response with its multi-turn context (see the sketch below).
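Both kinds of blocks build on masked scaled dot-product attention. Below is a minimal, framework-agnostic sketch in plain NumPy (illustrative only, not the actual `utils/layers.py` implementation). In self-attention, query, key, and value all come from the same utterance; in cross-attention, the query comes from a context turn and the key/value from the response (or vice versa):
```python
import numpy as np

def scaled_dot_product_attention(query, key, value, key_mask):
    # query: [len_q, d]; key, value: [len_k, d]; key_mask: [len_k] of 0/1.
    d = query.shape[-1]
    logits = query @ key.T / np.sqrt(d)                     # [len_q, len_k]
    logits = np.where(key_mask[None, :] > 0, logits, -1e9)  # mask padded keys
    weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
    weights = weights / weights.sum(axis=-1, keepdims=True)  # row-wise softmax
    return weights @ value                                   # [len_q, d]
```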
<p align="center">
<img src="images/Figure1.png"/> <br />
Overview of Deep Attention Matching Network
</p>
## __Results__
We test DAM on two large-scale multi-turn response selection tasks, i.e., the Ubuntu Corpus v1 and the Douban Conversation Corpus; the experimental results are below:
<p align="center">
<img src="images/Figure2.png"/> <br />
</p>
## __Usage__
Take the experiment on the Ubuntu Corpus v1 as an example.
1) Go to the `ubuntu` directory
```
cd ubuntu
```
2) Download the well-preprocessed data for training
```
sh download_data.sh
```
3) Execute the model training and evaluation by
```
sh train.sh
```
For a more detailed explanation of the arguments, run
```
python ../train_and_evaluate.py --help
```
By default, training runs on a single GPU. It can be switched to multi-GPU mode by simply resetting the visible devices in `train.sh`, e.g.,
```
export CUDA_VISIBLE_DEVICES=0,1,2,3
```
4) Run the test with
```
sh test.sh
```
To test different saved models, pass a different `--model_path` argument.
Similarly, you can carry out the experiment on the Douban Conversation Corpus by going to the `douban` directory and following the same procedure.
## __Dependencies__
- Python >= 2.7.3
- PaddlePaddle latest develop branch
## __Citation__
The following article describes DAM in detail. We recommend citing it by default.
```
@inproceedings{ ,
title={Multi-Turn Response Selection for Chatbots with Deep Attention Matching Network},
author={Xiangyang Zhou, Lu Li, Daxiang Dong, Yi Liu, Ying Chen, Wayne Xin Zhao, Dianhai Yu and Hua Wu},
booktitle={Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
volume={1},
pages={ -- },
year={2018}
}
```
# This file is only used for the continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
# NOTE: kpi.py should be shared among models in some way.
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.05, 0, actived=True)
tracking_kpis = [
train_cost_kpi,
train_duration_kpi,
]
def parse_log(log):
for line in log.split('\n'):
fs = line.strip().split('\t')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
print("-----%s" % fs)
kpi_name = fs[1]
kpi_value = float(fs[2])
yield kpi_name, kpi_value
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
print("*****")
print(log)
print("****")
log_to_ce(log)
url=http://dam-data.cdn.bcebos.com/douban.tar.gz
md5=e07ca68f21c20e09efb3e8b247194405
if [ ! -e douban.tar.gz ]; then
wget -c $url
fi
echo "Checking md5 sum ..."
md5sum_tmp=`md5sum douban.tar.gz | cut -d ' ' -f1`
if [ "$md5sum_tmp" != "$md5" ]; then
echo "Md5sum check failed, please remove and redownload douban.tar.gz"
exit 1
fi
echo "Untar douban.tar.gz ..."
tar -xzvf douban.tar.gz
export CUDA_VISIBLE_DEVICES=0
python -u ../test_and_evaluate.py --use_cuda \
--ext_eval \
--data_path ./data/data.pkl \
--save_path ./eval_3900 \
--model_path models/step_3900 \
--channel1_num 16 \
--batch_size 200 \
--vocab_size 172130 \
--emb_size 200 \
--_EOS_ 1
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \
--ext_eval \
--word_emb_init ./data/word_embedding.pkl \
--save_path ./models \
--use_pyreader \
--batch_size 256 \
--vocab_size 172130 \
--channel1_num 16 \
--emb_size 200 \
--_EOS_ 1
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import numpy as np
import paddle.fluid as fluid
import utils.layers as layers
class Net(object):
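    # DAM network: stacked self-attention blocks over the response and each
    # context turn, cross-attention between turn and response at every depth,
    # and a 3D convolution (cnn_3d) over the resulting similarity cube.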
def __init__(self, max_turn_num, max_turn_len, vocab_size, emb_size,
stack_num, channel1_num, channel2_num):
self._max_turn_num = max_turn_num
self._max_turn_len = max_turn_len
self._vocab_size = vocab_size
self._emb_size = emb_size
self._stack_num = stack_num
self._channel1_num = channel1_num
self._channel2_num = channel2_num
self._feed_names = []
self.word_emb_name = "shared_word_emb"
self.use_stack_op = True
self.use_mask_cache = True
self.use_sparse_embedding = True
def create_py_reader(self, capacity, name):
# turns ids
shapes = [[-1, self._max_turn_len, 1]
for i in six.moves.xrange(self._max_turn_num)]
dtypes = ["int64" for i in six.moves.xrange(self._max_turn_num)]
# turns mask
shapes += [[-1, self._max_turn_len, 1]
for i in six.moves.xrange(self._max_turn_num)]
dtypes += ["float32" for i in six.moves.xrange(self._max_turn_num)]
# response ids, response mask, label
shapes += [[-1, self._max_turn_len, 1], [-1, self._max_turn_len, 1],
[-1, 1]]
dtypes += ["int64", "float32", "float32"]
py_reader = fluid.layers.py_reader(
capacity=capacity,
shapes=shapes,
lod_levels=[0] * (2 * self._max_turn_num + 3),
dtypes=dtypes,
name=name,
use_double_buffer=True)
data_vars = fluid.layers.read_file(py_reader)
self.turns_data = data_vars[0:self._max_turn_num]
self.turns_mask = data_vars[self._max_turn_num:2 * self._max_turn_num]
self.response = data_vars[-3]
self.response_mask = data_vars[-2]
self.label = data_vars[-1]
return py_reader
def create_data_layers(self):
self._feed_names = []
self.turns_data = []
for i in six.moves.xrange(self._max_turn_num):
name = "turn_%d" % i
turn = fluid.layers.data(
name=name, shape=[self._max_turn_len, 1], dtype="int64")
self.turns_data.append(turn)
self._feed_names.append(name)
self.turns_mask = []
for i in six.moves.xrange(self._max_turn_num):
name = "turn_mask_%d" % i
turn_mask = fluid.layers.data(
name=name, shape=[self._max_turn_len, 1], dtype="float32")
self.turns_mask.append(turn_mask)
self._feed_names.append(name)
self.response = fluid.layers.data(
name="response", shape=[self._max_turn_len, 1], dtype="int64")
self.response_mask = fluid.layers.data(
name="response_mask",
shape=[self._max_turn_len, 1],
dtype="float32")
self.label = fluid.layers.data(name="label", shape=[1], dtype="float32")
self._feed_names += ["response", "response_mask", "label"]
def get_feed_names(self):
return self._feed_names
def set_word_embedding(self, word_emb, place):
word_emb_param = fluid.global_scope().find_var(
self.word_emb_name).get_tensor()
word_emb_param.set(word_emb, place)
def create_network(self):
mask_cache = dict() if self.use_mask_cache else None
response_emb = fluid.layers.embedding(
input=self.response,
size=[self._vocab_size + 1, self._emb_size],
is_sparse=self.use_sparse_embedding,
param_attr=fluid.ParamAttr(
name=self.word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1)))
# response part
Hr = response_emb
Hr_stack = [Hr]
for index in six.moves.xrange(self._stack_num):
Hr = layers.block(
name="response_self_stack" + str(index),
query=Hr,
key=Hr,
value=Hr,
d_key=self._emb_size,
q_mask=self.response_mask,
k_mask=self.response_mask,
mask_cache=mask_cache)
Hr_stack.append(Hr)
# context part
sim_turns = []
for t in six.moves.xrange(self._max_turn_num):
Hu = fluid.layers.embedding(
input=self.turns_data[t],
size=[self._vocab_size + 1, self._emb_size],
is_sparse=self.use_sparse_embedding,
param_attr=fluid.ParamAttr(
name=self.word_emb_name,
initializer=fluid.initializer.Normal(scale=0.1)))
Hu_stack = [Hu]
for index in six.moves.xrange(self._stack_num):
# share parameters
Hu = layers.block(
name="turn_self_stack" + str(index),
query=Hu,
key=Hu,
value=Hu,
d_key=self._emb_size,
q_mask=self.turns_mask[t],
k_mask=self.turns_mask[t],
mask_cache=mask_cache)
Hu_stack.append(Hu)
# cross attention
r_a_t_stack = []
t_a_r_stack = []
for index in six.moves.xrange(self._stack_num + 1):
t_a_r = layers.block(
name="t_attend_r_" + str(index),
query=Hu_stack[index],
key=Hr_stack[index],
value=Hr_stack[index],
d_key=self._emb_size,
q_mask=self.turns_mask[t],
k_mask=self.response_mask,
mask_cache=mask_cache)
r_a_t = layers.block(
name="r_attend_t_" + str(index),
query=Hr_stack[index],
key=Hu_stack[index],
value=Hu_stack[index],
d_key=self._emb_size,
q_mask=self.response_mask,
k_mask=self.turns_mask[t],
mask_cache=mask_cache)
t_a_r_stack.append(t_a_r)
r_a_t_stack.append(r_a_t)
t_a_r_stack.extend(Hu_stack)
r_a_t_stack.extend(Hr_stack)
if self.use_stack_op:
t_a_r = fluid.layers.stack(t_a_r_stack, axis=1)
r_a_t = fluid.layers.stack(r_a_t_stack, axis=1)
else:
for index in six.moves.xrange(len(t_a_r_stack)):
t_a_r_stack[index] = fluid.layers.unsqueeze(
input=t_a_r_stack[index], axes=[1])
r_a_t_stack[index] = fluid.layers.unsqueeze(
input=r_a_t_stack[index], axes=[1])
t_a_r = fluid.layers.concat(input=t_a_r_stack, axis=1)
r_a_t = fluid.layers.concat(input=r_a_t_stack, axis=1)
# sim shape: [batch_size, 2*(stack_num+1), max_turn_len, max_turn_len]
sim = fluid.layers.matmul(
x=t_a_r, y=r_a_t, transpose_y=True, alpha=1 / np.sqrt(200.0))
sim_turns.append(sim)
if self.use_stack_op:
sim = fluid.layers.stack(sim_turns, axis=2)
else:
for index in six.moves.xrange(len(sim_turns)):
sim_turns[index] = fluid.layers.unsqueeze(
input=sim_turns[index], axes=[2])
# sim shape: [batch_size, 2*(stack_num+1), max_turn_num, max_turn_len, max_turn_len]
sim = fluid.layers.concat(input=sim_turns, axis=2)
final_info = layers.cnn_3d(sim, self._channel1_num, self._channel2_num)
loss, logits = layers.loss(final_info, self.label)
return loss, logits
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
import numpy as np
import time
import argparse
import multiprocessing
import paddle
import paddle.fluid as fluid
import utils.reader as reader
from utils.util import print_arguments, mkdir
try:
import cPickle as pickle #python 2
except ImportError as e:
import pickle #python 3
from model import Net
#yapf: disable
def parse_args():
parser = argparse.ArgumentParser("Test for DAM.")
parser.add_argument(
'--batch_size',
type=int,
default=256,
help='Batch size for training. (default: %(default)d)')
parser.add_argument(
'--num_scan_data',
type=int,
default=2,
help='Number of pass for training. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
default=1e-3,
help='Learning rate used to train. (default: %(default)f)')
parser.add_argument(
'--data_path',
type=str,
default="data/ubuntu/data_small.pkl",
help='Path to training data. (default: %(default)s)')
parser.add_argument(
'--save_path',
type=str,
default="./",
help='Path to save score and result files. (default: %(default)s)')
parser.add_argument(
'--model_path',
type=str,
default="saved_models/step_1000",
help='Path to load well-trained models. (default: %(default)s)')
parser.add_argument(
'--use_cuda',
action='store_true',
help='If set, use cuda for training.')
parser.add_argument(
'--ext_eval',
action='store_true',
        help='If set, use MAP, MRR, etc. for evaluation.')
parser.add_argument(
'--max_turn_num',
type=int,
default=9,
help='Maximum number of utterances in context.')
parser.add_argument(
'--max_turn_len',
type=int,
default=50,
        help='Maximum length of sentences in turns.')
parser.add_argument(
'--word_emb_init',
type=str,
default=None,
help='Path to the initial word embedding.')
parser.add_argument(
'--vocab_size',
type=int,
default=434512,
help='The size of vocabulary.')
parser.add_argument(
'--emb_size',
type=int,
default=200,
help='The dimension of word embedding.')
parser.add_argument(
'--_EOS_',
type=int,
default=28270,
help='The id for end of sentence in vocabulary.')
parser.add_argument(
'--stack_num',
type=int,
default=5,
help='The number of stacked attentive modules in network.')
parser.add_argument(
'--channel1_num',
type=int,
default=32,
help="The channels' number of the 1st conv3d layer's output.")
parser.add_argument(
'--channel2_num',
type=int,
default=16,
help="The channels' number of the 2nd conv3d layer's output.")
args = parser.parse_args()
return args
#yapf: enable
def test(args):
if not os.path.exists(args.save_path):
mkdir(args.save_path)
if not os.path.exists(args.model_path):
raise ValueError("Invalid model init path %s" % args.model_path)
    # data config
data_conf = {
"batch_size": args.batch_size,
"max_turn_num": args.max_turn_num,
"max_turn_len": args.max_turn_len,
"_EOS_": args._EOS_,
}
dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
args.emb_size, args.stack_num, args.channel1_num,
args.channel2_num)
dam.create_data_layers()
loss, logits = dam.create_network()
loss.persistable = True
logits.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
test_program = fluid.default_main_program().clone(for_test=True)
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=400,
decay_rate=0.9,
staircase=True))
optimizer.minimize(loss)
if args.use_cuda:
place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = multiprocessing.cpu_count()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
fluid.io.load_persistables(exe, args.model_path)
test_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda, main_program=test_program)
print("start loading data ...")
with open(args.data_path, 'rb') as f:
if six.PY2:
train_data, val_data, test_data = pickle.load(f)
else:
train_data, val_data, test_data = pickle.load(f, encoding="bytes")
print("finish loading data ...")
if args.ext_eval:
import utils.douban_evaluation as eva
eval_metrics = ["MAP", "MRR", "P@1", "R_{10}@1", "R_{10}@2", "R_{10}@5"]
else:
import utils.evaluation as eva
eval_metrics = ["R_2@1", "R_{10}@1", "R_{10}@2", "R_{10}@5"]
test_batches = reader.build_batches(test_data, data_conf)
test_batch_num = len(test_batches["response"])
print("test batch num: %d" % test_batch_num)
print("begin inference ...")
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
score_path = os.path.join(args.save_path, 'score.txt')
score_file = open(score_path, 'w')
for it in six.moves.xrange(test_batch_num // dev_count):
feed_list = []
for dev in six.moves.xrange(dev_count):
index = it * dev_count + dev
batch_data = reader.make_one_batch_input(test_batches, index)
feed_dict = dict(zip(dam.get_feed_names(), batch_data))
feed_list.append(feed_dict)
predicts = test_exe.run(feed=feed_list, fetch_list=[logits.name])
scores = np.array(predicts[0])
print("step = %d" % it)
for dev in six.moves.xrange(dev_count):
index = it * dev_count + dev
for i in six.moves.xrange(args.batch_size):
score_file.write(
str(scores[args.batch_size * dev + i][0]) + '\t' + str(
test_batches["label"][index][i]) + '\n')
score_file.close()
    # write evaluation result
result = eva.evaluate(score_path)
result_file_path = os.path.join(args.save_path, 'result.txt')
with open(result_file_path, 'w') as out_file:
for metric, p_at in zip(eval_metrics, result):
out_file.write(metric + ": " + str(p_at) + '\n')
print('finish test')
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
test(args)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import six
import numpy as np
import time
import argparse
import multiprocessing
import paddle
import paddle.fluid as fluid
import utils.reader as reader
from utils.util import print_arguments, mkdir
try:
import cPickle as pickle #python 2
except ImportError as e:
import pickle #python 3
from model import Net
#yapf: disable
def parse_args():
parser = argparse.ArgumentParser("Training DAM.")
parser.add_argument(
'--batch_size',
type=int,
default=256,
help='Batch size for training. (default: %(default)d)')
parser.add_argument(
'--num_scan_data',
type=int,
default=2,
help='Number of pass for training. (default: %(default)d)')
parser.add_argument(
'--learning_rate',
type=float,
default=1e-3,
help='Learning rate used to train. (default: %(default)f)')
parser.add_argument(
'--data_path',
type=str,
default="data/data_small.pkl",
help='Path to training data. (default: %(default)s)')
parser.add_argument(
'--save_path',
type=str,
default="saved_models",
help='Path to save trained models. (default: %(default)s)')
parser.add_argument(
'--use_cuda',
action='store_true',
help='If set, use cuda for training.')
parser.add_argument(
'--use_pyreader',
action='store_true',
help='If set, use pyreader for reading data.')
parser.add_argument(
'--ext_eval',
action='store_true',
        help='If set, use MAP, MRR, etc. for evaluation.')
parser.add_argument(
'--max_turn_num',
type=int,
default=9,
help='Maximum number of utterances in context.')
parser.add_argument(
'--max_turn_len',
type=int,
default=50,
        help='Maximum length of sentences in turns.')
parser.add_argument(
'--word_emb_init',
type=str,
default=None,
help='Path to the initial word embedding.')
parser.add_argument(
'--vocab_size',
type=int,
default=434512,
help='The size of vocabulary.')
parser.add_argument(
'--emb_size',
type=int,
default=200,
help='The dimension of word embedding.')
parser.add_argument(
'--_EOS_',
type=int,
default=28270,
help='The id for the end of sentence in vocabulary.')
parser.add_argument(
'--stack_num',
type=int,
default=5,
help='The number of stacked attentive modules in network.')
parser.add_argument(
'--channel1_num',
type=int,
default=32,
help="The channels' number of the 1st conv3d layer's output.")
parser.add_argument(
'--channel2_num',
type=int,
default=16,
help="The channels' number of the 2nd conv3d layer's output.")
args = parser.parse_args()
return args
#yapf: enable
def evaluate(score_path, result_file_path):
if args.ext_eval:
import utils.douban_evaluation as eva
else:
import utils.evaluation as eva
    # write evaluation result
result = eva.evaluate(score_path)
with open(result_file_path, 'w') as out_file:
for p_at in result:
out_file.write(str(p_at) + '\n')
print('finish evaluation')
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
def test_with_feed(exe, program, feed_names, fetch_list, score_path, batches,
batch_num, dev_count):
score_file = open(score_path, 'w')
for it in six.moves.xrange(batch_num // dev_count):
feed_list = []
for dev in six.moves.xrange(dev_count):
val_index = it * dev_count + dev
batch_data = reader.make_one_batch_input(batches, val_index)
feed_dict = dict(zip(feed_names, batch_data))
feed_list.append(feed_dict)
predicts = exe.run(feed=feed_list, fetch_list=fetch_list)
scores = np.array(predicts[0])
for dev in six.moves.xrange(dev_count):
val_index = it * dev_count + dev
for i in six.moves.xrange(args.batch_size):
score_file.write(
str(scores[args.batch_size * dev + i][0]) + '\t' + str(
batches["label"][val_index][i]) + '\n')
score_file.close()
def test_with_pyreader(exe, program, pyreader, fetch_list, score_path, batches,
batch_num, dev_count):
def data_provider():
for index in six.moves.xrange(batch_num):
yield reader.make_one_batch_input(batches, index)
score_file = open(score_path, 'w')
pyreader.decorate_tensor_provider(data_provider)
it = 0
pyreader.start()
while True:
try:
predicts = exe.run(fetch_list=fetch_list)
scores = np.array(predicts[0])
for dev in six.moves.xrange(dev_count):
val_index = it * dev_count + dev
for i in six.moves.xrange(args.batch_size):
score_file.write(
str(scores[args.batch_size * dev + i][0]) + '\t' + str(
batches["label"][val_index][i]) + '\n')
it += 1
except fluid.core.EOFException:
pyreader.reset()
break
score_file.close()
def train(args):
if not os.path.exists(args.save_path):
os.makedirs(args.save_path)
    # data config
data_conf = {
"batch_size": args.batch_size,
"max_turn_num": args.max_turn_num,
"max_turn_len": args.max_turn_len,
"_EOS_": args._EOS_,
}
dam = Net(args.max_turn_num, args.max_turn_len, args.vocab_size,
args.emb_size, args.stack_num, args.channel1_num,
args.channel2_num)
train_program = fluid.Program()
train_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
train_program.random_seed = 110
train_startup.random_seed = 110
with fluid.program_guard(train_program, train_startup):
with fluid.unique_name.guard():
if args.use_pyreader:
train_pyreader = dam.create_py_reader(
capacity=10, name='train_reader')
else:
dam.create_data_layers()
loss, logits = dam.create_network()
loss.persistable = True
logits.persistable = True
# gradient clipping
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByValue(
max=1.0, min=-1.0))
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=args.learning_rate,
decay_steps=400,
decay_rate=0.9,
staircase=True))
optimizer.minimize(loss)
test_program = fluid.Program()
test_startup = fluid.Program()
if "CE_MODE_X" in os.environ:
test_program.random_seed = 110
test_startup.random_seed = 110
with fluid.program_guard(test_program, test_startup):
with fluid.unique_name.guard():
if args.use_pyreader:
test_pyreader = dam.create_py_reader(
capacity=10, name='test_reader')
else:
dam.create_data_layers()
loss, logits = dam.create_network()
loss.persistable = True
logits.persistable = True
test_program = test_program.clone(for_test=True)
if args.use_cuda:
place = fluid.CUDAPlace(0)
dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
print("device count %d" % dev_count)
print("theoretical memory usage: ")
print(fluid.contrib.memory_usage(
program=train_program, batch_size=args.batch_size))
exe = fluid.Executor(place)
exe.run(train_startup)
exe.run(test_startup)
train_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda, loss_name=loss.name, main_program=train_program)
test_exe = fluid.ParallelExecutor(
use_cuda=args.use_cuda,
main_program=test_program,
share_vars_from=train_exe)
if args.word_emb_init is not None:
print("start loading word embedding init ...")
if six.PY2:
word_emb = np.array(pickle.load(open(args.word_emb_init,
'rb'))).astype('float32')
else:
word_emb = np.array(
pickle.load(
open(args.word_emb_init, 'rb'), encoding="bytes")).astype(
'float32')
dam.set_word_embedding(word_emb, place)
print("finish init word embedding ...")
print("start loading data ...")
with open(args.data_path, 'rb') as f:
if six.PY2:
train_data, val_data, test_data = pickle.load(f)
else:
train_data, val_data, test_data = pickle.load(f, encoding="bytes")
print("finish loading data ...")
val_batches = reader.build_batches(val_data, data_conf)
batch_num = len(train_data[six.b('y')]) // args.batch_size
val_batch_num = len(val_batches["response"])
print_step = max(1, batch_num // (dev_count * 100))
save_step = max(1, batch_num // (dev_count * 10))
print("begin model training ...")
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
# train on one epoch data by feeding
def train_with_feed(step):
ave_cost = 0.0
for it in six.moves.xrange(batch_num // dev_count):
feed_list = []
for dev in six.moves.xrange(dev_count):
index = it * dev_count + dev
batch_data = reader.make_one_batch_input(train_batches, index)
feed_dict = dict(zip(dam.get_feed_names(), batch_data))
feed_list.append(feed_dict)
cost = train_exe.run(feed=feed_list, fetch_list=[loss.name])
ave_cost += np.array(cost[0]).mean()
step = step + 1
if step % print_step == 0:
print("processed: [" + str(step * dev_count * 1.0 / batch_num) +
"] ave loss: [" + str(ave_cost / print_step) + "]")
ave_cost = 0.0
if (args.save_path is not None) and (step % save_step == 0):
save_path = os.path.join(args.save_path, "step_" + str(step))
print("Save model at step %d ... " % step)
print(time.strftime('%Y-%m-%d %H:%M:%S',
time.localtime(time.time())))
fluid.io.save_persistables(exe, save_path, train_program)
score_path = os.path.join(args.save_path, 'score.' + str(step))
test_with_feed(test_exe, test_program,
dam.get_feed_names(), [logits.name], score_path,
val_batches, val_batch_num, dev_count)
result_file_path = os.path.join(args.save_path,
'result.' + str(step))
evaluate(score_path, result_file_path)
return step, np.array(cost[0]).mean()
# train on one epoch with pyreader
def train_with_pyreader(step):
def data_provider():
for index in six.moves.xrange(batch_num):
yield reader.make_one_batch_input(train_batches, index)
train_pyreader.decorate_tensor_provider(data_provider)
ave_cost = 0.0
train_pyreader.start()
while True:
try:
cost = train_exe.run(fetch_list=[loss.name])
ave_cost += np.array(cost[0]).mean()
step = step + 1
if step % print_step == 0:
print("processed: [" + str(step * dev_count * 1.0 /
batch_num) + "] ave loss: [" +
str(ave_cost / print_step) + "]")
ave_cost = 0.0
if (args.save_path is not None) and (step % save_step == 0):
save_path = os.path.join(args.save_path,
"step_" + str(step))
print("Save model at step %d ... " % step)
print(time.strftime('%Y-%m-%d %H:%M:%S',
time.localtime(time.time())))
fluid.io.save_persistables(exe, save_path, train_program)
score_path = os.path.join(args.save_path,
'score.' + str(step))
test_with_pyreader(test_exe, test_program, test_pyreader,
[logits.name], score_path, val_batches,
val_batch_num, dev_count)
result_file_path = os.path.join(args.save_path,
'result.' + str(step))
evaluate(score_path, result_file_path)
except fluid.core.EOFException:
train_pyreader.reset()
break
return step, np.array(cost[0]).mean()
# train over different epoches
global_step, train_time = 0, 0.0
for epoch in six.moves.xrange(args.num_scan_data):
shuffle_train = reader.unison_shuffle(
train_data, seed=110 if ("CE_MODE_X" in os.environ) else None)
train_batches = reader.build_batches(shuffle_train, data_conf)
begin_time = time.time()
if args.use_pyreader:
global_step, last_cost = train_with_pyreader(global_step)
else:
global_step, last_cost = train_with_feed(global_step)
pass_time_cost = time.time() - begin_time
train_time += pass_time_cost
print("Pass {0}, pass_time_cost {1}"
.format(epoch, "%2.2f sec" % pass_time_cost))
# For internal continuous evaluation
if "CE_MODE_X" in os.environ:
print("kpis train_cost %f" % last_cost)
print("kpis train_duration %f" % train_time)
if __name__ == '__main__':
args = parse_args()
print_arguments(args)
train(args)
url=http://dam-data.cdn.bcebos.com/ubuntu.tar.gz
md5=9d7db116a040530a16f68dc0ab44e4b6
if [ ! -e ubuntu.tar.gz ]; then
wget -c $url
fi
echo "Checking md5 sum ..."
md5sum_tmp=`md5sum ubuntu.tar.gz | cut -d ' ' -f1`
if [ "$md5sum_tmp" != "$md5" ]; then
echo "Md5sum check failed, please remove and redownload ubuntu.tar.gz"
exit 1
fi
echo "Untar ubuntu.tar.gz ..."
tar -xzvf ubuntu.tar.gz
export CUDA_VISIBLE_DEVICES=0
python -u ../test_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \
--save_path ./step_3900 \
--model_path ./models/step_3900 \
--batch_size 200 \
--vocab_size 434512 \
--emb_size 200 \
--_EOS_ 28270
export CUDA_VISIBLE_DEVICES=0
export FLAGS_eager_delete_tensor_gb=0.0
python -u ../train_and_evaluate.py --use_cuda \
--data_path ./data/data.pkl \
--word_emb_init ./data/word_embedding.pkl \
--save_path ./models \
--use_pyreader \
--batch_size 256 \
--vocab_size 434512 \
--emb_size 200 \
--_EOS_ 28270
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import six
import numpy as np
from sklearn.metrics import average_precision_score
def mean_average_precision(sort_data):
    # average precision (AP) for one ranked session; evaluate() averages over sessions
count_1 = 0
sum_precision = 0
for index in six.moves.xrange(len(sort_data)):
if sort_data[index][1] == 1:
count_1 += 1
sum_precision += 1.0 * count_1 / (index + 1)
return sum_precision / count_1
def mean_reciprocal_rank(sort_data):
    sort_label = [s_d[1] for s_d in sort_data]
    assert 1 in sort_label
    return 1.0 / (1 + sort_label.index(1))
def precision_at_position_1(sort_data):
if sort_data[0][1] == 1:
return 1
else:
return 0
def recall_at_position_k_in_10(sort_data, k):
    sort_label = [s_d[1] for s_d in sort_data]
    select_label = sort_label[:k]
    return 1.0 * select_label.count(1) / sort_label.count(1)
def evaluation_one_session(data):
sort_data = sorted(data, key=lambda x: x[0], reverse=True)
m_a_p = mean_average_precision(sort_data)
m_r_r = mean_reciprocal_rank(sort_data)
p_1 = precision_at_position_1(sort_data)
r_1 = recall_at_position_k_in_10(sort_data, 1)
r_2 = recall_at_position_k_in_10(sort_data, 2)
r_5 = recall_at_position_k_in_10(sort_data, 5)
return m_a_p, m_r_r, p_1, r_1, r_2, r_5
def evaluate(file_path):
sum_m_a_p = 0
sum_m_r_r = 0
sum_p_1 = 0
sum_r_1 = 0
sum_r_2 = 0
sum_r_5 = 0
i = 0
total_num = 0
with open(file_path, 'r') as infile:
for line in infile:
if i % 10 == 0:
data = []
tokens = line.strip().split('\t')
data.append((float(tokens[0]), int(tokens[1])))
if i % 10 == 9:
total_num += 1
m_a_p, m_r_r, p_1, r_1, r_2, r_5 = evaluation_one_session(data)
sum_m_a_p += m_a_p
sum_m_r_r += m_r_r
sum_p_1 += p_1
sum_r_1 += r_1
sum_r_2 += r_2
sum_r_5 += r_5
i += 1
return (1.0 * sum_m_a_p / total_num, 1.0 * sum_m_r_r / total_num,
1.0 * sum_p_1 / total_num, 1.0 * sum_r_1 / total_num,
1.0 * sum_r_2 / total_num, 1.0 * sum_r_5 / total_num)
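# The score file read by `evaluate` is assumed to hold one "score<TAB>label"
# pair per line, with every 10 consecutive lines forming one session
# (one positive and nine negative candidates).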
if __name__ == '__main__':
result = evaluate(sys.argv[1])
for r in result:
print(r)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import six
def get_p_at_n_in_m(data, n, m, ind):
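    """Return 1 if the positive response at offset `ind` ranks within the top n
    of the m candidates data[ind:ind + m] (ties count in its favor), else 0."""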
pos_score = data[ind][0]
curr = data[ind:ind + m]
curr = sorted(curr, key=lambda x: x[0], reverse=True)
if curr[n - 1][0] <= pos_score:
return 1
return 0
def evaluate(file_path):
data = []
with open(file_path, 'r') as file:
for line in file:
line = line.strip()
tokens = line.split("\t")
if len(tokens) != 2:
continue
data.append((float(tokens[0]), int(tokens[1])))
#assert len(data) % 10 == 0
p_at_1_in_2 = 0.0
p_at_1_in_10 = 0.0
p_at_2_in_10 = 0.0
p_at_5_in_10 = 0.0
length = len(data) // 10
for i in six.moves.xrange(0, length):
ind = i * 10
assert data[ind][1] == 1
p_at_1_in_2 += get_p_at_n_in_m(data, 1, 2, ind)
p_at_1_in_10 += get_p_at_n_in_m(data, 1, 10, ind)
p_at_2_in_10 += get_p_at_n_in_m(data, 2, 10, ind)
p_at_5_in_10 += get_p_at_n_in_m(data, 5, 10, ind)
return (p_at_1_in_2 / length, p_at_1_in_10 / length, p_at_2_in_10 / length,
p_at_5_in_10 / length)
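# These are the usual Ubuntu-corpus metrics R_2@1 and R_10@{1,2,5}, computed on
# groups of 10 candidates whose first entry is assumed to be the positive
# response (enforced by the assert above).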
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
def loss(x, y, clip_value=10.0):
"""Calculate the sigmoid cross entropy with logits for input(x).
Args:
x: Variable with shape with shape [batch, dim]
y: Input label
Returns:
loss: cross entropy
logits: prediction
"""
logits = fluid.layers.fc(
input=x,
size=1,
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(0.)))
loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=logits, label=y)
loss = fluid.layers.reduce_mean(
fluid.layers.clip(
loss, min=-clip_value, max=clip_value))
return loss, logits
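# Illustrative usage of `loss` (a sketch only; the feature size and variable
# names below are hypothetical):
#   feat = fluid.layers.data(name='feat', shape=[32], dtype='float32')
#   label = fluid.layers.data(name='label', shape=[1], dtype='float32')
#   mean_loss, logits = loss(feat, label)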
def ffn(input, d_inner_hid, d_hid, name=None):
"""Position-wise Feed-Forward Network
"""
hidden = fluid.layers.fc(input=input,
size=d_inner_hid,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(name=name + '_fc.w_0'),
bias_attr=fluid.ParamAttr(
name=name + '_fc.b_0',
initializer=fluid.initializer.Constant(0.)),
act="relu")
out = fluid.layers.fc(input=hidden,
size=d_hid,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(name=name + '_fc.w_1'),
bias_attr=fluid.ParamAttr(
name=name + '_fc.b_1',
initializer=fluid.initializer.Constant(0.)))
return out
def dot_product_attention(query,
key,
value,
d_key,
q_mask=None,
k_mask=None,
dropout_rate=None,
mask_cache=None):
"""Dot product layer.
Args:
query: a tensor with shape [batch, Q_time, Q_dimension]
key: a tensor with shape [batch, time, K_dimension]
value: a tensor with shape [batch, time, V_dimension]
q_lengths: a tensor with shape [batch]
k_lengths: a tensor with shape [batch]
Returns:
a tensor with shape [batch, query_time, value_dimension]
Raises:
AssertionError: if Q_dimension not equal to K_dimension when attention
type is dot.
"""
logits = fluid.layers.matmul(
x=query, y=key, transpose_y=True, alpha=d_key**(-0.5))
if (q_mask is not None) and (k_mask is not None):
if mask_cache is not None and q_mask.name in mask_cache and k_mask.name in mask_cache[
q_mask.name]:
mask, another_mask = mask_cache[q_mask.name][k_mask.name]
else:
mask = fluid.layers.matmul(x=q_mask, y=k_mask, transpose_y=True)
another_mask = fluid.layers.scale(
mask,
scale=float(2**32 - 1),
bias=float(-1),
bias_after_scale=False)
if mask_cache is not None:
if q_mask.name not in mask_cache:
mask_cache[q_mask.name] = dict()
mask_cache[q_mask.name][k_mask.name] = [mask, another_mask]
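        # Additive mask trick: `mask` is 1 at valid (query, key) pairs and 0 at
        # padded ones; `another_mask` = (2**32 - 1) * (mask - 1) is 0 where the
        # pair is valid and a very large negative number where it is padded, so
        # softmax assigns (near-)zero attention weight to padded positions.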
logits = mask * logits + another_mask
attention = fluid.layers.softmax(logits)
if dropout_rate:
attention = fluid.layers.dropout(
input=attention, dropout_prob=dropout_rate, is_test=False, seed=2)
atten_out = fluid.layers.matmul(x=attention, y=value)
return atten_out
def block(name,
query,
key,
value,
d_key,
q_mask=None,
k_mask=None,
is_layer_norm=True,
dropout_rate=None,
mask_cache=None):
"""
"""
att_out = dot_product_attention(
query,
key,
value,
d_key,
q_mask,
k_mask,
dropout_rate,
mask_cache=mask_cache)
y = query + att_out
if is_layer_norm:
y = fluid.layers.layer_norm(
input=y,
begin_norm_axis=len(y.shape) - 1,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.),
name=name + '_layer_norm.w_0'),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.),
name=name + '_layer_norm.b_0'))
z = ffn(y, d_key, d_key, name)
w = y + z
if is_layer_norm:
w = fluid.layers.layer_norm(
input=w,
begin_norm_axis=len(w.shape) - 1,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.),
name=name + '_layer_norm.w_1'),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.),
name=name + '_layer_norm.b_1'))
return w
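# `block` is effectively a post-LN Transformer-style unit: attention and a
# position-wise FFN, each wrapped in a residual connection and, when
# `is_layer_norm` is set, layer normalization.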
def cnn_3d(input, out_channels_0, out_channels_1, add_relu=True):
# same padding
conv_0 = fluid.layers.conv3d(
name="conv3d_0",
input=input,
num_filters=out_channels_0,
filter_size=[3, 3, 3],
padding=[1, 1, 1],
act="elu" if add_relu else None,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-0.01, high=0.01)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.0)))
# same padding
pooling_0 = fluid.layers.pool3d(
input=conv_0,
pool_type="max",
pool_size=3,
pool_padding=1,
pool_stride=3)
conv_1 = fluid.layers.conv3d(
name="conv3d_1",
input=pooling_0,
num_filters=out_channels_1,
filter_size=[3, 3, 3],
padding=[1, 1, 1],
act="elu" if add_relu else None,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-0.01, high=0.01)),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.0)))
# same padding
pooling_1 = fluid.layers.pool3d(
input=conv_1,
pool_type="max",
pool_size=3,
pool_padding=1,
pool_stride=3)
return pooling_1
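# Shape note (assuming the stride-1 'same' convolutions above): each pooling
# stage maps a spatial size d to floor((d + 2 * 1 - 3) / 3) + 1, so e.g. a
# 10x10x10 input becomes 4x4x4 after the first stage and 2x2x2 after the second.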
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import numpy as np
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle  # Python 3
def unison_shuffle(data, seed=None):
if seed is not None:
np.random.seed(seed)
y = np.array(data[six.b('y')])
c = np.array(data[six.b('c')])
r = np.array(data[six.b('r')])
assert len(y) == len(c) == len(r)
p = np.random.permutation(len(y))
shuffle_data = {six.b('y'): y[p], six.b('c'): c[p], six.b('r'): r[p]}
return shuffle_data
def split_c(c, split_id):
    '''Split the context `c` (a flat list of ids) into turns on `split_id`
    (an integer, conf['_EOS_']).
    Returns a nested list of turns.
    '''
turns = [[]]
for _id in c:
if _id != split_id:
turns[-1].append(_id)
else:
turns.append([])
if turns[-1] == [] and len(turns) > 1:
turns.pop()
return turns
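# Example: split_c([5, 3, 28270, 7, 28270], 28270) returns [[5, 3], [7]],
# 28270 being the id of _EOS_ used elsewhere in this example.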
def normalize_length(_list, length, cut_type='tail'):
    '''Pad or cut `_list` (a list or nested list, e.g. turns/response/a single
    turn of c) to exactly `length` items; `cut_type` ('head' or 'tail') selects
    which end to keep when the list is longer than `length`.
    Returns the normalized list and min(real_length, length).
    '''
real_length = len(_list)
if real_length == 0:
return [0] * length, 0
if real_length <= length:
if not isinstance(_list[0], list):
_list.extend([0] * (length - real_length))
else:
_list.extend([[]] * (length - real_length))
return _list, real_length
if cut_type == 'head':
return _list[:length], length
if cut_type == 'tail':
return _list[-length:], length
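# Examples: normalize_length([1, 2, 3], 5) returns ([1, 2, 3, 0, 0], 3), while
# normalize_length([1, 2, 3, 4, 5, 6], 5, 'tail') keeps the last five items and
# returns ([2, 3, 4, 5, 6], 5).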
def produce_one_sample(data,
index,
split_id,
max_turn_num,
max_turn_len,
turn_cut_type='tail',
term_cut_type='tail'):
    '''Build one sample from `data` at `index`: split the context into at most
    `max_turn_num` turns (e.g. 10) of at most `max_turn_len` tokens each
    (e.g. 50).
    Returns y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len.
    '''
c = data[six.b('c')][index]
r = data[six.b('r')][index][:]
y = data[six.b('y')][index]
turns = split_c(c, split_id)
#normalize turns_c length, nor_turns length is max_turn_num
nor_turns, turn_len = normalize_length(turns, max_turn_num, turn_cut_type)
nor_turns_nor_c = []
term_len = []
#nor_turn_nor_c length is max_turn_num, element is a list length is max_turn_len
for c in nor_turns:
#nor_c length is max_turn_len
nor_c, nor_c_len = normalize_length(c, max_turn_len, term_cut_type)
nor_turns_nor_c.append(nor_c)
term_len.append(nor_c_len)
nor_r, r_len = normalize_length(r, max_turn_len, term_cut_type)
return y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len
def build_one_batch(data,
batch_index,
conf,
turn_cut_type='tail',
term_cut_type='tail'):
_turns = []
_tt_turns_len = []
_every_turn_len = []
_response = []
_response_len = []
_label = []
for i in six.moves.xrange(conf['batch_size']):
index = batch_index * conf['batch_size'] + i
y, nor_turns_nor_c, nor_r, turn_len, term_len, r_len = produce_one_sample(
data, index, conf['_EOS_'], conf['max_turn_num'],
conf['max_turn_len'], turn_cut_type, term_cut_type)
_label.append(y)
_turns.append(nor_turns_nor_c)
_response.append(nor_r)
_every_turn_len.append(term_len)
_tt_turns_len.append(turn_len)
_response_len.append(r_len)
return _turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label
def build_one_batch_dict(data,
batch_index,
conf,
turn_cut_type='tail',
term_cut_type='tail'):
_turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label = build_one_batch(
data, batch_index, conf, turn_cut_type, term_cut_type)
ans = {
'turns': _turns,
'tt_turns_len': _tt_turns_len,
'every_turn_len': _every_turn_len,
'response': _response,
'response_len': _response_len,
'label': _label
}
return ans
def build_batches(data, conf, turn_cut_type='tail', term_cut_type='tail'):
_turns_batches = []
_tt_turns_len_batches = []
_every_turn_len_batches = []
_response_batches = []
_response_len_batches = []
_label_batches = []
batch_len = len(data[six.b('y')]) // conf['batch_size']
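    # Integer division drops the last len(data) % batch_size samples.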
for batch_index in six.moves.range(batch_len):
_turns, _tt_turns_len, _every_turn_len, _response, _response_len, _label = build_one_batch(
data, batch_index, conf, turn_cut_type='tail', term_cut_type='tail')
_turns_batches.append(_turns)
_tt_turns_len_batches.append(_tt_turns_len)
_every_turn_len_batches.append(_every_turn_len)
_response_batches.append(_response)
_response_len_batches.append(_response_len)
_label_batches.append(_label)
ans = {
"turns": _turns_batches,
"tt_turns_len": _tt_turns_len_batches,
"every_turn_len": _every_turn_len_batches,
"response": _response_batches,
"response_len": _response_len_batches,
"label": _label_batches
}
return ans
def make_one_batch_input(data_batches, index):
"""Split turns and return feeding data.
Args:
data_batches: All data batches
index: The index for current batch
Return:
feeding dictionary
"""
turns = np.array(data_batches["turns"][index]).astype('int64')
tt_turns_len = np.array(data_batches["tt_turns_len"][index]).astype('int64')
every_turn_len = np.array(data_batches["every_turn_len"][index]).astype(
'int64')
response = np.array(data_batches["response"][index]).astype('int64')
response_len = np.array(data_batches["response_len"][index]).astype('int64')
batch_size = turns.shape[0]
max_turn_num = turns.shape[1]
max_turn_len = turns.shape[2]
turns_list = [turns[:, i, :] for i in six.moves.xrange(max_turn_num)]
every_turn_len_list = [
every_turn_len[:, i] for i in six.moves.xrange(max_turn_num)
]
feed_list = []
for i, turn in enumerate(turns_list):
turn = np.expand_dims(turn, axis=-1)
feed_list.append(turn)
for i, turn_len in enumerate(every_turn_len_list):
turn_mask = np.ones((batch_size, max_turn_len, 1)).astype("float32")
for row in six.moves.xrange(batch_size):
turn_mask[row, turn_len[row]:, 0] = 0
feed_list.append(turn_mask)
response = np.expand_dims(response, axis=-1)
feed_list.append(response)
response_mask = np.ones((batch_size, max_turn_len, 1)).astype("float32")
for row in six.moves.xrange(batch_size):
response_mask[row, response_len[row]:, 0] = 0
feed_list.append(response_mask)
label = np.array([data_batches["label"][index]]).reshape(
[-1, 1]).astype("float32")
feed_list.append(label)
return feed_list
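# The resulting feed_list is ordered as: max_turn_num turn tensors, then
# max_turn_num turn masks, then response, response_mask and label; this order
# is assumed to match the order of the model's input layers.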
if __name__ == '__main__':
conf = {
"batch_size": 256,
"max_turn_num": 10,
"max_turn_len": 50,
"_EOS_": 28270,
}
with open('../ubuntu/data/data_small.pkl', 'rb') as f:
if six.PY2:
train, val, test = pickle.load(f)
else:
train, val, test = pickle.load(f, encoding="bytes")
print('load data success')
train_batches = build_batches(train, conf)
val_batches = build_batches(val, conf)
test_batches = build_batches(test, conf)
print('build batches success')
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import os
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(six.iteritems(vars(args))):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
def mkdir(path):
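    """Recursively create `path`, doing nothing if the directory already
    exists (similar to os.makedirs with exist_ok=True)."""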
if not os.path.isdir(path):
mkdir(os.path.split(path)[0])
else:
return
os.mkdir(path)
def pos_encoding_init():
pass
def scaled_dot_product_attention():
pass
#!/bin/bash
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
cudaid=${language_model:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce | python _ce.py
cudaid=${language_model_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --enable_ce | python _ce.py
# Language Model
Below is a brief overview of this example's directory structure:
```text
.
├── README.md # documentation
├── train.py # training script
├── infer.py # inference script
└── utils.py # utility functions
```
## Introduction
For an introduction to recurrent neural network language models, please refer to the paper [Recurrent Neural Network Regularization](https://arxiv.org/abs/1409.2329). In this example, we implement a GRU-RNN language model.
## Training
Run the command `python train.py` to start training the model.
```python
python train.py
```
The currently supported parameters can be found in the `train_net` function of [train.py](./train.py):
```python
vocab, train_reader, test_reader = utils.prepare_data(
batch_size=20, # batch size
buffer_size=1000, # buffer size, default value is OK
word_freq_threshold=0) # vocabulary related parameter, and words with frequency below this value will be filtered
train(train_reader=train_reader,
vocab=vocab,
network=network,
hid_size=200, # embedding and hidden size
base_lr=1.0, # base learning rate
batch_size=20, # batch size, the same as that in prepare_data
pass_num=12, # the number of passes for training
use_cuda=True, # whether to use GPU card
parallel=False, # whether to be parallel
model_dir="model", # directory to save model
init_low_bound=-0.1, # uniform parameter initialization lower bound
init_high_bound=0.1) # uniform parameter initialization upper bound
```
## Customizing the Network
The network structure can be adjusted in the `network` function of [train.py](./train.py); the current structure is as follows:
```python
emb = fluid.layers.embedding(input=src, size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=emb_lr_x),
is_sparse=True)
fc0 = fluid.layers.fc(input=emb, size=hid_size * 3,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
gru_h0 = fluid.layers.dynamic_gru(input=fc0, size=hid_size,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=gru_lr_x))
fc = fluid.layers.fc(input=gru_h0, size=vocab_size, act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=init_low_bound, high=init_high_bound),
learning_rate=fc_lr_x))
cost = fluid.layers.cross_entropy(input=fc, label=dst)
```
## Sample Training Results
The training log on a single Tesla K40m GPU card is shown below:
```text
epoch_1 start
step:100 ppl:771.053
step:200 ppl:449.597
step:300 ppl:642.654
step:400 ppl:458.128
step:500 ppl:510.912
step:600 ppl:451.545
step:700 ppl:364.404
step:800 ppl:324.272
step:900 ppl:360.797
step:1000 ppl:275.761
step:1100 ppl:294.599
step:1200 ppl:335.877
step:1300 ppl:185.262
step:1400 ppl:241.744
step:1500 ppl:211.507
step:1600 ppl:233.431
step:1700 ppl:298.767
step:1800 ppl:203.403
step:1900 ppl:158.828
step:2000 ppl:171.148
step:2100 ppl:280.884
epoch:1 num_steps:2104 time_cost(s):47.478780
model saved in model/epoch_1
epoch_2 start
step:100 ppl:238.099
step:200 ppl:136.527
step:300 ppl:204.184
step:400 ppl:252.886
step:500 ppl:177.377
step:600 ppl:197.688
step:700 ppl:131.650
step:800 ppl:223.906
step:900 ppl:144.785
step:1000 ppl:176.286
step:1100 ppl:148.158
step:1200 ppl:203.581
step:1300 ppl:168.208
step:1400 ppl:159.412
step:1500 ppl:114.032
step:1600 ppl:157.985
step:1700 ppl:147.743
step:1800 ppl:88.676
step:1900 ppl:141.962
step:2000 ppl:106.087
step:2100 ppl:122.709
epoch:2 num_steps:2104 time_cost(s):47.583789
model saved in model/epoch_2
...
```
## Inference
Run the command `python infer.py model_dir start_epoch last_epoch(inclusive)` to start inference, where start_epoch specifies the first epoch to evaluate and last_epoch the last epoch (inclusive), e.g.
```python
python infer.py model 1 12 # prediction from epoch 1 to epoch 12
```
## Sample Inference Results
```text
model:model/epoch_1 ppl:254.540 time_cost(s):3.29
model:model/epoch_2 ppl:177.671 time_cost(s):3.27
model:model/epoch_3 ppl:156.251 time_cost(s):3.27
model:model/epoch_4 ppl:139.036 time_cost(s):3.27
model:model/epoch_5 ppl:132.661 time_cost(s):3.27
model:model/epoch_6 ppl:130.092 time_cost(s):3.28
model:model/epoch_7 ppl:128.751 time_cost(s):3.27
model:model/epoch_8 ppl:125.411 time_cost(s):3.27
model:model/epoch_9 ppl:124.604 time_cost(s):3.28
model:model/epoch_10 ppl:124.754 time_cost(s):3.29
model:model/epoch_11 ppl:125.421 time_cost(s):3.27
model:model/epoch_12 ppl:125.676 time_cost(s):3.27
```
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
imikolov_20_avg_ppl_kpi = CostKpi('imikolov_20_avg_ppl', 0.2, 0)
imikolov_20_pass_duration_kpi = DurationKpi(
'imikolov_20_pass_duration', 0.02, 0, actived=True)
imikolov_20_avg_ppl_kpi_card4 = CostKpi('imikolov_20_avg_ppl_card4', 0.2, 0)
imikolov_20_pass_duration_kpi_card4 = DurationKpi(
'imikolov_20_pass_duration_card4', 0.03, 0, actived=True)
tracking_kpis = [
imikolov_20_avg_ppl_kpi,
imikolov_20_pass_duration_kpi,
imikolov_20_avg_ppl_kpi_card4,
imikolov_20_pass_duration_kpi_card4,
]
def parse_log(log):
'''
This method should be implemented by model developers.
The suggestion:
each line in the log should be key, value, for example:
"
train_cost\t1.0
test_cost\t1.0
train_cost\t1.0
train_cost\t1.0
train_acc\t1.2
"
'''
for line in log.split('\n'):
fs = line.strip().split('\t')
print(fs)
if len(fs) == 3 and fs[0] == 'kpis':
kpi_name = fs[1]
kpi_value = float(fs[2])
yield kpi_name, kpi_value
def log_to_ce(log):
kpi_tracker = {}
for kpi in tracking_kpis:
kpi_tracker[kpi.name] = kpi
for (kpi_name, kpi_value) in parse_log(log):
print(kpi_name, kpi_value)
kpi_tracker[kpi_name].add_record(kpi_value)
kpi_tracker[kpi_name].persist()
if __name__ == '__main__':
log = sys.stdin.read()
log_to_ce(log)
0ca0510fa625d35d902b73033c4ba9d8 demo.zip
dc7658b8cdf4f94b8714d130b7d15196 dureader_raw.zip
3db9a32e5a7c5375a604a70687b45479 dureader_preprocessed.zip
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import json
import pandas as pd
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage: tojson.py <input_path> <output_path>')
        sys.exit(1)
infile = sys.argv[1]
outfile = sys.argv[2]
df = pd.read_json(infile)
with open(outfile, 'w') as f:
for row in df.iterrows():
f.write(row[1].to_json() + '\n')
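# Illustrative behavior (a sketch): an input file holding the JSON array
# [{"q": 1}, {"q": 2}] is rewritten as two lines, {"q":1} and {"q":2}, i.e.
# the JSON-lines format consumed by downstream readers.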