Unverified commit 19aa404f, authored by zhengya01, committed by GitHub

Merge pull request #30 from PaddlePaddle/develop

update
......@@ -13,3 +13,9 @@
[submodule "PaddleNLP/knowledge-driven-dialogue"]
path = PaddleNLP/knowledge-driven-dialogue
url = https://github.com/baidu/knowledge-driven-dialogue
[submodule "PaddleNLP/language_representations_kit"]
path = PaddleNLP/language_representations_kit
url = https://github.com/PaddlePaddle/LARK
[submodule "PaddleNLP/knowledge_driven_dialogue"]
path = PaddleNLP/knowledge_driven_dialogue
url = https://github.com/baidu/knowledge-driven-dialogue/

Running the example programs in this directory requires the latest PaddlePaddle develop version. If your installed PaddlePaddle is older than this requirement, please update it following the instructions in the [installation documentation](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html).
......@@ -73,8 +72,8 @@ env CUDA_VISIBLE_DEVICES=0 python train.py
Run the following command to read multiple images and run inference on them:
```
env CUDA_VISIBLE_DEVICE=0 python infer.py \
--init_model="checkpoints/1" --input="./data/inputA/*" \
env CUDA_VISIBLE_DEVICES=0 python infer.py \
--init_model="output/checkpoints/1" --input="./data/horse2zebra/trainA/*" \
--input_style A --output="./output"
```
......@@ -89,3 +88,5 @@ env CUDA_VISIBLE_DEVICE=0 python infer.py \
<img src="images/B2A.jpg" width="620" hspace='10'/> <br/>
<strong>Figure 3</strong>
</p>
> In all the examples in this document, you can change the GPU card in use by modifying `CUDA_VISIBLE_DEVICES`.
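For example, to run the training command shown above on GPU card 1 instead of card 0:

```
env CUDA_VISIBLE_DEVICES=1 python train.py
```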
......@@ -2,7 +2,6 @@ import argparse
import functools
import os
from PIL import Image
from paddle.fluid import core
import paddle.fluid as fluid
import paddle
import numpy as np
......@@ -44,7 +43,6 @@ def infer(args):
if not os.path.exists(args.output):
os.makedirs(args.output)
for file in glob.glob(args.input):
print "read %s" % file
image_name = os.path.basename(file)
image = Image.open(file)
image = image.resize((256, 256))
......@@ -52,7 +50,7 @@ def infer(args):
if len(image.shape) != 3:
continue
data = image.transpose([2, 0, 1])[np.newaxis, :].astype("float32")
tensor = core.LoDTensor()
tensor = fluid.LoDTensor()
tensor.set(data, place)
fake_temp = exe.run(fetch_list=[fake.name], feed={"input": tensor})
......
......@@ -12,7 +12,6 @@ import numpy as np
from scipy.misc import imsave
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
from paddle.fluid import core
import data_reader
from utility import add_arguments, print_arguments, ImagePool
from trainer import *
......@@ -22,7 +21,7 @@ add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('epoch', int, 2, "The number of epochs to be trained.")
add_arg('output', str, "./output_0", "The directory the model and the test result to be saved to.")
add_arg('output', str, "./output", "The directory the model and the test result to be saved to.")
add_arg('init_model', str, None, "The init model file of directory.")
add_arg('save_checkpoints', bool, True, "Whether to save checkpoints.")
add_arg('run_test', bool, True, "Whether to run test.")
......@@ -82,8 +81,8 @@ def train(args):
for data_A, data_B in zip(A_test_reader(), B_test_reader()):
A_name = data_A[1]
B_name = data_B[1]
tensor_A = core.LoDTensor()
tensor_B = core.LoDTensor()
tensor_A = fluid.LoDTensor()
tensor_B = fluid.LoDTensor()
tensor_A.set(data_A[0], place)
tensor_B.set(data_B[0], place)
fake_A_temp, fake_B_temp, cyc_A_temp, cyc_B_temp = exe.run(
......@@ -168,8 +167,8 @@ def train(args):
for i in range(max_images_num):
data_A = next(A_reader)
data_B = next(B_reader)
tensor_A = core.LoDTensor()
tensor_B = core.LoDTensor()
tensor_A = fluid.LoDTensor()
tensor_B = fluid.LoDTensor()
tensor_A.set(data_A, place)
tensor_B.set(data_B, place)
s_time = time.time()
......
......@@ -9,13 +9,13 @@ This is a simple demonstration of re-implementation in [PaddlePaddle.Fluid](http
## Requirements
- Python == 2.7
- PaddlePaddle >= 1.1.0
- Python == 2.7 or 3.6
- PaddlePaddle >= 1.1.0 (<= 1.3.0)
- opencv-python >= 3.3
## Environment
The code is developed and tested on 4 Tesla K40/P40 GPU cards on CentOS, with CUDA-9.2/8.0 and cuDNN-7.1 installed.
The code is developed and tested on 4 Tesla K40/P40 GPU cards on CentOS, with CUDA-9.0/8.0 and cuDNN-7.0 installed.
## Results on MPII Val
| Arch | Head | Shoulder | Elbow | Wrist | Hip | Knee | Ankle | Mean | Mean@0.1| Models |
......@@ -85,19 +85,21 @@ python2 setup.py install --user
Download the checkpoints of Pose-ResNet-50 trained on the MPII dataset from [here](https://paddlemodels.bj.bcebos.com/pose/pose-resnet50-mpii-384x384.tar.gz). Extract it into the folder `checkpoints` under the root directory of this repo. Then run
```bash
python val.py --dataset 'mpii' --checkpoint 'checkpoints/pose-resnet50-mpii-384x384'
python val.py --dataset 'mpii' --checkpoint 'checkpoints/pose-resnet50-mpii-384x384' --data_root 'data/mpii'
```
### Perform Training
```bash
python train.py --dataset 'mpii' # or coco
python train.py --dataset 'mpii' --data_root 'data/mpii'
```
**Note**: Configurations for training are aggregated in `lib/mpii_reader.py` and `lib/coco_reader.py`; the `--dataset` option selects which one is used, as sketched below.
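The sketch below mirrors the dataset-driven reader selection used by `train.py`/`val.py` in this change; the helper function name is ours, not part of the repo:

```python
# Hypothetical helper illustrating how the dataset choice selects a reader module;
# train.py/val.py inline this logic rather than calling a function like this.
def select_reader(dataset):
    if dataset == 'coco':
        import lib.coco_reader as reader   # COCO training/eval settings live here
    else:
        import lib.mpii_reader as reader   # MPII training/eval settings live here
    return reader
```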
### Perform Test on Images
We also support applying pre-trained models to custom images.
Put the images into the folder `test` under the root directory of this repo. Then run
```bash
......
......@@ -9,10 +9,10 @@
## Environment
The code in this directory has been tested on 4 Tesla K40/P40 GPU cards, on CentOS, with CUDA-9.2/8.0 and cuDNN-7.1.
The code in this directory has been tested on 4 Tesla K40/P40 GPU cards, on CentOS, with CUDA-9.0/8.0 and cuDNN-7.0.
- Python == 2.7
- PaddlePaddle >= 1.1.0
- Python == 2.7 / 3.6
- PaddlePaddle >= 1.1.0 (<= 1.3.0)
- opencv-python >= 3.3
## Results on MPII Val
......@@ -83,19 +83,21 @@ python2 setup.py install --user
Download the COCO/MPII pre-trained models (see the links in the last column of the table above), save them into the 'checkpoints' folder under the root directory, and run:
```bash
python val.py --dataset 'mpii' --checkpoint 'checkpoints/pose-resnet50-mpii-384x384'
python val.py --dataset 'mpii' --checkpoint 'checkpoints/pose-resnet50-mpii-384x384' --data_root 'data/mpii'
```
### Model Training
```bash
python train.py --dataset 'mpii' # or coco
python train.py --dataset 'mpii'
```
**Note**: Detailed parameter configurations are stored in `lib/mpii_reader.py` and `lib/coco_reader.py`; setting the dataset selects the concrete configuration to use.
### Model Testing (arbitrary images, using the COCO or MPII pre-trained models above)
We also support predicting on arbitrary images with the pre-trained keypoint detection models.
Put the test images into the 'test' folder under the root directory and run
```bash
......@@ -104,4 +106,4 @@ python test.py --checkpoint 'checkpoints/pose-resnet-50-384x384-mpii'
## References
- Simple Baselines for Human Pose Estimation and Tracking in PyTorch [`code`](https://github.com/Microsoft/human-pose-estimation.pytorch#data-preparation)
- Simple Baselines for Human Pose Estimation and Tracking in PyTorch [`code`](https://github.com/Microsoft/human-pose-estimation.pytorch#data-preparation)
\ No newline at end of file
......@@ -15,7 +15,7 @@
"""Functions for inference."""
import os
import sys
import argparse
import functools
import paddle
......@@ -34,13 +34,18 @@ add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('dataset', str, 'mpii', "Dataset")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('kp_dim', int, 16, "Class number.")
add_arg('model_save_dir', str, "output", "Model save directory")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('flip_test', bool, True, "Flip test")
add_arg('shift_heatmap', bool, True, "Shift heatmap")
# yapf: enable
def print_immediately(s):
print(s)
sys.stdout.flush()
def test(args):
import lib.mpii_reader as reader
if args.dataset == 'coco':
......@@ -89,6 +94,7 @@ def test(args):
fetch_list = [image.name, output.name]
for batch_id, data in enumerate(test_reader()):
print_immediately("Processing batch #%d" % batch_id)
num_images = len(data)
file_ids = []
......@@ -124,6 +130,7 @@ def test(args):
out_heatmaps = (out_heatmaps + output_flipped) * 0.5
save_predict_results(input_image, out_heatmaps, file_ids, fold_name='results')
if __name__ == '__main__':
args = parser.parse_args()
test(args)
......@@ -16,6 +16,7 @@
"""Functions for training."""
import os
import sys
import numpy as np
import cv2
import paddle
......@@ -75,6 +76,12 @@ def optimizer_setting(args, params):
return optimizer
def print_immediately(s):
print(s)
sys.stdout.flush()
def train(args):
if args.dataset == 'coco':
import lib.coco_reader as reader
......@@ -152,7 +159,7 @@ def train(args):
loss = np.mean(np.array(loss))
print('Epoch [{:4d}/{:3d}] LR: {:.10f} '
print_immediately('Epoch [{:4d}/{:3d}] LR: {:.10f} '
'Loss = {:.5f}'.format(
batch_id, pass_id, current_lr[0], loss))
......
......@@ -24,9 +24,9 @@ from collections import OrderedDict
import pickle
from utils.base_evaluator import BaseEvaluator
from utils.nms_utils import oks_nms
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from nms.nms import oks_nms
class COCOEvaluator(BaseEvaluator):
......
# Copyright (c) 2019-present, Baidu, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None):
if not isinstance(sigmas, np.ndarray):
sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
vars = (sigmas * 2) ** 2
xg = g[0::3]
yg = g[1::3]
vg = g[2::3]
ious = np.zeros((d.shape[0]))
for n_d in range(0, d.shape[0]):
xd = d[n_d, 0::3]
yd = d[n_d, 1::3]
vd = d[n_d, 2::3]
dx = xd - xg
dy = yd - yg
e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
if in_vis_thre is not None:
ind = list(vg > in_vis_thre) and list(vd > in_vis_thre)
e = e[ind]
ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0
return ious
def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
"""
greedily select boxes with high confidence and overlap with current maximum <= thresh
rule out overlap >= thresh, overlap = oks
:param kpts_db
:param thresh: retain overlap < thresh
:return: indexes to keep
"""
if len(kpts_db) == 0:
return []
scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])
order = scores.argsort()[::-1]
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre)
inds = np.where(oks_ovr <= thresh)[0]
order = order[inds + 1]
return keep
\ No newline at end of file
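As a hedged illustration of how `oks_nms` might be called (the detections below are made up; each entry carries the `score`, flattened COCO-style `keypoints`, and `area` fields the function reads, and the import path is assumed from the `COCOEvaluator` change above):

```python
import numpy as np
from utils.nms_utils import oks_nms  # module path assumed from the evaluator import above

# Two dummy person detections, each with 17 COCO keypoints stored as (x, y, v) triples.
kpts_db = [
    {'score': 0.90, 'keypoints': np.random.rand(17 * 3), 'area': 2500.0},
    {'score': 0.75, 'keypoints': np.random.rand(17 * 3), 'area': 2400.0},
]
keep = oks_nms(kpts_db, thresh=0.9)  # indices of detections kept after OKS-based NMS
print(keep)
```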
......@@ -41,7 +41,7 @@ def print_arguments(args):
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()):
for arg, value in sorted(vars(args).items()):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
......
......@@ -16,6 +16,7 @@
"""Functions for validation."""
import os
import sys
import argparse
import functools
import paddle
......@@ -37,7 +38,6 @@ add_arg('use_gpu', bool, True, "Whether to use GPU or n
add_arg('num_epochs', int, 140, "Number of epochs.")
add_arg('total_images', int, 144406, "Training image number.")
add_arg('kp_dim', int, 16, "Class number.")
add_arg('model_save_dir', str, "output", "Model save directory")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('pretrained_model', str, None, "Whether to use pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
......@@ -49,6 +49,12 @@ add_arg('post_process', bool, True, "Post process")
add_arg('data_root', str, "data/coco", "Root directory of dataset")
# yapf: enable
def print_immediately(s):
print(s)
sys.stdout.flush()
def valid(args):
if args.dataset == 'coco':
import lib.coco_reader as reader
......@@ -208,7 +214,7 @@ def valid(args):
idx += num_images
print('Epoch [{:4d}] '
print_immediately('Epoch [{:4d}] '
'Loss = {:.5f} '
'Acc = {:.5f}'.format(batch_id, loss, acc.avg))
......
Running the example programs in this directory requires the latest PaddlePaddle develop version. If your installed PaddlePaddle is older than this requirement, please update it following the instructions in the [installation documentation](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html).
## Code Structure
```
......@@ -68,7 +66,7 @@ Iter[0]; train loss: 2.338; sub4_loss: 3.367; sub24_loss: 4.120; sub124_loss: 0.
### Testing
Run the following command to test on the `Cityscape` test dataset:
```
python eval.py --model_path="./model/" --use_gpu=True
python eval.py --model_path="./cnkpnt/100" --use_gpu=True
```
The model file must be specified via the `--model_path` option.
The evaluation metric output by the test script is [mean IoU]().
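For reference, a minimal NumPy sketch of how mean IoU can be computed from a per-class confusion matrix; this only illustrates the metric and is not the implementation used by eval.py:

```python
import numpy as np

def mean_iou(confusion_matrix):
    """Mean IoU over classes; confusion_matrix[i, j] counts pixels of class i predicted as class j."""
    cm = confusion_matrix.astype(np.float64)
    intersection = np.diag(cm)                                # correctly classified pixels per class
    union = cm.sum(axis=0) + cm.sum(axis=1) - intersection    # predicted + ground-truth - overlap
    iou = intersection / np.maximum(union, 1.0)               # guard against empty classes
    return iou.mean()
```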
......@@ -77,7 +75,7 @@ python eval.py --model_path="./model/" --use_gpu=True
Run the following command to run inference on the specified data:
```
python infer.py \
--model_path="./model" \
--model_path="./cnkpnt/100" \
--images_path="./data/cityscape/" \
--images_list="./data/cityscape/infer.list"
```
......
......@@ -115,13 +115,14 @@ def infer(args):
image_file, is_color=True).astype("float32")
image -= IMG_MEAN
img = paddle.dataset.image.to_chw(image)[np.newaxis, :]
image_t = fluid.core.LoDTensor()
image_t = fluid.LoDTensor()
image_t.set(img, place)
result = exe.run(inference_program,
feed={"image": image_t},
fetch_list=[predict])
cv2.imwrite(args.out_path + "/" + filename + "_result.png",
color(result[0]))
print("Saved images into: %s" % args.out_path)
def main():
......
......@@ -96,8 +96,11 @@ def train(args):
if args.init_model is not None:
print("load model from: %s" % args.init_model)
sys.stdout.flush()
fluid.io.load_params(exe, args.init_model)
def if_exist(var):
return os.path.exists(os.path.join(args.init_model, var.name))
fluid.io.load_vars(exe, args.init_model, predicate=if_exist)
iter_id = 0
t_loss = 0.
......
......@@ -18,7 +18,6 @@ from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
from paddle.fluid import core
import six
......@@ -72,7 +71,7 @@ def to_lodtensor(data, place):
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int32")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = core.LoDTensor()
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
......@@ -80,17 +79,17 @@ def to_lodtensor(data, place):
def get_feeder_data(data, place, for_test=False):
feed_dict = {}
image_t = core.LoDTensor()
image_t = fluid.LoDTensor()
image_t.set(data[0], place)
feed_dict["image"] = image_t
if not for_test:
labels_sub1_t = core.LoDTensor()
labels_sub2_t = core.LoDTensor()
labels_sub4_t = core.LoDTensor()
mask_sub1_t = core.LoDTensor()
mask_sub2_t = core.LoDTensor()
mask_sub4_t = core.LoDTensor()
labels_sub1_t = fluid.LoDTensor()
labels_sub2_t = fluid.LoDTensor()
labels_sub4_t = fluid.LoDTensor()
mask_sub1_t = fluid.LoDTensor()
mask_sub2_t = fluid.LoDTensor()
mask_sub4_t = fluid.LoDTensor()
labels_sub1_t.set(data[1], place)
labels_sub2_t.set(data[3], place)
......@@ -105,8 +104,8 @@ def get_feeder_data(data, place, for_test=False):
feed_dict["label_sub4"] = labels_sub4_t
feed_dict["mask_sub4"] = mask_sub4_t
else:
label_t = core.LoDTensor()
mask_t = core.LoDTensor()
label_t = fluid.LoDTensor()
mask_t = fluid.LoDTensor()
label_t.set(data[1], place)
mask_t.set(data[2], place)
feed_dict["label"] = label_t
......
......@@ -16,7 +16,7 @@ Only support Adam optimizer yet.
Short description of aforementioned steps:
## 1. Install PaddlePaddle
Follow the PaddlePaddle [installation instructions](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/image_classification#installation) to install PaddlePaddle. If you [build from source](https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/beginners_guide/install/compile/compile_Ubuntu_en.md), please use the following cmake arguments and make sure to set `-DWITH_NGRAPH=ON`.
Follow the PaddlePaddle [installation instructions](https://github.com/PaddlePaddle/models/tree/develop/fluid/PaddleCV/image_classification#installation) to install PaddlePaddle. If you [build from source](https://github.com/PaddlePaddle/FluidDoc/blob/develop/doc/fluid/beginners_guide/install/compile/compile_Ubuntu_en.md), please use the following cmake arguments and make sure to set `-DWITH_NGRAPH=ON`.
```
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=ON -DWITH_MKLDNN=ON -DWITH_NGRAPH=ON
```
......@@ -35,9 +35,8 @@ export KMP_AFFINITY=granularity=fine,compact,1,0
```
## 3. How the benchmark script might be run.
If everything built successfully, you can run the commands in the ResNet50 nGraph section of the script [run.sh](https://github.com/PaddlePaddle/models/blob/develop/fluid/PaddleCV/image_classification/run.sh) to start the benchmark job locally. You will need to uncomment the `#ResNet50 nGraph` part of the script.
If everything built successfully, you can run the commands in the ResNet50 nGraph section of the script [run.sh](https://github.com/PaddlePaddle/models/blob/develop/PaddleCV/image_classification/run.sh) to start the benchmark job locally. You will need to uncomment the `#ResNet50 nGraph` part of the script.
The above is a training job using nGraph; to run an inference job with nGraph:
Please download the pre-trained ResNet50 model from [supported models](https://github.com/PaddlePaddle/models/tree/72dcc7c1a8d5de9d19fbd65b4143bd0d661eee2c/fluid/PaddleCV/image_classification#supported-models-and-performances) for the inference script.
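For the training job, a rough sketch of how the nGraph engine is switched on; the authoritative commands are the ones to uncomment in run.sh, and the exact train.py flags below are assumptions:

```bash
# nGraph is selected through this environment variable, which train.py reads
# (it then skips the ParallelExecutor path). The ResNet50 flag is an assumption;
# see the "#ResNet50 nGraph" section of run.sh for the exact invocation.
export FLAGS_use_ngraph=true
python train.py --model=ResNet50
```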
......@@ -108,3 +108,24 @@ The second figure shows speed-ups when using multiple GPUs according to the abov
Speed-ups of Multiple-GPU Training of Resnet50 on Imagenet
</p>
## Deep Gradient Compression([arXiv:1712.01887](https://arxiv.org/abs/1712.01887)) for resnet
#### Environment
- GPU: NVIDIA® Tesla® V100
- Machine number * Card number: 4 * 4
- System: CentOS 6u3
- CUDA/cuDNN: 9.0/7.1
- Dataset: ImageNet
- Date: 2017.04
- PaddleVersion: 1.4
- Batch size: 32
#### Performance
<p align="center">
<img src="../images/resnet_dgc.png" width=528> <br />
Performance using DGC for resnet-fp32 under different bandwidth
</p>
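As a hedged usage sketch of the DGC switch added in this change, using the flag names from dist_train.py and the nccl2 launch pattern from the run script below:

```bash
# Launch one trainer with Deep Gradient Compression enabled. The PADDLE_* / NCCL_*
# environment variables from the launch script must also be set; rampup_begin_step
# is the step at which the DGC sparsity ramp-up starts (default 5008).
python -u dist_train.py --enable_dgc True --rampup_begin_step 5008 \
    --model DistResNet --update_method nccl2 --batch_size 32
```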
......@@ -68,6 +68,9 @@ def parse_args():
add_arg('reduce_strategy', str, "allreduce", "Choose from reduce or allreduce.")
add_arg('skip_unbalanced_data', bool, False, "Whether to skip the remaining data when it is not balanced across nodes.")
add_arg('enable_sequential_execution', bool, False, "Whether to enable sequential execution.")
# for DGC
add_arg('enable_dgc', bool, False, "Whether to enable Deep Gradient Compression (DGC).")
add_arg('rampup_begin_step', int, 5008, "The step at which the DGC sparsity ramp-up begins.")
# yapf: enable
args = parser.parse_args()
return args
......@@ -157,6 +160,17 @@ def build_program(is_train, main_prog, startup_prog, args):
boundaries=bd, values=lr),
warmup_steps, start_lr, end_lr),
momentum=0.9)
if args.enable_dgc:
optimizer = fluid.optimizer.DGCMomentumOptimizer(
learning_rate=utils.learning_rate.lr_warmup(
fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
warmup_steps, start_lr, end_lr),
momentum=0.9,
sparsity=[0.999, 0.999],
rampup_begin_step=args.rampup_begin_step)
if args.fp16:
params_grads = optimizer.backward(avg_cost)
master_params_grads = utils.create_master_params_grads(
......@@ -224,7 +238,7 @@ def train_parallel(args):
if args.update_method == "pserver":
train_prog, startup_prog = pserver_prepare(args, train_prog, startup_prog)
elif args.update_method == "nccl2":
nccl2_prepare(args, startup_prog)
nccl2_prepare(args, startup_prog, main_prog=train_prog)
if args.dist_env["training_role"] == "PSERVER":
run_pserver(train_prog, startup_prog)
......@@ -247,11 +261,16 @@ def train_parallel(args):
strategy = fluid.ExecutionStrategy()
strategy.num_threads = args.num_threads
# num_iteration_per_drop_scope indicates how many
# iterations to wait before cleaning up the temp variables
# generated during execution. It may make execution faster,
# because the temp variables' shapes may be the same between two iterations.
strategy.num_iteration_per_drop_scope = 30
build_strategy = fluid.BuildStrategy()
build_strategy.enable_inplace = False
build_strategy.memory_optimize = False
build_strategy.enable_sequential_execution = bool(args.enable_sequential_execution)
if args.reduce_strategy == "reduce":
build_strategy.reduce_strategy = fluid.BuildStrategy(
......
......@@ -2,7 +2,7 @@ import os
import paddle.fluid as fluid
def nccl2_prepare(args, startup_prog):
def nccl2_prepare(args, startup_prog, main_prog):
config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
......@@ -12,7 +12,8 @@ def nccl2_prepare(args, startup_prog):
t.transpile(envs["trainer_id"],
trainers=','.join(envs["trainer_endpoints"]),
current_endpoint=envs["current_endpoint"],
startup_program=startup_prog)
startup_program=startup_prog,
program=main_prog)
def pserver_prepare(args, train_prog, startup_prog):
......
#!/bin/bash
set -e
enable_dgc=False
while true ; do
case "$1" in
-enable_dgc) enable_dgc="$2" ; shift 2 ;;
*)
if [[ ${#1} -gt 0 ]]; then
echo "unsupported argument ${1}" ; exit 1 ;
else
break
fi
;;
esac
done
case "${enable_dgc}" in
True) ;;
False) ;;
*) echo "not support argument -enable_dgc: ${dgc}" ; exit 1 ;;
esac
export MODEL="DistResNet"
export PADDLE_TRAINER_ENDPOINTS="127.0.0.1:7160,127.0.0.1:7161"
......@@ -9,16 +31,20 @@ mkdir -p logs
# NOTE: set NCCL_P2P_DISABLE so that can run nccl2 distribute train on one node.
# You can set vlog to see more details' log.
# export GLOG_v=1
# export GLOG_logtostderr=1
PADDLE_TRAINING_ROLE="TRAINER" \
PADDLE_CURRENT_ENDPOINT="127.0.0.1:7160" \
PADDLE_TRAINER_ID="0" \
CUDA_VISIBLE_DEVICES="0" \
NCCL_P2P_DISABLE="1" \
python dist_train.py --model $MODEL --update_method nccl2 --batch_size 32 &> logs/tr0.log &
python -u dist_train.py --enable_dgc ${enable_dgc} --model $MODEL --update_method nccl2 --batch_size 32 &> logs/tr0.log &
PADDLE_TRAINING_ROLE="TRAINER" \
PADDLE_CURRENT_ENDPOINT="127.0.0.1:7161" \
PADDLE_TRAINER_ID="1" \
CUDA_VISIBLE_DEVICES="1" \
NCCL_P2P_DISABLE="1" \
python dist_train.py --model $MODEL --update_method nccl2 --batch_size 32 &> logs/tr1.log &
python -u dist_train.py --enable_dgc ${enable_dgc} --model $MODEL --update_method nccl2 --batch_size 32 &> logs/tr1.log &
......@@ -335,7 +335,7 @@ def print_paddle_envs():
print("DEVICE_NUM: %d" % DEVICE_NUM)
for k in os.environ:
if "PADDLE_" in k:
print "ENV %s:%s" % (k, os.environ[k])
print("ENV %s:%s" % (k, os.environ[k]))
print('------------------------------------------------')
......
......@@ -22,8 +22,6 @@ import time
import os
import math
import cProfile, pstats, StringIO
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
......@@ -135,7 +133,7 @@ def lr_decay(lrs, epochs, bs, total_image):
step += 1
ratio = (lrs[idx][1] - lrs[idx][0])*1.0 / (epoch[1] - epoch[0])
lr_base = lrs[idx][0]
for s in xrange(epoch[0], epoch[1]):
for s in range(epoch[0], epoch[1]):
if boundaries:
boundaries.append(boundaries[-1] + step + 1)
else:
......
......@@ -323,6 +323,7 @@ def train(args):
train_py_reader.decorate_paddle_reader(train_reader)
test_py_reader.decorate_paddle_reader(test_reader)
# use_ngraph is for CPU only, please refer to README_ngraph.md for details
use_ngraph = os.getenv('FLAGS_use_ngraph')
if not use_ngraph:
train_exe = fluid.ParallelExecutor(
......
......@@ -38,7 +38,7 @@ def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
shape=[1], dtype='float32', value=float(5), force_cpu=True)
with init_on_cpu():
epoch = ops.floor(global_step / step_each_epoch)
epoch = ops.floor(global_step / step_each_epoch)
with control_flow.Switch() as switch:
with switch.case(epoch < warmup_epoch):
decayed_lr = learning_rate * (global_step / (step_each_epoch * warmup_epoch))
......
......@@ -13,7 +13,7 @@ Metric learning is a kind of methods to learn discriminative features for each s
## Installation
Running the sample code in this directory requires PaddlePaddle Fluid v0.14.0 or later. If the PaddlePaddle version on your device is lower than this, please follow the instructions in the [installation document](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html) to update it.
Running the sample code in this directory requires PaddlePaddle Fluid v0.14.0 or later. If the PaddlePaddle version on your device is lower than this, please follow the instructions in the [installation document](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html) to update it.
## Data preparation
......@@ -25,7 +25,7 @@ sh download_sop.sh
## Training metric learning models
To train a metric learning model, one needs to choose a neural network as the backbone and a metric loss function to optimize. We first train the metric learning model with softmax or arcmargin loss, and then fine-tune it with other metric learning losses, such as triplet, quadruplet and EML loss. An example of training with arcmargin loss is shown below:
To train a metric learning model, one needs to choose a neural network as the backbone and a metric loss function to optimize. You can download [ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.zip) pretrained on the ImageNet dataset as the backbone. We first train the metric learning model with softmax or arcmargin loss, and then fine-tune it with other metric learning losses, such as triplet, quadruplet and EML loss. An example of training with arcmargin loss is shown below:
```
......
# Deep Metric Learning
Metric learning is a method of learning discriminative features for pairs of samples, so that samples of the same class have small feature distances and samples of different classes have large feature distances in the feature space. With the development of deep learning, metric learning methods based on deep neural networks have greatly improved performance on many vision tasks, such as face recognition, face verification, person re-identification and image retrieval. This chapter introduces several metric learning methods implemented in PaddlePaddle Fluid and how to use them, covering [data preparation](#数据准备), [model training](#模型训练), [model fine-tuning](#模型微调), [model evaluation](#模型评估) and [model inference](#模型预测).
Metric learning is a method of learning discriminative features for pairs of samples, so that samples of the same class have small feature distances and samples of different classes have large feature distances in the feature space. With the development of deep learning, metric learning methods based on deep neural networks have greatly improved performance on many vision tasks, such as face recognition, face verification, person re-identification and image retrieval. This chapter introduces several metric learning methods implemented in PaddlePaddle Fluid and how to use them, covering [data preparation](#数据准备), [model training](#模型训练), [model fine-tuning](#模型微调), [model evaluation](#模型评估) and [model inference](#模型预测).
---
## Introduction
......@@ -13,7 +13,7 @@
## Installation
Running the code in this chapter requires PaddlePaddle Fluid v0.14.0 or later. If the PaddlePaddle version on your device is lower than v0.14.0, please follow this [installation document](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html) to install and update it.
Running the code in this chapter requires PaddlePaddle Fluid v0.14.0 or later. If the PaddlePaddle version on your device is lower than v0.14.0, please follow this [installation document](http://paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html) to install and update it.
## Data Preparation
......@@ -25,7 +25,7 @@ sh download_sop.sh
## Model Training
To train a metric learning model, we need a neural network as the backbone model (e.g. ResNet50) and a metric learning loss function to optimize. We first train with softmax or arcmargin loss and then fine-tune with other loss functions, such as triplet, quadruplet and EML. Below is an example of training with arcmargin loss:
To train a metric learning model, we need a neural network as the backbone model (e.g. [ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.zip)) and a metric learning loss function to optimize. We first train with softmax or arcmargin loss and then fine-tune with other loss functions, such as triplet, quadruplet and EML. Below is an example of training with arcmargin loss:
```
......
......@@ -70,7 +70,7 @@ def infer(args):
for batch_id, data in enumerate(infer_reader()):
result = exe.run(test_program, fetch_list=fetch_list, feed=feeder.feed(data))
result = result[0][0].reshape(-1)
print("Test-{0}-feature: {1}".format(batch_id, result))
print("Test-{0}-feature: {1}".format(batch_id, result[:5]))
sys.stdout.flush()
......
......@@ -179,8 +179,12 @@ def train(args,
predicate=if_exist)
if parallel:
loss.persistable = True
build_strategy = fluid.BuildStrategy()
build_strategy.enable_inplace = True
build_strategy.memory_optimize = True
train_exe = fluid.ParallelExecutor(main_program=train_prog,
use_cuda=use_gpu, loss_name=loss.name)
use_cuda=use_gpu, loss_name=loss.name, build_strategy=build_strategy)
train_reader = reader.train(data_args,
train_file_list,
batch_size_per_device,
......

Running the example programs in this directory requires the latest PaddlePaddle develop version. If your installed PaddlePaddle is older than this requirement, please update it following the instructions in the [installation documentation](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html).
......@@ -156,12 +155,13 @@ env CUDA_VISIBLE_DEVICES=0,1,2,3 python train.py --parallel=True
Use the following command to invoke the evaluation script and evaluate the model on the specified dataset:
```
env CUDA_VISIBLE_DEVICE=0 python eval.py \
env CUDA_VISIBLE_DEVICES=0 python eval.py \
--model_path="./models/model_0" \
--input_images_dir="./eval_data/images/" \
--input_images_list="./eval_data/eval_list\" \
--input_images_list="./eval_data/eval_list"
```
Run `python train.py --help` for detailed parameter descriptions.
......@@ -170,7 +170,7 @@ env CUDA_VISIBLE_DEVICE=0 python eval.py \
Read the path of a single image from standard input and run inference on it:
```
env CUDA_VISIBLE_DEVICE=0 python infer.py \
env CUDA_VISIBLE_DEVICES=0 python infer.py \
--model_path="models/model_00044_15000"
```
......@@ -193,7 +193,7 @@ result: [2067 2067 8187 8477 5027 7191 2431 1462]
Read image paths in batches from a file and run inference on them:
```
env CUDA_VISIBLE_DEVICE=0 python infer.py \
env CUDA_VISIBLE_DEVICES=0 python infer.py \
--model_path="models/model_00044_15000" \
--input_images_list="data/test.list"
```
......@@ -204,3 +204,5 @@ env CUDA_VISIBLE_DEVICE=0 python infer.py \
|- |:-: |
|[ocr_ctc_params](https://paddle-ocr-models.bj.bcebos.com/ocr_ctc.zip) | 22.3% |
|[ocr_attention_params](https://paddle-ocr-models.bj.bcebos.com/ocr_attention.zip) | 15.8%|
> In all the examples in this document, you can change the GPU card used by the current task by modifying `CUDA_VISIBLE_DEVICES`.
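For example, to run the single-image inference command above on GPU card 1:

```
env CUDA_VISIBLE_DEVICES=1 python infer.py \
    --model_path="models/model_00044_15000"
```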
......@@ -339,7 +339,7 @@ def attention_infer(images, num_classes, use_cudnn=True):
return ids
def attention_eval(data_shape, num_classes):
def attention_eval(data_shape, num_classes, use_cudnn=True):
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label_in = fluid.layers.data(
name='label_in', shape=[1], dtype='int32', lod_level=1)
......@@ -349,7 +349,7 @@ def attention_eval(data_shape, num_classes):
label_in = fluid.layers.cast(x=label_in, dtype='int64')
gru_backward, encoded_vector, encoded_proj = encoder_net(
images, is_test=True)
images, is_test=True, use_cudnn=use_cudnn)
backward_first = fluid.layers.sequence_pool(
input=gru_backward, pool_type='first')
......
......@@ -213,12 +213,12 @@ def ctc_train_net(args, data_shape, num_classes):
return sum_cost, error_evaluator, inference_program, model_average
def ctc_infer(images, num_classes, use_cudnn):
def ctc_infer(images, num_classes, use_cudnn=True):
fc_out = encoder_net(images, num_classes, is_test=True, use_cudnn=use_cudnn)
return fluid.layers.ctc_greedy_decoder(input=fc_out, blank=num_classes)
def ctc_eval(data_shape, num_classes, use_cudnn):
def ctc_eval(data_shape, num_classes, use_cudnn=True):
images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int32', lod_level=1)
......
......@@ -10,6 +10,11 @@ from os import path
from paddle.dataset.image import load_image
import paddle
try:
input = raw_input
except NameError:
pass
SOS = 0
EOS = 1
NUM_CLASSES = 95
......@@ -175,7 +180,7 @@ class DataGenerator(object):
yield img, label
else:
while True:
img_path = raw_input("Please input the path of image: ")
img_path = input("Please input the path of image: ")
img = Image.open(img_path).convert('L')
img = np.array(img) - 127.5
img = img[np.newaxis, ...]
......
......@@ -31,7 +31,8 @@ def evaluate(args):
num_classes = data_reader.num_classes()
data_shape = data_reader.data_shape()
# define network
evaluator, cost = eval(data_shape, num_classes)
evaluator, cost = eval(
data_shape, num_classes, use_cudnn=True if args.use_gpu else False)
# data reader
test_reader = data_reader.test(
......@@ -62,8 +63,8 @@ def evaluate(args):
count += 1
exe.run(fluid.default_main_program(), feed=get_feeder_data(data, place))
avg_distance, avg_seq_error = evaluator.eval(exe)
print("Read %d samples; avg_distance: %s; avg_seq_error: %s" % (
count, avg_distance, avg_seq_error))
print("Read %d samples; avg_distance: %s; avg_seq_error: %s" %
(count, avg_distance, avg_seq_error))
def main():
......
......@@ -31,7 +31,7 @@ def inference(args):
"""OCR inference"""
if args.model == "crnn_ctc":
infer = ctc_infer
get_feeder_data = get_ctc_feeder_data
get_feeder_data = get_ctc_feeder_for_infer
else:
infer = attention_infer
get_feeder_data = get_attention_feeder_for_infer
......@@ -78,7 +78,7 @@ def inference(args):
batch_times = []
iters = 0
for data in infer_reader():
feed_dict = get_feeder_data(data, place, need_label=False)
feed_dict = get_feeder_data(data, place)
if args.iterations > 0 and iters == args.iterations + args.skip_batch_num:
break
if iters < args.skip_batch_num:
......
......@@ -18,7 +18,6 @@ from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
from paddle.fluid import core
import paddle.fluid as fluid
import six
......@@ -73,17 +72,18 @@ def to_lodtensor(data, place):
lod.append(cur_len)
flattened_data = np.concatenate(data, axis=0).astype("int32")
flattened_data = flattened_data.reshape([len(flattened_data), 1])
res = core.LoDTensor()
res = fluid.LoDTensor()
res.set(flattened_data, place)
res.set_lod([lod])
return res
def get_ctc_feeder_data(data, place, need_label=True):
pixel_tensor = core.LoDTensor()
pixel_tensor = fluid.LoDTensor()
pixel_data = None
pixel_data = np.concatenate(
list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
list(map(lambda x: x[0][np.newaxis, :], data)),
axis=0).astype("float32")
pixel_tensor.set(pixel_data, place)
label_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
if need_label:
......@@ -92,11 +92,16 @@ def get_ctc_feeder_data(data, place, need_label=True):
return {"pixel": pixel_tensor}
def get_ctc_feeder_for_infer(data, place):
return get_ctc_feeder_data(data, place, need_label=False)
def get_attention_feeder_data(data, place, need_label=True):
pixel_tensor = core.LoDTensor()
pixel_tensor = fluid.LoDTensor()
pixel_data = None
pixel_data = np.concatenate(
list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
list(map(lambda x: x[0][np.newaxis, :], data)),
axis=0).astype("float32")
pixel_tensor.set(pixel_data, place)
label_in_tensor = to_lodtensor(list(map(lambda x: x[1], data)), place)
label_out_tensor = to_lodtensor(list(map(lambda x: x[2], data)), place)
......@@ -124,10 +129,11 @@ def get_attention_feeder_for_infer(data, place):
init_scores = fluid.create_lod_tensor(init_scores_data,
init_recursive_seq_lens, place)
pixel_tensor = core.LoDTensor()
pixel_tensor = fluid.LoDTensor()
pixel_data = None
pixel_data = np.concatenate(
list(map(lambda x: x[0][np.newaxis, :], data)), axis=0).astype("float32")
list(map(lambda x: x[0][np.newaxis, :], data)),
axis=0).astype("float32")
pixel_tensor.set(pixel_data, place)
return {
"pixel": pixel_tensor,
......
......@@ -7,11 +7,11 @@ export OMP_NUM_THREADS=1
cudaid=${face_detection:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --model_save_dir=output/ --data_dir=dataset/coco/ --max_iter=100 --enable_ce --pretrained_model=./imagenet_resnet50_fusebn | python _ce.py
FLAGS_benchmark=true python train.py --model_save_dir=output/ --data_dir=dataset/coco/ --max_iter=500 --enable_ce --pretrained_model=./imagenet_resnet50_fusebn --learning_rate=0.00125 | python _ce.py
cudaid=${face_detection_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train.py --model_save_dir=output/ --data_dir=dataset/coco/ --max_iter=100 --enable_ce --pretrained_model=./imagenet_resnet50_fusebn | python _ce.py
FLAGS_benchmark=true python train.py --model_save_dir=output/ --data_dir=dataset/coco/ --max_iter=500 --enable_ce --pretrained_model=./imagenet_resnet50_fusebn --learning_rate=0.005 | python _ce.py
......@@ -338,7 +338,6 @@ class RCNN(object):
self.roi_has_mask_int32)
self.mask_fcn_logits = self.SuffixNet(conv5)
else:
self.eval_bbox()
pred_res_shape = fluid.layers.shape(self.pred_result)
shape = fluid.layers.reduce_prod(pred_res_shape)
shape = fluid.layers.reshape(shape, [1, 1])
......
......@@ -10,17 +10,19 @@
| [Attention LSTM](./models/attention_lstm/README.md) | Video classification | Widely used model, fast with high accuracy |
| [NeXtVLAD](./models/nextvlad/README.md) | Video classification | Best single model in the 2nd YouTube-8M challenge |
| [StNet](./models/stnet/README.md) | Video classification | Joint spatio-temporal video modeling method proposed at AAAI'19 |
| [TSM](./models/tsm/README.md) | Video classification | Simple and efficient spatio-temporal video modeling method based on temporal shift |
| [TSN](./models/tsn/README.md) | Video classification | Classic 2D-CNN-based solution proposed at ECCV'16 |
| [Non-local](./models/nonlocal_model/README.md) | Video classification | Non-local relation modeling model for video |
### Key Features
- Includes multiple leading mainstream models for video classification. Attention LSTM, Attention Cluster and NeXtVLAD are popular feature-sequence models, while TSN and StNet are two end-to-end video classification models. Attention LSTM is fast with high accuracy, NeXtVLAD is the best single model in the 2nd YouTube-8M challenge, and TSN is a classic 2D-CNN-based solution. Attention Cluster and StNet are Baidu's own models, published at CVPR 2018 and AAAI 2019 respectively, and were used by the first-place entry of the Kinetics-600 challenge.
- Includes multiple leading mainstream models for video classification. Attention LSTM, Attention Cluster and NeXtVLAD are popular feature-sequence models, while Non-local, TSN, TSM and StNet are end-to-end video classification models. Attention LSTM is fast with high accuracy, NeXtVLAD is the best single model in the 2nd YouTube-8M challenge, TSN is a classic 2D-CNN-based solution, TSM is a simple and efficient temporal-shift-based spatio-temporal modeling method, and Non-local introduces non-local relation modeling for video. Attention Cluster and StNet are Baidu's own models, published at CVPR 2018 and AAAI 2019 respectively, and were used by the first-place entry of the Kinetics-600 challenge.
- Provides a general skeleton codebase for video classification tasks, so users can efficiently configure models and run training and evaluation in one step.
## Installation
Running the sample code in this model library requires PaddlePaddle Fluid v1.2.0 or later. If the PaddlePaddle in your environment is lower than this version, please update PaddlePaddle following the instructions in the [installation document](http://www.paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html).
Running the sample code in this model library requires PaddlePaddle Fluid v1.4.0 or later. If the PaddlePaddle in your environment is lower than this version, please update PaddlePaddle following the instructions in the [installation document](http://www.paddlepaddle.org/documentation/docs/zh/1.4/beginners_guide/install/index_cn.html).
## Data Preparation
......@@ -36,18 +38,18 @@
``` bash
export CUDA_VISIBLE_DEVICES=0
python train.py --model-name=STNET
python train.py --model_name=STNET
--config=./configs/stnet.txt
--save-dir=checkpoints
--save_dir=checkpoints
```
Multi-GPU training:
``` bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python train.py --model-name=STNET
python train.py --model_name=STNET
--config=./configs/stnet.txt
--save-dir=checkpoints
--save_dir=checkpoints
```
The video model library also provides quick training scripts under the `scripts/train` directory; training can be launched with the following commands:
......@@ -113,9 +115,10 @@ infer.py
| Model | Batch Size | Environment | cuDNN Version | Top-1 | Download |
| :-------: | :---: | :---------: | :----: | :----: | :----------: |
| StNet | 128 | 8卡P40 | 5.1 | 0.69 | [model](https://paddlemodels.bj.bcebos.com/video_classification/stnet_kinetics.tar.gz) |
| StNet | 128 | 8卡P40 | 7.1 | 0.69 | [model](https://paddlemodels.bj.bcebos.com/video_classification/stnet_kinetics.tar.gz) |
| TSN | 256 | 8卡P40 | 7.1 | 0.67 | [model](https://paddlemodels.bj.bcebos.com/video_classification/tsn_kinetics.tar.gz) |
| TSM | 128 | 8卡P40 | 7.1 | 0.70 | [model](https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz) |
| Non-local | 64 | 8卡P40 | 7.1 | 0.74 | [model](https://paddlemodels.bj.bcebos.com/video_classification/nonlocal_kinetics.tar.gz) |
## References
- [Attention Clusters: Purely Attention Based Local Feature Integration for Video Classification](https://arxiv.org/abs/1711.09550), Xiang Long, Chuang Gan, Gerard de Melo, Jiajun Wu, Xiao Liu, Shilei Wen
......@@ -123,8 +126,9 @@ infer.py
- [NeXtVLAD: An Efficient Neural Network to Aggregate Frame-level Features for Large-scale Video Classification](https://arxiv.org/abs/1811.05014), Rongcheng Lin, Jing Xiao, Jianping Fan
- [StNet:Local and Global Spatial-Temporal Modeling for Human Action Recognition](https://arxiv.org/abs/1811.01549), Dongliang He, Zhichao Zhou, Chuang Gan, Fu Li, Xiao Liu, Yandong Li, Limin Wang, Shilei Wen
- [Temporal Segment Networks: Towards Good Practices for Deep Action Recognition](https://arxiv.org/abs/1608.00859), Limin Wang, Yuanjun Xiong, Zhe Wang, Yu Qiao, Dahua Lin, Xiaoou Tang, Luc Van Gool
- [Temporal Shift Module for Efficient Video Understanding](https://arxiv.org/abs/1811.08383v1), Ji Lin, Chuang Gan, Song Han
- [Non-local Neural Networks](https://arxiv.org/abs/1711.07971v1), Xiaolong Wang, Ross Girshick, Abhinav Gupta, Kaiming He
## Version Updates
- 3/2019: Added the model library and released five video classification models: Attention Cluster, Attention LSTM, NeXtVLAD, StNet and TSN.
- 4/2019: Released two video classification models: Non-local and TSM.
......@@ -19,12 +19,15 @@ except:
from utils import AttrDict
import logging
logger = logging.getLogger(__name__)
CONFIG_SECS = [
'train',
'valid',
'test',
'infer',
]
'train',
'valid',
'test',
'infer',
]
def parse_config(cfg_file):
......@@ -43,6 +46,7 @@ def parse_config(cfg_file):
return cfg
def merge_configs(cfg, sec, args_dict):
assert sec in CONFIG_SECS, "invalid config section {}".format(sec)
sec_dict = getattr(cfg, sec.upper())
......@@ -56,3 +60,11 @@ def merge_configs(cfg, sec, args_dict):
pass
return cfg
def print_configs(cfg, mode):
logger.info("---------------- {:>5} Arguments ----------------".format(mode))
for sec, sec_items in cfg.items():
logger.info("{}:".format(sec))
for k, v in sec_items.items():
logger.info(" {}:{}".format(k, v))
logger.info("-------------------------------------------------")
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
File mode changed from 100755 to 100644
[MODEL]
name = "NONLOCAL"
num_classes = 400
image_mean = 114.75
image_std = 57.375
depth = 50
dataset = 'kinetics400'
video_arc_choice = 1
use_affine = False
fc_init_std = 0.01
bn_momentum = 0.9
bn_epsilon = 1.0e-5
bn_init_gamma = 0.
[RESNETS]
num_groups = 1
width_per_group = 64
trans_func = bottleneck_transformation_3d
[NONLOCAL]
bn_momentum = 0.9
bn_epsilon = 1.0e-5
bn_init_gamma = 0.0
layer_mod = 2
conv3_nonlocal = True
conv4_nonlocal = True
conv_init_std = 0.01
no_bias = 0
use_maxpool = True
use_softmax = True
use_scale = True
use_zero_init_conv = False
use_bn = True
use_affine = False
[TRAIN]
num_reader_threads = 8
batch_size = 64
num_gpus = 8
filelist = './dataset/nonlocal/trainlist.txt'
crop_size = 224
sample_rate = 8
video_length = 8
jitter_scales = [256, 320]
dropout_rate = 0.5
learning_rate = 0.01
learning_rate_decay = 0.1
step_sizes = [150000, 150000, 100000]
max_iter = 400000
weight_decay = 0.0001
weight_decay_bn = 0.0
momentum = 0.9
nesterov = True
scale_momentum = True
[VALID]
num_reader_threads = 8
batch_size = 64
filelist = './dataset/nonlocal/vallist.txt'
crop_size = 224
sample_rate = 8
video_length = 8
jitter_scales = [256, 320]
[TEST]
num_reader_threads = 8
batch_size = 4
filelist = 'dataset/nonlocal/testlist.txt'
filename_gt = 'dataset/nonlocal/vallist.txt'
checkpoint_dir = './output'
crop_size = 256
sample_rate = 8
video_length = 8
jitter_scales = [256, 256]
num_test_clips = 30
dataset_size = 19761
use_multi_crop = 1
[INFER]
num_reader_threads = 8
batch_size = 1
filelist = 'dataset/nonlocal/inferlist.txt'
crop_size = 256
sample_rate = 8
video_length = 8
jitter_scales = [256, 256]
num_test_clips = 30
use_multi_crop = 1
......@@ -34,14 +34,16 @@ batch_size = 128
filelist = "./dataset/kinetics/val.list"
[TEST]
seg_num = 25
short_size = 256
target_size = 256
num_reader_threads = 12
buf_size = 1024
batch_size = 16
batch_size = 4
filelist = "./dataset/kinetics/test.list"
[INFER]
seg_num = 25
short_size = 256
target_size = 256
num_reader_threads = 12
......
[MODEL]
name = "TSM"
format = "pkl"
num_classes = 400
seg_num = 8
seglen = 1
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]
num_layers = 50
[TRAIN]
epoch = 65
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 128
use_gpu = True
num_gpus = 8
filelist = "./dataset/kinetics/train.list"
learning_rate = 0.01
learning_rate_decay = 0.1
decay_epochs = [40, 60]
l2_weight_decay = 1e-4
momentum = 0.9
total_videos = 239781
[VALID]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 128
filelist = "./dataset/kinetics/val.list"
[TEST]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 16
filelist = "./dataset/kinetics/test.list"
[INFER]
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 1
filelist = "./dataset/kinetics/infer.list"
......@@ -33,11 +33,12 @@ batch_size = 256
filelist = "./dataset/kinetics/val.list"
[TEST]
seg_num = 7
short_size = 256
target_size = 224
num_reader_threads = 12
buf_size = 1024
batch_size = 32
batch_size = 16
filelist = "./dataset/kinetics/test.list"
[INFER]
......
......@@ -3,10 +3,11 @@ from .feature_reader import FeatureReader
from .kinetics_reader import KineticsReader
from .nonlocal_reader import NonlocalReader
# regist reader, sort by alphabet
regist_reader("ATTENTIONCLUSTER", FeatureReader)
regist_reader("NEXTVLAD", FeatureReader)
regist_reader("ATTENTIONLSTM", FeatureReader)
regist_reader("TSN", KineticsReader)
regist_reader("NEXTVLAD", FeatureReader)
regist_reader("NONLOCAL", NonlocalReader)
regist_reader("TSM", KineticsReader)
regist_reader("TSN", KineticsReader)
regist_reader("STNET", KineticsReader)
regist_reader("NONLOCAL", NonlocalReader)
......@@ -54,16 +54,17 @@ class KineticsReader(DataReader):
"""
def __init__(self, name, mode, cfg):
self.name = name
self.mode = mode
super(KineticsReader, self).__init__(name, mode, cfg)
self.format = cfg.MODEL.format
self.num_classes = cfg.MODEL.num_classes
self.seg_num = cfg.MODEL.seg_num
self.seglen = cfg.MODEL.seglen
self.short_size = cfg[mode.upper()]['short_size']
self.target_size = cfg[mode.upper()]['target_size']
self.num_reader_threads = cfg[mode.upper()]['num_reader_threads']
self.buf_size = cfg[mode.upper()]['buf_size']
self.num_classes = self.get_config_from_sec('model', 'num_classes')
self.seg_num = self.get_config_from_sec('model', 'seg_num')
self.seglen = self.get_config_from_sec('model', 'seglen')
self.seg_num = self.get_config_from_sec(mode, 'seg_num', self.seg_num)
self.short_size = self.get_config_from_sec(mode, 'short_size')
self.target_size = self.get_config_from_sec(mode, 'target_size')
self.num_reader_threads = self.get_config_from_sec(mode, 'num_reader_threads')
self.buf_size = self.get_config_from_sec(mode, 'buf_size')
self.img_mean = np.array(cfg.MODEL.image_mean).reshape(
[3, 1, 1]).astype(np.float32)
......@@ -74,7 +75,7 @@ class KineticsReader(DataReader):
self.filelist = cfg[mode.upper()]['filelist']
def create_reader(self):
_reader = _reader_creator(self.filelist, self.mode, seg_num=self.seg_num, seglen = self.seglen, \
_reader = self._reader_creator(self.filelist, self.mode, seg_num=self.seg_num, seglen = self.seglen, \
short_size = self.short_size, target_size = self.target_size, \
img_mean = self.img_mean, img_std = self.img_std, \
shuffle = (self.mode == 'train'), \
......@@ -94,117 +95,183 @@ class KineticsReader(DataReader):
return _batch_reader
def _reader_creator(pickle_list,
mode,
seg_num,
seglen,
short_size,
target_size,
img_mean,
img_std,
shuffle=False,
num_threads=1,
buf_size=1024,
format='pkl'):
def reader():
with open(pickle_list) as flist:
lines = [line.strip() for line in flist]
if shuffle:
random.shuffle(lines)
for line in lines:
pickle_path = line.strip()
yield [pickle_path]
if format == 'pkl':
decode_func = decode_pickle
elif format == 'mp4':
decode_func = decode_mp4
else:
raise "Not implemented format {}".format(format)
mapper = functools.partial(
decode_func,
mode=mode,
seg_num=seg_num,
seglen=seglen,
short_size=short_size,
target_size=target_size,
img_mean=img_mean,
img_std=img_std)
return paddle.reader.xmap_readers(mapper, reader, num_threads, buf_size)
def decode_mp4(sample, mode, seg_num, seglen, short_size, target_size, img_mean,
img_std):
sample = sample[0].split(' ')
mp4_path = sample[0]
# when infer, we store vid as label
label = int(sample[1])
try:
imgs = mp4_loader(mp4_path, seg_num, seglen, mode)
if len(imgs) < 1:
logger.error('{} frame length {} less than 1.'.format(mp4_path,
len(imgs)))
return None, None
except:
logger.error('Error when loading {}'.format(mp4_path))
return None, None
return imgs_transform(imgs, label, mode, seg_num, seglen, \
short_size, target_size, img_mean, img_std)
def decode_pickle(sample, mode, seg_num, seglen, short_size, target_size,
img_mean, img_std):
pickle_path = sample[0]
try:
if python_ver < (3, 0):
data_loaded = pickle.load(open(pickle_path, 'rb'))
def _reader_creator(self,
pickle_list,
mode,
seg_num,
seglen,
short_size,
target_size,
img_mean,
img_std,
shuffle=False,
num_threads=1,
buf_size=1024,
format='pkl'):
def decode_mp4(sample, mode, seg_num, seglen, short_size, target_size, img_mean,
img_std):
sample = sample[0].split(' ')
mp4_path = sample[0]
# when infer, we store vid as label
label = int(sample[1])
try:
imgs = mp4_loader(mp4_path, seg_num, seglen, mode)
if len(imgs) < 1:
logger.error('{} frame length {} less than 1.'.format(mp4_path,
len(imgs)))
return None, None
except:
logger.error('Error when loading {}'.format(mp4_path))
return None, None
return imgs_transform(imgs, label, mode, seg_num, seglen, \
short_size, target_size, img_mean, img_std)
def decode_pickle(sample, mode, seg_num, seglen, short_size, target_size,
img_mean, img_std):
pickle_path = sample[0]
try:
if python_ver < (3, 0):
data_loaded = pickle.load(open(pickle_path, 'rb'))
else:
data_loaded = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
vid, label, frames = data_loaded
if len(frames) < 1:
logger.error('{} frame length {} less than 1.'.format(pickle_path,
len(frames)))
return None, None
except:
logger.info('Error when loading {}'.format(pickle_path))
return None, None
if mode == 'train' or mode == 'valid' or mode == 'test':
ret_label = label
elif mode == 'infer':
ret_label = vid
imgs = video_loader(frames, seg_num, seglen, mode)
return imgs_transform(imgs, ret_label, mode, seg_num, seglen, \
short_size, target_size, img_mean, img_std)
def imgs_transform(imgs, label, mode, seg_num, seglen, short_size, target_size,
img_mean, img_std):
imgs = group_scale(imgs, short_size)
if mode == 'train':
if self.name == "TSM":
imgs = group_multi_scale_crop(imgs, short_size)
imgs = group_random_crop(imgs, target_size)
imgs = group_random_flip(imgs)
else:
imgs = group_center_crop(imgs, target_size)
np_imgs = (np.array(imgs[0]).astype('float32').transpose(
(2, 0, 1))).reshape(1, 3, target_size, target_size) / 255
for i in range(len(imgs) - 1):
img = (np.array(imgs[i + 1]).astype('float32').transpose(
(2, 0, 1))).reshape(1, 3, target_size, target_size) / 255
np_imgs = np.concatenate((np_imgs, img))
imgs = np_imgs
imgs -= img_mean
imgs /= img_std
imgs = np.reshape(imgs, (seg_num, seglen * 3, target_size, target_size))
return imgs, label
def reader():
with open(pickle_list) as flist:
lines = [line.strip() for line in flist]
if shuffle:
random.shuffle(lines)
for line in lines:
pickle_path = line.strip()
yield [pickle_path]
if format == 'pkl':
decode_func = decode_pickle
elif format == 'mp4':
decode_func = decode_mp4
else:
data_loaded = pickle.load(open(pickle_path, 'rb'), encoding='bytes')
vid, label, frames = data_loaded
if len(frames) < 1:
logger.error('{} frame length {} less than 1.'.format(pickle_path,
len(frames)))
return None, None
except:
logger.info('Error when loading {}'.format(pickle_path))
return None, None
if mode == 'train' or mode == 'valid' or mode == 'test':
ret_label = label
elif mode == 'infer':
ret_label = vid
imgs = video_loader(frames, seg_num, seglen, mode)
return imgs_transform(imgs, ret_label, mode, seg_num, seglen, \
short_size, target_size, img_mean, img_std)
def imgs_transform(imgs, label, mode, seg_num, seglen, short_size, target_size,
img_mean, img_std):
imgs = group_scale(imgs, short_size)
if mode == 'train':
imgs = group_random_crop(imgs, target_size)
imgs = group_random_flip(imgs)
else:
imgs = group_center_crop(imgs, target_size)
np_imgs = (np.array(imgs[0]).astype('float32').transpose(
(2, 0, 1))).reshape(1, 3, target_size, target_size) / 255
for i in range(len(imgs) - 1):
img = (np.array(imgs[i + 1]).astype('float32').transpose(
(2, 0, 1))).reshape(1, 3, target_size, target_size) / 255
np_imgs = np.concatenate((np_imgs, img))
imgs = np_imgs
imgs -= img_mean
imgs /= img_std
imgs = np.reshape(imgs, (seg_num, seglen * 3, target_size, target_size))
return imgs, label
raise "Not implemented format {}".format(format)
mapper = functools.partial(
decode_func,
mode=mode,
seg_num=seg_num,
seglen=seglen,
short_size=short_size,
target_size=target_size,
img_mean=img_mean,
img_std=img_std)
return paddle.reader.xmap_readers(mapper, reader, num_threads, buf_size)
def group_multi_scale_crop(img_group, target_size, scales=None, \
max_distort=1, fix_crop=True, more_fix_crop=True):
scales = scales if scales is not None else [1, .875, .75, .66]
input_size = [target_size, target_size]
im_size = img_group[0].size
# get random crop offset
def _sample_crop_size(im_size):
image_w, image_h = im_size[0], im_size[1]
base_size = min(image_w, image_h)
crop_sizes = [int(base_size * x) for x in scales]
crop_h = [input_size[1] if abs(x - input_size[1]) < 3 else x for x in crop_sizes]
crop_w = [input_size[0] if abs(x - input_size[0]) < 3 else x for x in crop_sizes]
pairs = []
for i, h in enumerate(crop_h):
for j, w in enumerate(crop_w):
if abs(i - j) <= max_distort:
pairs.append((w, h))
crop_pair = random.choice(pairs)
if not fix_crop:
w_offset = random.randint(0, image_w - crop_pair[0])
h_offset = random.randint(0, image_h - crop_pair[1])
else:
w_step = (image_w - crop_pair[0]) / 4
h_step = (image_h - crop_pair[1]) / 4
ret = list()
ret.append((0, 0)) # upper left
if w_step != 0:
ret.append((4 * w_step, 0)) # upper right
if h_step != 0:
ret.append((0, 4 * h_step)) # lower left
if h_step != 0 and w_step != 0:
ret.append((4 * w_step, 4 * h_step)) # lower right
if h_step != 0 or w_step != 0:
ret.append((2 * w_step, 2 * h_step)) # center
if more_fix_crop:
ret.append((0, 2 * h_step)) # center left
ret.append((4 * w_step, 2 * h_step)) # center right
ret.append((2 * w_step, 4 * h_step)) # lower center
ret.append((2 * w_step, 0 * h_step)) # upper center
ret.append((1 * w_step, 1 * h_step)) # upper left quarter
ret.append((3 * w_step, 1 * h_step)) # upper right quarter
ret.append((1 * w_step, 3 * h_step)) # lower left quarter
ret.append((3 * w_step, 3 * h_step)) # lower right quarter
w_offset, h_offset = random.choice(ret)
return crop_pair[0], crop_pair[1], w_offset, h_offset
crop_w, crop_h, offset_w, offset_h = _sample_crop_size(im_size)
crop_img_group = [img.crop((offset_w, offset_h, offset_w + crop_w, offset_h + crop_h)) for img in img_group]
ret_img_group = [img.resize((input_size[0], input_size[1]), Image.BILINEAR) for img in crop_img_group]
return ret_img_group
def group_random_crop(img_group, target_size):
......
......@@ -34,7 +34,7 @@ class NonlocalReader(DataReader):
image_mean
image_std
batch_size
list
filelist
crop_size
sample_rate
video_length
......@@ -68,7 +68,7 @@ class NonlocalReader(DataReader):
dataset_args['min_size'] = cfg[mode.upper()]['jitter_scales'][0]
dataset_args['max_size'] = cfg[mode.upper()]['jitter_scales'][1]
dataset_args['num_reader_threads'] = num_reader_threads
filelist = cfg[mode.upper()]['list']
filelist = cfg[mode.upper()]['filelist']
batch_size = cfg[mode.upper()]['batch_size']
if self.mode == 'train':
......@@ -79,7 +79,7 @@ class NonlocalReader(DataReader):
sample_times = 1
return reader_func(filelist, batch_size, sample_times, False, False,
**dataset_args)
elif self.mode == 'test':
elif self.mode == 'test' or self.mode == 'infer':
sample_times = cfg['TEST']['num_test_clips']
if cfg['TEST']['use_multi_crop'] == 1:
sample_times = int(sample_times / 3)
......@@ -146,8 +146,8 @@ def apply_resize(rgbdata, min_size, max_size):
ratio = float(side_length) / float(width)
else:
ratio = float(side_length) / float(height)
out_height = int(height * ratio)
out_width = int(width * ratio)
out_height = int(round(height * ratio))
out_width = int(round(width * ratio))
outdata = np.zeros(
(length, out_height, out_width, channel), dtype=rgbdata.dtype)
for i in range(length):
......@@ -197,14 +197,13 @@ def crop_mirror_transform(rgbdata,
def make_reader(filelist, batch_size, sample_times, is_training, shuffle,
**dataset_args):
# should add sample_times param
fl = open(filelist).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
def reader():
fl = open(filelist).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
if shuffle:
random.shuffle(fl)
if shuffle:
random.shuffle(fl)
def reader():
batch_out = []
for line in fl:
# start_time = time.time()
......@@ -253,23 +252,6 @@ def make_reader(filelist, batch_size, sample_times, is_training, shuffle,
def make_multi_reader(filelist, batch_size, sample_times, is_training, shuffle,
**dataset_args):
fl = open(filelist).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
if shuffle:
random.shuffle(fl)
n = dataset_args['num_reader_threads']
queue_size = 20
reader_lists = [None] * n
file_num = int(len(fl) // n)
for i in range(n):
if i < len(reader_lists) - 1:
tmp_list = fl[i * file_num:(i + 1) * file_num]
else:
tmp_list = fl[i * file_num:]
reader_lists[i] = tmp_list
def read_into_queue(flq, queue):
batch_out = []
for line in flq:
......@@ -315,6 +297,24 @@ def make_multi_reader(filelist, batch_size, sample_times, is_training, shuffle,
queue.put(None)
def queue_reader():
# split file list and shuffle
fl = open(filelist).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
if shuffle:
random.shuffle(fl)
n = dataset_args['num_reader_threads']
queue_size = 20
reader_lists = [None] * n
file_num = int(len(fl) // n)
for i in range(n):
if i < len(reader_lists) - 1:
tmp_list = fl[i * file_num:(i + 1) * file_num]
else:
tmp_list = fl[i * file_num:]
reader_lists[i] = tmp_list
queue = multiprocessing.Queue(queue_size)
p_list = [None] * len(reader_lists)
# for reader_list in reader_lists:
......@@ -332,7 +332,7 @@ def make_multi_reader(filelist, batch_size, sample_times, is_training, shuffle,
else:
yield sample
for i in range(len(p_list)):
p_list[i].terminate()
p_list[i].join()
if p_list[i].is_alive():
p_list[i].join()
return queue_reader
......@@ -38,13 +38,20 @@ class DataReader(object):
"""data reader for video input"""
def __init__(self, model_name, mode, cfg):
"""Not implemented"""
pass
self.name = model_name
self.mode = mode
self.cfg = cfg
def create_reader(self):
"""Not implemented"""
pass
def get_config_from_sec(self, sec, item, default=None):
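# e.g. get_config_from_sec('train', 'batch_size', 128) looks up
# cfg['TRAIN']['batch_size'] and falls back to 128 when the section
# or the item is missing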
if sec.upper() not in self.cfg:
return default
return self.cfg[sec.upper()].get(item, default)
class ReaderZoo(object):
def __init__(self):
......
......@@ -2,6 +2,7 @@
- [Youtube-8M](#Youtube-8M数据集)
- [Kinetics](#Kinetics数据集)
- [Non-local](#Non-local)
## Youtube-8M数据集
这里用到的是YouTube-8M 2018年更新之后的数据集。使用官方数据集,并将TFRecord文件转化为pickle文件以便PaddlePaddle使用。Youtube-8M数据集官方提供了frame-level和video-level的特征,这里只需使用到frame-level的特征。
......@@ -117,3 +118,6 @@ ActivityNet官方提供了Kinetics的下载工具,具体参考其[官方repo ]
即可生成相应的文件列表,train.list和val.list的每一行表示一个pkl文件的绝对路径。
## Non-local
Non-local模型也使用kinetics数据集,不过其数据处理方式和其他模型不一样,详细内容见[Non-local数据说明](./nonlocal/README.md)
# Non-local模型数据说明
在Non-local模型中,输入数据是mp4文件,在datareader部分的代码中,使用opencv读取mp4文件对视频进行解码和采样。train和valid数据随机选取起始帧的位置,对每帧图像做随机增强,短边缩放至[256, 320]之间的某个随机数,长边根据长宽比计算出来,截取出224x224大小的区域。test时每条视频会选取10个不同的位置作为起始帧,同时会选取三个不同的空间位置作为crop区域的起始点,这样每个视频会进行10x3次采样,对这30个样本的预测概率求和,选取概率最大的分类作为最终的预测结果。
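下面用一段示意代码说明测试时如何对每条视频的30个样本进行结果融合(仅为说明流程的简化示例,`aggregate_multicrop`、`probs`的排列方式等均为假设):
```python
import numpy as np

def aggregate_multicrop(probs, num_videos, num_clips=30):
    # probs: [num_videos * num_clips, num_classes],假设同一视频的30个样本连续排列
    probs = probs.reshape([num_videos, num_clips, -1])
    video_probs = probs.sum(axis=1)      # 对每条视频的30个样本预测概率求和
    return video_probs.argmax(axis=1)    # 取概率最大的类别作为最终预测结果
```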
## 数据下载
下载kinetics400数据,具体方法见[数据说明](../README.md)中kinetics数据部分,假设下载的mp4文件存放在DATADIR目录下,train和validation数据分别位于$DATADIR/train和$DATADIR/valid目录。在下载数据的时候,将所有视频的高度缩放至256,宽度通过长宽比计算出来。
## 下载官方数据列表
将官方提供的数据集文件表格[kinetics-400\_train.csv](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics/data/kinetics-400_train.csv)和[kinetics-400\_val.csv](https://github.com/activitynet/ActivityNet/tree/master/Crawler/Kinetics/data/kinetics-400_val.csv)下载到此目录。
## 生成文件列表
打开generate\_list.sh,将其中的TRAIN\_DIR和VALID\_DIR修改成用户所保存的mp4文件路径,运行脚本
bash generate_list.sh
即可生成trainlist.txt、vallist.txt和testlist.txt。
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
import sys
import numpy as np
import random
# src = 'trainlist_download.txt'
# outlist = 'trainlist.txt'
# original_folder = '/nfs.yoda/xiaolonw/kinetics/data/train'
# replace_folder = '/scratch/xiaolonw/kinetics/data/compress/train_256'
assert (len(sys.argv) == 5)
src = sys.argv[1]
outlist = sys.argv[2]
original_folder = sys.argv[3]
replace_folder = sys.argv[4]
f = open(src, 'r')
flist = []
for line in f:
flist.append(line)
f.close()
f2 = open(outlist, 'w')
listlen = len(flist)
for i in range(listlen):
line = flist[i]
line = line.replace(original_folder, replace_folder)
f2.write(line)
f2.close()
import os
import numpy as np
import sys
num_classes = 400
replace_space_by_underliner = True # whether to replace space by '_' in labels
fn = sys.argv[1] #'trainlist_download400.txt'
train_dir = sys.argv[
2] #'/docker_mount/data/k400/Kinetics_trimmed_processed_train'
val_dir = sys.argv[3] #'/docker_mount/data/k400/Kinetics_trimmed_processed_val'
trainlist = sys.argv[4] #'trainlist.txt'
vallist = sys.argv[5] #'vallist.txt'
fl = open(fn).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
action_list = []
for line in fl[1:]:
act = line.split(',')[0].strip('\"')
action_list.append(act)
action_set = set(action_list)
action_list = list(action_set)
action_list.sort()
if replace_space_by_underliner:
action_list = [item.replace(' ', '_') for item in action_list]
# assign integer label to each category, abseiling is labeled as 0,
# zumba labeled as 399 and so on, sorted by the category name
action_label_dict = {}
for i in range(len(action_list)):
key = action_list[i]
action_label_dict[key] = i
assert len(action_label_dict.keys(
)) == num_classes, "action num should be {}".format(num_classes)
def generate_file(Faction_label_dict, Ftrain_dir, Ftrainlist, Fnum_classes):
trainactions = os.listdir(Ftrain_dir)
trainactions.sort()
assert len(
trainactions) == Fnum_classes, "train action num should be {}".format(
Fnum_classes)
train_items = []
trainlist_outfile = open(Ftrainlist, 'w')
for trainaction in trainactions:
assert trainaction in Faction_label_dict.keys(
), "action {} should be in action_dict".format(trainaction)
trainaction_dir = os.path.join(Ftrain_dir, trainaction)
trainaction_label = Faction_label_dict[trainaction]
trainaction_files = os.listdir(trainaction_dir)
for f in trainaction_files:
fn = os.path.join(trainaction_dir, f)
item = fn + ' ' + str(trainaction_label)
train_items.append(item)
trainlist_outfile.write(item + '\n')
trainlist_outfile.flush()
trainlist_outfile.close()
generate_file(action_label_dict, train_dir, trainlist, num_classes)
generate_file(action_label_dict, val_dir, vallist, num_classes)
# Download txt name
TRAINLIST_DOWNLOAD="kinetics-400_train.csv"
# path of the train and valid data
TRAIN_DIR=YOUR_TRAIN_DATA_DIR # replace this with your train data dir
VALID_DIR=YOUR_VALID_DATA_DIR # replace this with your valid data dir
python generate_filelist.py $TRAINLIST_DOWNLOAD $TRAIN_DIR $VALID_DIR trainlist.txt vallist.txt
# generate test list
python generate_testlist_multicrop.py
import os
vallist = 'vallist.txt'
testlist = 'testlist.txt'
sampling_times = 10
cropping_times = 3
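# each validation video is expanded into sampling_times * cropping_times test
# items; each line of testlist.txt is "video_path video_index sampling_index crop_index"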
fl = open(vallist).readlines()
fl = [line.strip() for line in fl if line.strip() != '']
f_test = open(testlist, 'w')
for i in range(len(fl)):
line = fl[i].split(' ')
fn = line[0]
label = line[1]
for j in range(sampling_times):
for k in range(cropping_times):
test_item = fn + ' ' + str(i) + ' ' + str(j) + ' ' + str(k) + '\n'
f_test.write(test_item)
f_test.close()
......@@ -37,7 +37,7 @@ logger = logging.getLogger(__name__)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
'--model-name',
'--model_name',
type=str,
default='AttentionCluster',
help='name of model to train.')
......@@ -47,14 +47,14 @@ def parse_args():
default='configs/attention_cluster.txt',
help='path to config file of model')
parser.add_argument(
'--use-gpu', type=bool, default=True, help='default use gpu.')
'--use_gpu', type=bool, default=True, help='default use gpu.')
parser.add_argument(
'--weights',
type=str,
default=None,
help='weight path, None to use weights from Paddle.')
parser.add_argument(
'--batch-size',
'--batch_size',
type=int,
default=1,
help='sample number in a batch for inference.')
......@@ -64,17 +64,17 @@ def parse_args():
default=None,
help='path to inference data file list.')
parser.add_argument(
'--log-interval',
'--log_interval',
type=int,
default=1,
help='mini-batch interval to log.')
parser.add_argument(
'--infer-topk',
'--infer_topk',
type=int,
default=20,
help='topk predictions to restore.')
parser.add_argument(
'--save-dir', type=str, default='./', help='directory to store results')
'--save_dir', type=str, default='./', help='directory to store results')
args = parser.parse_args()
return args
......@@ -83,8 +83,8 @@ def infer(args):
# parse config
config = parse_config(args.config)
infer_config = merge_configs(config, 'infer', vars(args))
print_configs(infer_config, "Infer")
infer_model = models.get_model(args.model_name, infer_config, mode='infer')
infer_model.build_input(use_pyreader=False)
infer_model.build_model()
infer_feeds = infer_model.feeds()
......@@ -105,10 +105,8 @@ def infer(args):
# if no weight files specified, download weights from paddle
weights = args.weights or infer_model.get_weights()
def if_exist(var):
return os.path.exists(os.path.join(weights, var.name))
fluid.io.load_vars(exe, weights, predicate=if_exist)
infer_model.load_test_weights(exe, weights,
fluid.default_main_program(), place)
infer_feeder = fluid.DataFeeder(place=place, feed_list=infer_feeds)
fetch_list = [x.name for x in infer_outputs]
......@@ -126,8 +124,7 @@ def infer(args):
topk_inds = predictions[i].argsort()[0 - args.infer_topk:]
topk_inds = topk_inds[::-1]
preds = predictions[i][topk_inds]
results.append(
(video_id[i], preds.tolist(), topk_inds.tolist()))
results.append((video_id[i], preds.tolist(), topk_inds.tolist()))
prev_time = cur_time
cur_time = time.time()
period = cur_time - prev_time
......@@ -145,6 +142,7 @@ def infer(args):
"{}_infer_result".format(args.model_name))
pickle.dump(results, open(result_file_name, 'wb'))
if __name__ == "__main__":
args = parse_args()
logger.info(args)
......
......@@ -187,10 +187,11 @@ def get_metrics(name, mode, cfg):
return metrics_zoo.get(name, mode, cfg)
regist_metrics("NEXTVLAD", Youtube8mMetrics)
regist_metrics("ATTENTIONLSTM", Youtube8mMetrics)
# sorted alphabetically
regist_metrics("ATTENTIONCLUSTER", Youtube8mMetrics)
regist_metrics("TSN", Kinetics400Metrics)
regist_metrics("ATTENTIONLSTM", Youtube8mMetrics)
regist_metrics("NEXTVLAD", Youtube8mMetrics)
regist_metrics("NONLOCAL", MulticropMetrics)
regist_metrics("TSM", Kinetics400Metrics)
regist_metrics("TSN", Kinetics400Metrics)
regist_metrics("STNET", Kinetics400Metrics)
regist_metrics("NONLOCAL", MulticropMetrics)
......@@ -63,6 +63,7 @@ class MetricsCalculator():
def accumulate(self, loss, pred, labels):
labels = labels.astype(int)
labels = labels[:, 0]
for i in range(pred.shape[0]):
probs = pred[i, :].tolist()
vid = labels[i]
......@@ -81,6 +82,8 @@ class MetricsCalculator():
evaluate_results(self.results, self.filename_gt, self.dataset_size, \
self.num_classes, self.num_test_clips)
# save temporary file
if not os.path.isdir(self.checkpoint_dir):
os.makedirs(self.checkpoint_dir)
pkl_path = os.path.join(self.checkpoint_dir, "results_probs.pkl")
with open(pkl_path, 'w') as f:
......@@ -188,26 +191,4 @@ def evaluate_results(results, filename_gt, test_dataset_size, num_classes,
logger.info('top-5 accuracy: {:.2f} percent'.format(accuracy_top5 * 100))
logger.info('-' * 80)
for i in range(sample_num):
prob = probs[i]
# top-1
idx = prob.argmax()
if idx == gt_labels[i] and counts[i] > 0:
accuracy = accuracy + 1
ids = np.argsort(prob)[::-1]
for j in range(5):
if ids[j] == gt_labels[i] and counts[i] > 0:
accuracy_top5 = accuracy_top5 + 1
break
accuracy = float(accuracy) / float(sample_num)
accuracy_top5 = float(accuracy_top5) / float(sample_num)
logger.info('-' * 80)
logger.info('top-1 accuracy: {:.2f} percent'.format(accuracy * 100))
logger.info('top-5 accuracy: {:.2f} percent'.format(accuracy_top5 * 100))
logger.info('-' * 80)
return
from .model import regist_model, get_model
from .attention_cluster import AttentionCluster
from .attention_lstm import AttentionLSTM
from .nextvlad import NEXTVLAD
from .nonlocal_model import NonLocal
from .tsm import TSM
from .tsn import TSN
from .stnet import STNET
from .attention_lstm import AttentionLSTM
# regist models
# register models, sorted alphabetically
regist_model("AttentionCluster", AttentionCluster)
regist_model("AttentionLSTM", AttentionLSTM)
regist_model("NEXTVLAD", NEXTVLAD)
regist_model('NONLOCAL', NonLocal)
regist_model("TSM", TSM)
regist_model("TSN", TSN)
regist_model("STNET", STNET)
regist_model("AttentionLSTM", AttentionLSTM)
......@@ -32,11 +32,11 @@ Attention Cluster模型使用2nd-Youtube-8M数据集, 数据下载及准备请
数据准备完毕后,可以通过如下两种方式启动训练:
python train.py --model-name=AttentionCluster
python train.py --model_name=AttentionCluster
--config=./configs/attention_cluster.txt
--save-dir=checkpoints
--log-interval=10
--valid-interval=1
--save_dir=checkpoints
--log_interval=10
--valid_interval=1
bash scripts/train/train_attention_cluster.sh
......@@ -56,9 +56,9 @@ Attention Cluster模型使用2nd-Youtube-8M数据集, 数据下载及准备请
可通过如下两种方式进行模型评估:
python test.py --model-name=AttentionCluster
python test.py --model_name=AttentionCluster
--config=configs/attention_cluster.txt
--log-interval=1
--log_interval=1
--weights=$PATH_TO_WEIGHTS
bash scripts/test/test_attention_cluster.sh
......@@ -89,10 +89,10 @@ Attention Cluster模型使用2nd-Youtube-8M数据集, 数据下载及准备请
可通过如下命令进行模型推断:
python infer.py --model-name=attention_cluster
python infer.py --model_name=attention_cluster
--config=configs/attention_cluster.txt
--log-interval=1
--weights=$PATH_TO_WEIGHTS
--log_interval=1
--weights=$PATH_TO_WEIGHTS
--filelist=$FILELIST
- 模型推断结果存储于`AttentionCluster_infer_result`中,通过`pickle`格式存储。
......@@ -102,4 +102,3 @@ Attention Cluster模型使用2nd-Youtube-8M数据集, 数据下载及准备请
## 参考论文
- [Attention Clusters: Purely Attention Based Local Feature Integration for Video Classification](https://arxiv.org/abs/1711.09550), Xiang Long, Chuang Gan, Gerard de Melo, Jiajun Wu, Xiao Liu, Shilei Wen
......@@ -26,11 +26,11 @@ AttentionLSTM模型使用2nd-Youtube-8M数据集,关于数据部分请参考[
数据准备完毕后,可以通过如下两种方式启动训练:
python train.py --model-name=AttentionLSTM
python train.py --model_name=AttentionLSTM
--config=./configs/attention_lstm.txt
--save-dir=checkpoints
--log-interval=10
--valid-interval=1
--save_dir=checkpoints
--log_interval=10
--valid_interval=1
bash scripts/train/train_attention_lstm.sh
......@@ -42,9 +42,9 @@ AttentionLSTM模型使用2nd-Youtube-8M数据集,关于数据部分请参考[
## 模型评估
可通过如下两种方式进行模型评估:
python test.py --model-name=AttentionLSTM
python test.py --model_name=AttentionLSTM
--config=configs/attention_lstm.txt
--log-interval=1
--log_interval=1
--weights=$PATH_TO_WEIGHTS
bash scripts/test/test_attention_lstm.sh
......@@ -75,10 +75,10 @@ AttentionLSTM模型使用2nd-Youtube-8M数据集,关于数据部分请参考[
可通过如下命令进行模型推断:
python infer.py --model-name=attention_lstm
python infer.py --model_name=attention_lstm
--config=configs/attention_lstm.txt
--log-interval=1
--weights=$PATH_TO_WEIGHTS
--log_interval=1
--weights=$PATH_TO_WEIGHTS
--filelist=$FILELIST
- 模型推断结果存储于`AttentionLSTM_infer_result`中,通过`pickle`格式存储。
......@@ -90,4 +90,3 @@ AttentionLSTM模型使用2nd-Youtube-8M数据集,关于数据部分请参考[
- [Beyond Short Snippets: Deep Networks for Video Classification](https://arxiv.org/abs/1503.08909) Joe Yue-Hei Ng, Matthew Hausknecht, Sudheendra Vijayanarasimhan, Oriol Vinyals, Rajat Monga, George Toderici
- [Attention Clusters: Purely Attention Based Local Feature Integration for Video Classification](https://arxiv.org/abs/1711.09550), Xiang Long, Chuang Gan, Gerard de Melo, Jiajun Wu, Xiao Liu, Shilei Wen
......@@ -65,17 +65,9 @@ class ModelBase(object):
self.name = name
self.is_training = (mode == 'train')
self.mode = mode
self.cfg = cfg
self.py_reader = None
# parse config
# assert os.path.exists(cfg), \
# "Config file {} not exists".format(cfg)
# self._config = ModelConfig(cfg)
# self._config.parse()
# if args and isinstance(args, dict):
# self._config.merge_configs(mode, args)
# self.cfg = self._config.get_configs()
self.cfg = cfg
def build_model(self):
"build model struct"
......@@ -137,8 +129,8 @@ class ModelBase(object):
if os.path.exists(path):
return path
logger.info("Download pretrain weights of {} from {}".format(
self.name, url))
logger.info("Download pretrain weights of {} from {}".format(self.name,
url))
download(url, path)
return path
......@@ -146,6 +138,12 @@ class ModelBase(object):
logger.info("Load pretrain weights from {}".format(pretrain))
fluid.io.load_params(exe, pretrain, main_program=prog)
def load_test_weights(self, exe, weights, prog, place):
def if_exist(var):
return os.path.exists(os.path.join(weights, var.name))
fluid.io.load_vars(exe, weights, predicate=if_exist)
def get_config_from_sec(self, sec, item, default=None):
if sec.upper() not in self.cfg:
return default
......@@ -163,7 +161,7 @@ class ModelZoo(object):
def get(self, name, cfg, mode='train'):
for k, v in self.model_zoo.items():
if k == name:
if k.upper() == name.upper():
return v(name, cfg, mode)
raise ModelNotFoundError(name, self.model_zoo.keys())
......@@ -178,4 +176,3 @@ def regist_model(name, model):
def get_model(name, cfg, mode='train'):
return model_zoo.get(name, cfg, mode)
# Non-local Neural Networks视频分类模型
---
## 目录
- [模型简介](#模型简介)
- [数据准备](#数据准备)
- [模型训练](#模型训练)
- [模型评估](#模型评估)
- [模型推断](#模型推断)
- [参考论文](#参考论文)
## 模型简介
Non-local Neural Networks是由Xiaolong Wang等研究者在2017年提出的模型,主要特点是通过引入Non-local操作来描述距离较远的像素点之间的关联关系。提取大范围内数据点之间的关联关系,一直是一个比较重要的问题。对于序列化数据,比如语音、视频等,比较主流的做法是使用循环神经网络(RNN);对于图片来说,通常使用卷积神经网络(CNN)来提取像素之间的依赖关系。然而,CNN和RNN都只是在其空间或者时间的很小的邻域内进行特征提取,很难捕捉到距离更远位置的数据的依赖关系。借助于传统计算机视觉中的Non-local mean的思想,并将其扩展到神经网络中,通过定义输出位置和所有输入位置之间的关联函数,建立起了一种具有全局关联特性的操作,输出feature map上的每个位置,都会受到输入feature map上所有位置的数据的影响。在CNN中,经过一次卷积操作,输出feature map上的像素点,只能获取其相应的感受野之内的信息,为了获得更多的上下文信息,就需要做多次卷积操作。然而在Non-local操作中,每个输出点的感受野都相当于整个输入feature map区域,能比CNN和RNN提取到更加全局的信息。
详细信息请参考论文[Non-local Neural Networks](https://arxiv.org/abs/1711.07971v1)
### Non-local操作
Non-local 关联函数的定义如下
<p align="center">
<a href="https://www.codecogs.com/eqnedit.php?latex=y_{i}=\frac{1}{C(x)}&space;\sum_{j}f(x_i,&space;x_j)g(x_j)" target="_blank"><img src="https://latex.codecogs.com/gif.latex?y_{i}=\frac{1}{C(x)}&space;\sum_{j}f(x_i,&space;x_j)g(x_j)" title="y_{i}=\frac{1}{C(x)} \sum_{j}f(x_i, x_j)g(x_j)" /></a>
</p>
在上面的公式中,x表示输入feature map, y表示输出feature map,i是输出feature map的位置,j是输入feature map的位置,f(xi, xj)描述了输出点i跟所有输入点j之间的关联,C是根据f(xi, xj)选取的归一化函数。g(xj)是对输入feature map做一个变换操作,通常可以选取比较简单的线性变换形式;f(xi, xj)可以选取不同的形式,通常可以使用如下几种形式
#### Gaussian
<p align="center">
<a href="https://www.codecogs.com/eqnedit.php?latex=f(x_i,&space;x_j)&space;=&space;e^{x_i^Tx_j},&space;\qquad&space;C(x)&space;=&space;\sum_{j}f(x_i,&space;x_j)" target="_blank"><img src="https://latex.codecogs.com/gif.latex?f(x_i,&space;x_j)&space;=&space;e^{x_i^Tx_j},&space;\qquad&space;C(x)&space;=&space;\sum_{j}f(x_i,&space;x_j)" title="f(x_i, x_j) = e^{x_i^Tx_j}, \qquad C(x) = \sum_{j}f(x_i, x_j)" /></a>
</p>
#### Embedded Gaussian
<p align="center">
<a href="https://www.codecogs.com/eqnedit.php?latex=f(x_i,&space;x_j)&space;=&space;e^{{\theta(x_i)}^T\phi(x_j)},&space;\qquad&space;C(x)&space;=&space;\sum_{j}f(x_i,&space;x_j)" target="_blank"><img src="https://latex.codecogs.com/gif.latex?f(x_i,&space;x_j)&space;=&space;e^{{\theta(x_i)}^T\phi(x_j)},&space;\qquad&space;C(x)&space;=&space;\sum_{j}f(x_i,&space;x_j)" title="f(x_i, x_j) = e^{{\theta(x_i)}^T\phi(x_j)}, \qquad C(x) = \sum_{j}f(x_i, x_j)" /></a>
</p>
#### Dot product
<p align="center">
<a href="https://www.codecogs.com/eqnedit.php?latex=f(x_i,&space;x_j)&space;=&space;\theta(x_i)^T\phi(x_j),&space;\qquad&space;C(x)&space;=\mathit{N}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?f(x_i,&space;x_j)&space;=&space;\theta(x_i)^T\phi(x_j),&space;\qquad&space;C(x)&space;=\mathit{N}" title="f(x_i, x_j) = \theta(x_i)^T\phi(x_j), \qquad C(x) =\mathit{N}" /></a>
</p>
#### Concatenation
<p align="center">
<a href="https://www.codecogs.com/eqnedit.php?latex=f(x_i,&space;x_j)&space;=&space;ReLU(w_f^T[\theta(x_i),\phi(x_j)]),&space;\qquad&space;C(x)&space;=\mathit{N}" target="_blank"><img src="https://latex.codecogs.com/gif.latex?f(x_i,&space;x_j)&space;=&space;ReLU(w_f^T[\theta(x_i),\phi(x_j)]),&space;\qquad&space;C(x)&space;=\mathit{N}" title="f(x_i, x_j) = ReLU(w_f^T[\theta(x_i),\phi(x_j)]), \qquad C(x) =\mathit{N}" /></a>
</p>
其中
<p align="center">
<a href="https://www.codecogs.com/eqnedit.php?latex=\theta(x_i)=W_{\theta}x_i,&space;\qquad&space;\phi(x_j)=W_{\phi}x_j" target="_blank"><img src="https://latex.codecogs.com/gif.latex?\theta(x_i)=W_{\theta}x_i,&space;\qquad&space;\phi(x_j)=W_{\phi}x_j" title="\theta(x_i)=W_{\theta}x_i, \qquad \phi(x_j)=W_{\phi}x_j" /></a>
</p>
上述函数形式中的参数可以使用随机初始化的方式进行赋值,在训练过程中通过End-2-End的方式不断迭代求解。
### Non-local block
采用类似Resnet的结构,定义如下的Non-local block
<p align="center">
<a href="https://www.codecogs.com/eqnedit.php?latex=Z_i&space;=&space;W_zy_i&plus;x_i" target="_blank"><img src="https://latex.codecogs.com/gif.latex?Z_i&space;=&space;W_zy_i&plus;x_i" title="Z_i = W_zy_i+x_i" /></a>
</p>
Non-local操作引入的部分与Resnet中的残差项类似,通过使用Non-local block,可以方便的在网络中的任何地方添加Non-local操作,而其他地方照样可以使用原始的预训练模型进行初始化。如果将Wz初始化为0,则跟不使用Non-local block的初始情形等价。
### 具体实现
下图描述了Non-local block使用内嵌高斯形式关联函数的具体实现过程,
<p align="center">
<img src="../../images/nonlocal_instantiation.png" height=488 width=585 hspace='10'/> <br />
使用Embedded Gaussian关联函数的Non-local block
</p>
g(xj)是对输入feature map做一个线性变换,使用1x1x1的卷积;theta和phi也是线性变换,同样使用1x1x1的卷积来实现。从上图中可以看到,Non-local操作只需用到通常的卷积、矩阵相乘、加法、softmax等比较常用的算子,不需要额外添加新的算子,用户可以非常方便地实现组网以构建模型。
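下面用NumPy给出Embedded Gaussian形式Non-local核心计算的示意代码(简化示例,省略batch维度和1x1x1卷积投影,假设theta、phi、g为已经投影并reshape好的矩阵):
```python
import numpy as np

def nonlocal_embedded_gaussian(theta, phi, g):
    # theta: [THW, C'], phi: [C', THW], g: [THW, C']
    affinity = theta.dot(phi)                         # f(xi, xj)的指数项,形状[THW, THW]
    p = np.exp(affinity - affinity.max(axis=1, keepdims=True))
    p = p / p.sum(axis=1, keepdims=True)              # softmax归一化,即除以C(x)
    return p.dot(g)                                   # y_i = sum_j f(xi, xj) g(xj) / C(x)
```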
### 模型效果
原作者的论文中指出,Non-local模型在视频分类问题上取得了较好的效果,在Resnet-50基础网络上添加Non-local block,能取得比Resnet-101更好的分类效果,TOP-1准确率要高出1~2个百分点。在图像分类和目标检测问题上,也有比较明显的提升效果。
## 数据准备
Non-local模型的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。数据下载及准备请参考Non-local模型的[数据说明](../../dataset/nonlocal/README.md)
## 模型训练
数据准备完毕后,可以通过如下两种方式启动训练:
python train.py --model_name=NONLOCAL
--config=./configs/nonlocal.txt
--save_dir=checkpoints
--log_interval=10
--valid_interval=1
bash scripts/train/train_nonlocal.sh
- 可下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/nonlocal_kinetics.tar.gz)通过`--resume`指定权重存放路径进行finetune等开发
**数据读取器说明:** 模型读取Kinetics-400数据集中的`mp4`数据,根据视频长度和采样频率随机选取起始帧的位置,每个视频抽取`video_length`帧图像,对每帧图像做随机增强,短边缩放至[256, 320]之间的某个随机数,长边根据长宽比计算出来,然后再截取出224x224的区域作为训练数据输入网络。
**训练策略:**
* 采用Momentum优化算法训练,momentum=0.9
* 采用L2正则化,卷积和fc层weight decay系数为1e-4;bn层则设置weight decay系数为0
* 初始学习率base\_learning\_rate=0.01,在150,000和300,000次迭代的时候分别降一次学习率,衰减系数为0.1
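上述分段学习率策略可以用如下示意代码进行配置(仅为示例,边界和数值由base\_learning\_rate和衰减系数推算得到;bn层weight decay为0是通过各参数的ParamAttr单独设置的,此处从略):
```python
import paddle.fluid as fluid

# 0~150000次迭代学习率为0.01,150000~300000次为0.001,之后为0.0001
learning_rate = fluid.layers.piecewise_decay(
    boundaries=[150000, 300000], values=[0.01, 0.001, 0.0001])
optimizer = fluid.optimizer.Momentum(
    learning_rate=learning_rate,
    momentum=0.9,
    regularization=fluid.regularizer.L2Decay(1e-4))
```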
## 模型评估
测试时数据预处理的方式跟训练时不一样,crop区域的大小为256x256,不同于训练时的224x224,所以需要将训练中预测输出时使用的全连接操作改为1x1x1的卷积。每个视频抽取图像帧数据的时候,会选取10个不同的位置作为时间起始点,做crop的时候会选取三个不同的空间起始点。在每个视频上会进行10x3次采样,将这30个样本的预测结果进行求和,选取概率最大的类别作为最终的预测结果。
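将训练时全连接层的参数转换成1x1x1卷积参数的做法可参考如下示意代码(与模型代码中`load_test_weights`的处理一致,`fc_w`表示训练得到的全连接层权重):
```python
import numpy as np

def fc_weight_to_conv3d(fc_w):
    # fc_w: [dim_in, num_classes] -> conv3d权重: [num_classes, dim_in, 1, 1, 1]
    conv_w = np.transpose(fc_w, (1, 0))
    return np.reshape(conv_w, [conv_w.shape[0], conv_w.shape[1], 1, 1, 1])
```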
可通过如下两种方式进行模型评估:
python test.py --model_name=NONLOCAL
--config=configs/nonlocal.txt
--log_interval=1
--weights=$PATH_TO_WEIGHTS
bash scripts/test/test_nonlocal.sh
- 使用`scripts/test/test_nonlocal.sh`进行评估时,需要修改脚本中的`--weights`参数指定需要评估的权重。
- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/nonlocal_kinetics.tar.gz)进行评估
当取如下参数时:
| 参数 | 取值 |
| :---------: | :----: |
| back bone | Resnet-50 |
| 卷积形式 | c2d |
| 采样频率 | 8 |
| 视频长度 | 8 |
在Kinetics400的validation数据集下评估精度如下:
| 精度指标 | 模型精度 |
| :---------: | :----: |
| TOP\_1 | 0.739 |
### 备注
由于Youtube上部分数据已删除,只下载到了kinetics400数据集中的234619条,而原始数据集包含246535条视频,可能会导致精度略微下降。
## 模型推断
可通过如下命令进行模型推断:
python infer.py --model_name=NONLOCAL
--config=configs/nonlocal.txt
--log_interval=1
--weights=$PATH_TO_WEIGHTS
--filelist=$FILELIST
- 模型推断结果存储于`NONLOCAL_infer_result`中,通过`pickle`格式存储。
- 若未指定`--weights`参数,脚本会下载已发布模型[model](https://paddlemodels.bj.bcebos.com/video_classification/nonlocal_kinetics.tar.gz)进行推断
## 参考论文
- [Non-local Neural Networks](https://arxiv.org/abs/1711.07971v1), Xiaolong Wang, Ross Girshick, Abhinav Gupta, Kaiming He
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import paddle
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
# 3d spacetime nonlocal (v1, spatial downsample)
def spacetime_nonlocal(blob_in, dim_in, dim_out, batch_size, prefix, dim_inner, cfg, \
test_mode = False, max_pool_stride = 2):
#------------
cur = blob_in
# we do projection to convert each spacetime location to a feature
# theta: project to dim_inner channels, spacetime size unchanged
# e.g., (8, 1024, 4, 14, 14) => (8, dim_inner, 4, 14, 14)
theta = fluid.layers.conv3d(
input=cur,
num_filters=dim_inner,
filter_size=[1, 1, 1],
stride=[1, 1, 1],
padding=[0, 0, 0],
param_attr=ParamAttr(
name=prefix + '_theta' + "_w",
initializer=fluid.initializer.Normal(
loc=0.0, scale=cfg.NONLOCAL.conv_init_std)),
bias_attr=ParamAttr(
name=prefix + '_theta' + "_b",
initializer=fluid.initializer.Constant(value=0.))
if (cfg.NONLOCAL.no_bias == 0) else False,
name=prefix + '_theta')
theta_shape = theta.shape
# phi and g: half spatial size
# e.g., (8, 1024, 4, 14, 14) => (8, 1024, 4, 7, 7)
if cfg.NONLOCAL.use_maxpool:
max_pool = fluid.layers.pool3d(
input=cur,
pool_size=[1, max_pool_stride, max_pool_stride],
pool_type='max',
pool_stride=[1, max_pool_stride, max_pool_stride],
pool_padding=[0, 0, 0],
name=prefix + '_pool')
else:
max_pool = cur
phi = fluid.layers.conv3d(
input=max_pool,
num_filters=dim_inner,
filter_size=[1, 1, 1],
stride=[1, 1, 1],
padding=[0, 0, 0],
param_attr=ParamAttr(
name=prefix + '_phi' + "_w",
initializer=fluid.initializer.Normal(
loc=0.0, scale=cfg.NONLOCAL.conv_init_std)),
bias_attr=ParamAttr(
name=prefix + '_phi' + "_b",
initializer=fluid.initializer.Constant(value=0.))
if (cfg.NONLOCAL.no_bias == 0) else False,
name=prefix + '_phi')
phi_shape = phi.shape
g = fluid.layers.conv3d(
input=max_pool,
num_filters=dim_inner,
filter_size=[1, 1, 1],
stride=[1, 1, 1],
padding=[0, 0, 0],
param_attr=ParamAttr(
name=prefix + '_g' + "_w",
initializer=fluid.initializer.Normal(
loc=0.0, scale=cfg.NONLOCAL.conv_init_std)),
bias_attr=ParamAttr(
name=prefix + '_g' + "_b",
initializer=fluid.initializer.Constant(value=0.))
if (cfg.NONLOCAL.no_bias == 0) else False,
name=prefix + '_g')
g_shape = g.shape
# we have to use explicit batch size (to support arbitrary spacetime size)
# e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784)
theta = fluid.layers.reshape(
theta, [-1, 0, theta_shape[2] * theta_shape[3] * theta_shape[4]])
theta = fluid.layers.transpose(theta, [0, 2, 1])
phi = fluid.layers.reshape(
phi, [-1, 0, phi_shape[2] * phi_shape[3] * phi_shape[4]])
theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity')
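# theta_phi: [N, T*H*W, T'*H'*W'], pairwise affinity between every output
# spacetime position and every (possibly max-pooled) input position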
g = fluid.layers.reshape(g, [-1, 0, g_shape[2] * g_shape[3] * g_shape[4]])
if cfg.NONLOCAL.use_softmax:
if cfg.NONLOCAL.use_scale is True:
theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5)
else:
theta_phi_sc = theta_phi
p = fluid.layers.softmax(
theta_phi_sc, name=prefix + '_affinity' + '_prob')
else:
# the normalization used in the original (xlw's) code when softmax is disabled is unclear, so it is not implemented
p = None # not implemented
raise NotImplementedError("only the softmax-normalized affinity is implemented")
# note g's axis[2] corresponds to p's axis[2]
# e.g. g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1)
p = fluid.layers.transpose(p, [0, 2, 1])
t = fluid.layers.matmul(g, p, name=prefix + '_y')
# reshape back
# e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14)
t_shape = t.shape
# print(t_shape)
# print(theta_shape)
t_re = fluid.layers.reshape(t, shape=list(theta_shape))
blob_out = t_re
blob_out = fluid.layers.conv3d(
input=blob_out,
num_filters=dim_out,
filter_size=[1, 1, 1],
stride=[1, 1, 1],
padding=[0, 0, 0],
param_attr=ParamAttr(
name=prefix + '_out' + "_w",
initializer=fluid.initializer.Constant(value=0.)
if cfg.NONLOCAL.use_zero_init_conv else fluid.initializer.Normal(
loc=0.0, scale=cfg.NONLOCAL.conv_init_std)),
bias_attr=ParamAttr(
name=prefix + '_out' + "_b",
initializer=fluid.initializer.Constant(value=0.))
if (cfg.NONLOCAL.no_bias == 0) else False,
name=prefix + '_out')
blob_out_shape = blob_out.shape
if cfg.NONLOCAL.use_bn is True:
bn_name = prefix + "_bn"
blob_out = fluid.layers.batch_norm(
blob_out,
is_test=test_mode,
momentum=cfg.NONLOCAL.bn_momentum,
epsilon=cfg.NONLOCAL.bn_epsilon,
name=bn_name,
param_attr=ParamAttr(
name=bn_name + "_scale",
initializer=fluid.initializer.Constant(
value=cfg.NONLOCAL.bn_init_gamma),
regularizer=fluid.regularizer.L2Decay(
cfg.TRAIN.weight_decay_bn)),
bias_attr=ParamAttr(
name=bn_name + "_offset",
regularizer=fluid.regularizer.L2Decay(
cfg.TRAIN.weight_decay_bn)),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance") # add bn
if cfg.NONLOCAL.use_affine is True:
affine_scale = fluid.layers.create_parameter(
shape=[blob_out_shape[1]],
dtype=blob_out.dtype,
attr=ParamAttr(name=prefix + '_affine' + '_s'),
default_initializer=fluid.initializer.Constant(value=1.))
affine_bias = fluid.layers.create_parameter(
shape=[blob_out_shape[1]],
dtype=blob_out.dtype,
attr=ParamAttr(name=prefix + '_affine' + '_b'),
default_initializer=fluid.initializer.Constant(value=0.))
blob_out = fluid.layers.affine_channel(
blob_out,
scale=affine_scale,
bias=affine_bias,
name=prefix + '_affine') # add affine
return blob_out
def add_nonlocal(blob_in,
dim_in,
dim_out,
batch_size,
prefix,
dim_inner,
cfg,
test_mode=False):
blob_out = spacetime_nonlocal(blob_in, \
dim_in, dim_out, batch_size, prefix, dim_inner, cfg, test_mode = test_mode)
blob_out = fluid.layers.elementwise_add(
blob_out, blob_in, name=prefix + '_sum')
return blob_out
# this is to reduce memory usage if the feature maps are big
# divide the feature maps into groups in the temporal dimension,
# and perform non-local operations inside each group.
def add_nonlocal_group(blob_in,
dim_in,
dim_out,
batch_size,
pool_stride,
height,
width,
group_size,
prefix,
dim_inner,
cfg,
test_mode=False):
group_num = int(pool_stride / group_size)
assert (pool_stride % group_size == 0), \
'nonlocal block {}: pool_stride({}) should be divided by group size({})'.format(prefix, pool_stride, group_size)
if group_num > 1:
blob_in = fluid.layers.transpose(
blob_in, [0, 2, 1, 3, 4], name=prefix + '_pre_trans1')
blob_in = fluid.layers.reshape(
blob_in,
[batch_size * group_num, group_size, dim_in, height, width],
name=prefix + '_pre_reshape1')
blob_in = fluid.layers.transpose(
blob_in, [0, 2, 1, 3, 4], name=prefix + '_pre_trans2')
blob_out = spacetime_nonlocal(
blob_in,
dim_in,
dim_out,
batch_size,
prefix,
dim_inner,
cfg,
test_mode=test_mode)
blob_out = fluid.layers.elementwise_add(
blob_out, blob_in, name=prefix + '_sum')
if group_num > 1:
blob_out = fluid.layers.transpose(
blob_out, [0, 2, 1, 3, 4], name=prefix + '_post_trans1')
blob_out = fluid.layers.reshape(
blob_out,
[batch_size, group_num * group_size, dim_out, height, width],
name=prefix + '_post_reshape1')
blob_out = fluid.layers.transpose(
blob_out, [0, 2, 1, 3, 4], name=prefix + '_post_trans2')
return blob_out
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import numpy as np
import paddle.fluid as fluid
from ..model import ModelBase
from . import resnet_video
from .nonlocal_utils import load_params_from_file
import logging
logger = logging.getLogger(__name__)
__all__ = ["NonLocal"]
# To add new models, import them, add them to this map and models/TARGETS
class NonLocal(ModelBase):
def __init__(self, name, cfg, mode='train'):
super(NonLocal, self).__init__(name, cfg, mode=mode)
self.get_config()
def get_config(self):
# video_length
self.video_length = self.get_config_from_sec(self.mode, 'video_length')
# crop size
self.crop_size = self.get_config_from_sec(self.mode, 'crop_size')
def build_input(self, use_pyreader=True):
input_shape = [3, self.video_length, self.crop_size, self.crop_size]
label_shape = [1]
py_reader = None
if use_pyreader:
assert self.mode != 'infer', \
'pyreader is not recommended in infer mode, please set use_pyreader to False.'
py_reader = fluid.layers.py_reader(
capacity=20,
shapes=[[-1] + input_shape, [-1] + label_shape],
dtypes=['float32', 'int64'],
name='train_py_reader'
if self.is_training else 'test_py_reader',
use_double_buffer=True)
data, label = fluid.layers.read_file(py_reader)
self.py_reader = py_reader
else:
data = fluid.layers.data(
name='train_data' if self.is_training else 'test_data',
shape=input_shape,
dtype='float32')
if self.mode != 'infer':
label = fluid.layers.data(
name='train_label' if self.is_training else 'test_label',
shape=label_shape,
dtype='int64')
else:
label = None
self.feature_input = [data]
self.label_input = label
def create_model_args(self):
return None
def build_model(self):
pred, loss = resnet_video.create_model(
data=self.feature_input[0],
label=self.label_input,
cfg=self.cfg,
is_training=self.is_training,
mode=self.mode)
if loss is not None:
loss = fluid.layers.mean(loss)
self.network_outputs = [pred]
self.loss_ = loss
def optimizer(self):
base_lr = self.get_config_from_sec('TRAIN', 'learning_rate')
lr_decay = self.get_config_from_sec('TRAIN', 'learning_rate_decay')
step_sizes = self.get_config_from_sec('TRAIN', 'step_sizes')
lr_bounds, lr_values = get_learning_rate_decay_list(base_lr, lr_decay,
step_sizes)
learning_rate = fluid.layers.piecewise_decay(
boundaries=lr_bounds, values=lr_values)
momentum = self.get_config_from_sec('TRAIN', 'momentum')
use_nesterov = self.get_config_from_sec('TRAIN', 'nesterov')
l2_weight_decay = self.get_config_from_sec('TRAIN', 'weight_decay')
logger.info(
'Build up optimizer, \ntype: {}, \nmomentum: {}, \nnesterov: {}, \
\nregularization: L2 {}, \nlr_values: {}, lr_bounds: {}'
.format('Momentum', momentum, use_nesterov, l2_weight_decay,
lr_values, lr_bounds))
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=momentum,
use_nesterov=use_nesterov,
regularization=fluid.regularizer.L2Decay(l2_weight_decay))
return optimizer
def loss(self):
return self.loss_
def outputs(self):
return self.network_outputs
def feeds(self):
return self.feature_input if self.mode == 'infer' else \
self.feature_input + [self.label_input]
def pretrain_info(self):
return None, None
def weights_info(self):
pass
def load_pretrain_params(self, exe, pretrain, prog, place):
load_params_from_file(exe, prog, pretrain, place)
def load_test_weights(self, exe, weights, prog, place):
super(NonLocal, self).load_test_weights(exe, weights, prog, place)
pred_w = fluid.global_scope().find_var('pred_w').get_tensor()
pred_array = np.array(pred_w)
pred_w_shape = pred_array.shape
if len(pred_w_shape) == 2:
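# fc weights saved at train time have shape [dim_in, num_classes]; convert
# them to 1x1x1 conv3d weights [num_classes, dim_in, 1, 1, 1] so that test
# and infer can run fully convolutionally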
logger.info('reshape for pred_w when test')
pred_array = np.transpose(pred_array, (1, 0))
pred_w_shape = pred_array.shape
pred_array = np.reshape(
pred_array, [pred_w_shape[0], pred_w_shape[1], 1, 1, 1])
pred_w.set(pred_array.astype('float32'), place)
def get_learning_rate_decay_list(base_learning_rate, lr_decay, step_lists):
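# e.g. base_learning_rate=0.01, lr_decay=0.1, step_lists=[150000, 150000]
# gives lr_bounds=[150000, 300000], lr_values=[0.01, 0.001, 0.0001]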
lr_bounds = []
lr_values = [base_learning_rate * 1]
cur_step = 0
for i in range(len(step_lists)):
cur_step += step_lists[i]
lr_bounds.append(cur_step)
decay_rate = lr_decay**(i + 1)
lr_values.append(base_learning_rate * decay_rate)
return lr_bounds, lr_values
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import numpy as np
import paddle.fluid as fluid
import logging
logger = logging.getLogger(__name__)
def load_params_from_file(exe, prog, pretrained_file, place):
logger.info('load params from {}'.format(pretrained_file))
if os.path.isdir(pretrained_file):
param_list = prog.block(0).all_parameters()
param_name_list = [p.name for p in param_list]
param_shape = {}
for name in param_name_list:
param_tensor = fluid.global_scope().find_var(name).get_tensor()
param_shape[name] = np.array(param_tensor).shape
param_name_from_file = os.listdir(pretrained_file)
common_names = get_common_names(param_name_list, param_name_from_file)
logger.info('-------- loading params -----------')
# load params from file
def is_parameter(var):
if isinstance(var, fluid.framework.Parameter):
return isinstance(var, fluid.framework.Parameter) and \
os.path.exists(os.path.join(pretrained_file, var.name))
logger.info("Load pretrain weights from file {}".format(
pretrained_file))
vars = filter(is_parameter, prog.list_vars())
fluid.io.load_vars(exe, pretrained_file, vars=vars, main_program=prog)
# reset params if necessary
for name in common_names:
t = fluid.global_scope().find_var(name).get_tensor()
t_array = np.array(t)
origin_shape = param_shape[name]
if t_array.shape == origin_shape:
logger.info("load param {}".format(name))
elif (t_array.shape[:2] == origin_shape[:2]) and (
t_array.shape[-2:] == origin_shape[-2:]):
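# inflate 2D pretrained weights to 3D (I3D-style): replicate the kernel
# along the temporal axis and divide by the replication factor so the
# inflated conv initially produces the same output as the 2D conv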
num_inflate = origin_shape[2]
stack_t_array = np.stack(
[t_array] * num_inflate, axis=2) / float(num_inflate)
assert origin_shape == stack_t_array.shape, "inflated shape should match the shape of tensor {}".format(
name)
t.set(stack_t_array.astype('float32'), place)
logger.info("load inflated({}) param {}".format(num_inflate,
name))
else:
logger.info("Invalid case for name: {}".format(name))
raise ValueError("shape of param {} does not match the pretrained weights and cannot be inflated".format(name))
logger.info("finished loading params from resnet pretrained model")
else:
logger.info(
"pretrained file {} is not a directory, params can not be loaded from it".
format(pretrained_file))
pass
def get_common_names(param_name_list, param_name_from_file):
# name check and return common names both in param_name_list and file
common_names = []
paddle_only_names = []
file_only_names = []
logger.info('-------- common params -----------')
for name in param_name_list:
if name in param_name_from_file:
common_names.append(name)
logger.info(name)
else:
paddle_only_names.append(name)
logger.info('-------- paddle only params ----------')
for name in paddle_only_names:
logger.info(name)
logger.info('-------- file only params -----------')
for name in param_name_from_file:
if name in param_name_list:
assert name in common_names
else:
file_only_names.append(name)
logger.info(name)
return common_names
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
import paddle
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
import numpy as np
from . import nonlocal_helper
def Conv3dAffine(blob_in,
prefix,
dim_in,
dim_out,
filter_size,
stride,
padding,
cfg,
group=1,
test_mode=False,
bn_init=None):
blob_out = fluid.layers.conv3d(
input=blob_in,
num_filters=dim_out,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=group,
param_attr=ParamAttr(
name=prefix + "_weights", initializer=fluid.initializer.MSRA()),
bias_attr=False,
name=prefix + "_conv")
blob_out_shape = blob_out.shape
affine_name = "bn" + prefix[3:]
affine_scale = fluid.layers.create_parameter(
shape=[blob_out_shape[1]],
dtype=blob_out.dtype,
attr=ParamAttr(name=affine_name + '_scale'),
default_initializer=fluid.initializer.Constant(value=1.))
affine_bias = fluid.layers.create_parameter(
shape=[blob_out_shape[1]],
dtype=blob_out.dtype,
attr=ParamAttr(name=affine_name + '_offset'),
default_initializer=fluid.initializer.Constant(value=0.))
blob_out = fluid.layers.affine_channel(
blob_out, scale=affine_scale, bias=affine_bias, name=affine_name)
return blob_out
def Conv3dBN(blob_in,
prefix,
dim_in,
dim_out,
filter_size,
stride,
padding,
cfg,
group=1,
test_mode=False,
bn_init=None):
blob_out = fluid.layers.conv3d(
input=blob_in,
num_filters=dim_out,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=group,
param_attr=ParamAttr(
name=prefix + "_weights", initializer=fluid.initializer.MSRA()),
bias_attr=False,
name=prefix + "_conv")
bn_name = "bn" + prefix[3:]
blob_out = fluid.layers.batch_norm(
blob_out,
is_test=test_mode,
momentum=cfg.MODEL.bn_momentum,
epsilon=cfg.MODEL.bn_epsilon,
name=bn_name,
param_attr=ParamAttr(
name=bn_name + "_scale",
initializer=fluid.initializer.Constant(value=bn_init if
(bn_init != None) else 1.),
regularizer=fluid.regularizer.L2Decay(cfg.TRAIN.weight_decay_bn)),
bias_attr=ParamAttr(
name=bn_name + "_offset",
regularizer=fluid.regularizer.L2Decay(cfg.TRAIN.weight_decay_bn)),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance")
return blob_out
# 3d bottleneck
def bottleneck_transformation_3d(blob_in,
dim_in,
dim_out,
stride,
prefix,
dim_inner,
cfg,
group=1,
use_temp_conv=1,
temp_stride=1,
test_mode=False):
conv_op = Conv3dAffine if cfg.MODEL.use_affine else Conv3dBN
# 1x1 layer
blob_out = conv_op(
blob_in,
prefix + "_branch2a",
dim_in,
dim_inner, [1 + use_temp_conv * 2, 1, 1], [temp_stride, 1, 1],
[use_temp_conv, 0, 0],
cfg,
test_mode=test_mode)
blob_out = fluid.layers.relu(blob_out, name=prefix + "_branch2a" + "_relu")
# 3x3 layer
blob_out = conv_op(
blob_out,
prefix + '_branch2b',
dim_inner,
dim_inner, [1, 3, 3], [1, stride, stride], [0, 1, 1],
cfg,
group=group,
test_mode=test_mode)
blob_out = fluid.layers.relu(blob_out, name=prefix + "_branch2b" + "_relu")
# 1x1 layer, no relu
blob_out = conv_op(
blob_out,
prefix + '_branch2c',
dim_inner,
dim_out, [1, 1, 1], [1, 1, 1], [0, 0, 0],
cfg,
test_mode=test_mode,
bn_init=cfg.MODEL.bn_init_gamma)
return blob_out
def _add_shortcut_3d(blob_in,
prefix,
dim_in,
dim_out,
stride,
cfg,
temp_stride=1,
test_mode=False):
if ((dim_in == dim_out) and (temp_stride == 1) and (stride == 1)):
# identity mapping (do nothing)
return blob_in
else:
# when dim changes
conv_op = Conv3dAffine if cfg.MODEL.use_affine else Conv3dBN
blob_out = conv_op(
blob_in,
prefix,
dim_in,
dim_out, [1, 1, 1], [temp_stride, stride, stride], [0, 0, 0],
cfg,
test_mode=test_mode)
return blob_out
# residual block abstraction
def _generic_residual_block_3d(blob_in,
dim_in,
dim_out,
stride,
prefix,
dim_inner,
cfg,
group=1,
use_temp_conv=0,
temp_stride=1,
trans_func=None,
test_mode=False):
# transformation branch (e.g. 1x1-3x3-1x1, or 3x3-3x3), namely "F(x)"
if trans_func is None:
trans_func = globals()[cfg.RESNETS.trans_func]
tr_blob = trans_func(
blob_in,
dim_in,
dim_out,
stride,
prefix,
dim_inner,
cfg,
group=group,
use_temp_conv=use_temp_conv,
temp_stride=temp_stride,
test_mode=test_mode)
# create short cut, namely, "x"
sc_blob = _add_shortcut_3d(
blob_in,
prefix + "_branch1",
dim_in,
dim_out,
stride,
cfg,
temp_stride=temp_stride,
test_mode=test_mode)
# addition, namely, "x + F(x)", and relu
sum_blob = fluid.layers.elementwise_add(
tr_blob, sc_blob, act='relu', name=prefix + '_sum')
return sum_blob
def res_stage_nonlocal(block_fn,
blob_in,
dim_in,
dim_out,
stride,
num_blocks,
prefix,
cfg,
dim_inner=None,
group=None,
use_temp_convs=None,
temp_strides=None,
batch_size=None,
nonlocal_name=None,
nonlocal_mod=1000,
test_mode=False):
# prefix is something like: res2, res3, etc.
# each res layer has num_blocks stacked.
# check dtype and format of use_temp_convs and temp_strides
if use_temp_convs is None:
use_temp_convs = np.zeros(num_blocks).astype(int)
if temp_strides is None:
temp_strides = np.ones(num_blocks).astype(int)
if len(use_temp_convs) < num_blocks:
for _ in range(num_blocks - len(use_temp_convs)):
use_temp_convs.append(0)
temp_strides.append(1)
for idx in range(num_blocks):
block_prefix = '{}{}'.format(prefix, chr(idx + 97))
block_stride = 2 if ((idx == 0) and (stride == 2)) else 1
blob_in = _generic_residual_block_3d(
blob_in,
dim_in,
dim_out,
block_stride,
block_prefix,
dim_inner,
cfg,
group=group,
use_temp_conv=use_temp_convs[idx],
temp_stride=temp_strides[idx],
test_mode=test_mode)
dim_in = dim_out
if idx % nonlocal_mod == nonlocal_mod - 1:
blob_in = nonlocal_helper.add_nonlocal(
blob_in,
dim_in,
dim_in,
batch_size,
nonlocal_name + '_{}'.format(idx),
int(dim_in / 2),
cfg,
test_mode=test_mode)
return blob_in, dim_in
def res_stage_nonlocal_group(block_fn,
blob_in,
dim_in,
dim_out,
stride,
num_blocks,
prefix,
cfg,
dim_inner=None,
group=None,
use_temp_convs=None,
temp_strides=None,
batch_size=None,
pool_stride=None,
spatial_dim=None,
group_size=None,
nonlocal_name=None,
nonlocal_mod=1000,
test_mode=False):
# prefix is something like res2, res3, etc.
# each res layer has num_blocks stacked
# check dtype and format of use_temp_convs and temp_strides
if use_temp_convs is None:
use_temp_convs = np.zeros(num_blocks).astype(int)
if temp_strides is None:
temp_strides = np.ones(num_blocks).astype(int)
for idx in range(num_blocks):
block_prefix = "{}{}".format(prefix, chr(idx + 97))
block_stride = 2 if (idx == 0 and stride == 2) else 1
blob_in = _generic_residual_block_3d(
blob_in,
dim_in,
dim_out,
block_stride,
block_prefix,
dim_inner,
cfg,
group=group,
use_temp_conv=use_temp_convs[idx],
temp_stride=temp_strides[idx],
test_mode=test_mode)
dim_in = dim_out
if idx % nonlocal_mod == nonlocal_mod - 1:
blob_in = nonlocal_helper.add_nonlocal_group(
blob_in,
dim_in,
dim_in,
batch_size,
pool_stride,
spatial_dim,
spatial_dim,
group_size,
nonlocal_name + "_{}".format(idx),
int(dim_in / 2),
cfg,
test_mode=test_mode)
return blob_in, dim_in
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from . import resnet_helper
import logging
logger = logging.getLogger(__name__)
# For more depths, add the block config here
BLOCK_CONFIG = {
50: (3, 4, 6, 3),
101: (3, 4, 23, 3),
}
# ------------------------------------------------------------------------
# obtain_arc defines the temporal kernel radius and temporal strides for
# each layer's residual blocks in a ResNet.
# e.g. use_temp_convs = 1 means a temporal kernel of 3 is used.
# In ResNet50, it has (3, 4, 6, 3) blocks in conv2, 3, 4, 5,
# so the lengths of the corresponding lists are (3, 4, 6, 3).
# ------------------------------------------------------------------------
def obtain_arc(arc_type, video_length):
pool_stride = 1
# c2d, ResNet50
if arc_type == 1:
use_temp_convs_1 = [0]
temp_strides_1 = [1]
use_temp_convs_2 = [0, 0, 0]
temp_strides_2 = [1, 1, 1]
use_temp_convs_3 = [0, 0, 0, 0]
temp_strides_3 = [1, 1, 1, 1]
use_temp_convs_4 = [0, ] * 6
temp_strides_4 = [1, ] * 6
use_temp_convs_5 = [0, 0, 0]
temp_strides_5 = [1, 1, 1]
pool_stride = int(video_length / 2)
# i3d, ResNet50
if arc_type == 2:
use_temp_convs_1 = [2]
temp_strides_1 = [1]
use_temp_convs_2 = [1, 1, 1]
temp_strides_2 = [1, 1, 1]
use_temp_convs_3 = [1, 0, 1, 0]
temp_strides_3 = [1, 1, 1, 1]
use_temp_convs_4 = [1, 0, 1, 0, 1, 0]
temp_strides_4 = [1, 1, 1, 1, 1, 1]
use_temp_convs_5 = [0, 1, 0]
temp_strides_5 = [1, 1, 1]
pool_stride = int(video_length / 2)
# c2d, ResNet101
if arc_type == 3:
use_temp_convs_1 = [0]
temp_strides_1 = [1]
use_temp_convs_2 = [0, 0, 0]
temp_strides_2 = [1, 1, 1]
use_temp_convs_3 = [0, 0, 0, 0]
temp_strides_3 = [1, 1, 1, 1]
use_temp_convs_4 = [0, ] * 23
temp_strides_4 = [1, ] * 23
use_temp_convs_5 = [0, 0, 0]
temp_strides_5 = [1, 1, 1]
pool_stride = int(video_length / 2)
# i3d, ResNet101
if arc_type == 4:
use_temp_convs_1 = [2]
temp_strides_1 = [1]
use_temp_convs_2 = [1, 1, 1]
temp_strides_2 = [1, 1, 1]
use_temp_convs_3 = [1, 0, 1, 0]
temp_strides_3 = [1, 1, 1, 1]
use_temp_convs_4 = []
for i in range(23):
if i % 2 == 0:
use_temp_convs_4.append(1)
else:
use_temp_convs_4.append(0)
temp_strides_4 = [1] * 23
use_temp_convs_5 = [0, 1, 0]
temp_strides_5 = [1, 1, 1]
pool_stride = int(video_length / 2)
use_temp_convs_set = [
use_temp_convs_1, use_temp_convs_2, use_temp_convs_3, use_temp_convs_4,
use_temp_convs_5
]
temp_strides_set = [
temp_strides_1, temp_strides_2, temp_strides_3, temp_strides_4,
temp_strides_5
]
return use_temp_convs_set, temp_strides_set, pool_stride
def create_model(data, label, cfg, is_training=True, mode='train'):
group = cfg.RESNETS.num_groups
width_per_group = cfg.RESNETS.width_per_group
batch_size = int(cfg.TRAIN.batch_size / cfg.TRAIN.num_gpus)
logger.info('--------------- ResNet-{} {}x{}d-{}, {} ---------------'.
format(cfg.MODEL.depth, group, width_per_group,
cfg.RESNETS.trans_func, cfg.MODEL.dataset))
assert cfg.MODEL.depth in BLOCK_CONFIG.keys(), \
"Block config is not defined for specified model depth."
(n1, n2, n3, n4) = BLOCK_CONFIG[cfg.MODEL.depth]
res_block = resnet_helper._generic_residual_block_3d
dim_inner = group * width_per_group
use_temp_convs_set, temp_strides_set, pool_stride = obtain_arc(
cfg.MODEL.video_arc_choice, cfg[mode.upper()]['video_length'])
logger.info(use_temp_convs_set)
logger.info(temp_strides_set)
conv_blob = fluid.layers.conv3d(
input=data,
num_filters=64,
filter_size=[1 + use_temp_convs_set[0][0] * 2, 7, 7],
stride=[temp_strides_set[0][0], 2, 2],
padding=[use_temp_convs_set[0][0], 3, 3],
param_attr=ParamAttr(
name='conv1' + "_weights", initializer=fluid.initializer.MSRA()),
bias_attr=False,
name='conv1')
test_mode = False if (mode == 'train') else True
if cfg.MODEL.use_affine is False:
# use bn
bn_name = 'bn_conv1'
bn_blob = fluid.layers.batch_norm(
conv_blob,
is_test=test_mode,
momentum=cfg.MODEL.bn_momentum,
epsilon=cfg.MODEL.bn_epsilon,
name=bn_name,
param_attr=ParamAttr(
name=bn_name + "_scale",
regularizer=fluid.regularizer.L2Decay(
cfg.TRAIN.weight_decay_bn)),
bias_attr=ParamAttr(
name=bn_name + "_offset",
regularizer=fluid.regularizer.L2Decay(
cfg.TRAIN.weight_decay_bn)),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance")
else:
# use affine
affine_name = 'bn_conv1'
conv_blob_shape = conv_blob.shape
affine_scale = fluid.layers.create_parameter(
shape=[conv_blob_shape[1]],
dtype=conv_blob.dtype,
attr=ParamAttr(name=affine_name + '_scale'),
default_initializer=fluid.initializer.Constant(value=1.))
affine_bias = fluid.layers.create_parameter(
shape=[conv_blob_shape[1]],
dtype=conv_blob.dtype,
attr=ParamAttr(name=affine_name + '_offset'),
default_initializer=fluid.initializer.Constant(value=0.))
bn_blob = fluid.layers.affine_channel(
conv_blob, scale=affine_scale, bias=affine_bias, name=affine_name)
# relu
relu_blob = fluid.layers.relu(bn_blob, name='res_conv1_bn_relu')
# max pool
max_pool = fluid.layers.pool3d(
input=relu_blob,
pool_size=[1, 3, 3],
pool_type='max',
pool_stride=[1, 2, 2],
pool_padding=[0, 0, 0],
name='pool1')
# building res block
if cfg.MODEL.depth in [50, 101]:
blob_in, dim_in = resnet_helper.res_stage_nonlocal(
res_block,
max_pool,
64,
256,
stride=1,
num_blocks=n1,
prefix='res2',
cfg=cfg,
dim_inner=dim_inner,
group=group,
use_temp_convs=use_temp_convs_set[1],
temp_strides=temp_strides_set[1],
test_mode=test_mode)
layer_mod = cfg.NONLOCAL.layer_mod
if cfg.MODEL.depth == 101:
layer_mod = 2
if cfg.NONLOCAL.conv3_nonlocal is False:
layer_mod = 1000
blob_in = fluid.layers.pool3d(
blob_in,
pool_size=[2, 1, 1],
pool_type='max',
pool_stride=[2, 1, 1],
pool_padding=[0, 0, 0],
name='pool2')
if cfg.MODEL.use_affine is False:
blob_in, dim_in = resnet_helper.res_stage_nonlocal(
res_block,
blob_in,
dim_in,
512,
stride=2,
num_blocks=n2,
prefix='res3',
cfg=cfg,
dim_inner=dim_inner * 2,
group=group,
use_temp_convs=use_temp_convs_set[2],
temp_strides=temp_strides_set[2],
batch_size=batch_size,
nonlocal_name="nonlocal_conv3",
nonlocal_mod=layer_mod,
test_mode=test_mode)
else:
crop_size = cfg[mode.upper()]['crop_size']
blob_in, dim_in = resnet_helper.res_stage_nonlocal_group(
res_block,
blob_in,
dim_in,
512,
stride=2,
num_blocks=n2,
prefix='res3',
cfg=cfg,
dim_inner=dim_inner * 2,
group=group,
use_temp_convs=use_temp_convs_set[2],
temp_strides=temp_strides_set[2],
batch_size=batch_size,
pool_stride=pool_stride,
spatial_dim=int(crop_size / 8),
group_size=4,
nonlocal_name="nonlocal_conv3_group",
nonlocal_mod=layer_mod,
test_mode=test_mode)
layer_mod = cfg.NONLOCAL.layer_mod
if cfg.MODEL.depth == 101:
layer_mod = layer_mod * 4 - 1
if cfg.NONLOCAL.conv4_nonlocal is False:
layer_mod = 1000
blob_in, dim_in = resnet_helper.res_stage_nonlocal(
res_block,
blob_in,
dim_in,
1024,
stride=2,
num_blocks=n3,
prefix='res4',
cfg=cfg,
dim_inner=dim_inner * 4,
group=group,
use_temp_convs=use_temp_convs_set[3],
temp_strides=temp_strides_set[3],
batch_size=batch_size,
nonlocal_name="nonlocal_conv4",
nonlocal_mod=layer_mod,
test_mode=test_mode)
blob_in, dim_in = resnet_helper.res_stage_nonlocal(
res_block,
blob_in,
dim_in,
2048,
stride=2,
num_blocks=n4,
prefix='res5',
cfg=cfg,
dim_inner=dim_inner * 8,
group=group,
use_temp_convs=use_temp_convs_set[4],
temp_strides=temp_strides_set[4],
test_mode=test_mode)
else:
raise Exception("Unsupported network settings.")
blob_out = fluid.layers.pool3d(
blob_in,
pool_size=[pool_stride, 7, 7],
pool_type='avg',
pool_stride=[1, 1, 1],
pool_padding=[0, 0, 0],
name='pool5')
if (cfg.TRAIN.dropout_rate > 0) and (test_mode is False):
blob_out = fluid.layers.dropout(
blob_out, cfg.TRAIN.dropout_rate, is_test=test_mode)
if mode in ['train', 'valid']:
blob_out = fluid.layers.fc(
blob_out,
cfg.MODEL.num_classes,
param_attr=ParamAttr(
name='pred' + "_w",
initializer=fluid.initializer.Normal(
loc=0.0, scale=cfg.MODEL.fc_init_std)),
bias_attr=ParamAttr(
name='pred' + "_b",
initializer=fluid.initializer.Constant(value=0.)),
name='pred')
elif mode in ['test', 'infer']:
blob_out = fluid.layers.conv3d(
input=blob_out,
num_filters=cfg.MODEL.num_classes,
filter_size=[1, 1, 1],
stride=[1, 1, 1],
padding=[0, 0, 0],
param_attr=ParamAttr(
name='pred' + "_w", initializer=fluid.initializer.MSRA()),
bias_attr=ParamAttr(
name='pred' + "_b",
initializer=fluid.initializer.Constant(value=0.)),
name='pred')
if (mode == 'train') or (mode == 'valid'):
softmax = fluid.layers.softmax(blob_out)
loss = fluid.layers.cross_entropy(
softmax, label, soft_label=False, ignore_index=-100)
elif (mode == 'test') or (mode == 'infer'):
# fully convolutional testing, when loading test model,
# params should be copied from train_prog fc layer named pred
blob_out = fluid.layers.transpose(
blob_out, [0, 2, 3, 4, 1], name='pred_tr')
blob_out = fluid.layers.softmax(blob_out, name='softmax_conv')
softmax = fluid.layers.reduce_mean(
blob_out, dim=[1, 2, 3], keep_dim=False, name='softmax')
loss = None
else:
raise NotImplementedError("mode should be one of train, valid, test or infer")
return softmax, loss
......@@ -30,11 +30,11 @@ StNet的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集
数据准备完毕后,可以通过如下两种方式启动训练:
python train.py --model-name=STNET
python train.py --model_name=STNET
--config=./configs/stnet.txt
--save-dir=checkpoints
--log-interval=10
--valid-interval=1
--save_dir=checkpoints
--log_interval=10
--valid_interval=1
bash scripts/train/train_stnet.sh
......@@ -62,9 +62,9 @@ StNet的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集
可通过如下两种方式进行模型评估:
python test.py --model-name=STNET
python test.py --model_name=STNET
--config=configs/stnet.txt
--log-interval=1
--log_interval=1
--weights=$PATH_TO_WEIGHTS
bash scripts/test/test__stnet.sh
......@@ -92,10 +92,10 @@ StNet的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集
可通过如下命令进行模型推断:
python infer.py --model-name=stnet
python infer.py --model_name=stnet
--config=configs/stnet.txt
--log-interval=1
--weights=$PATH_TO_WEIGHTS
--log_interval=1
--weights=$PATH_TO_WEIGHTS
--filelist=$FILELIST
- 模型推断结果存储于`STNET_infer_result`中,通过`pickle`格式存储。
......
......@@ -46,6 +46,7 @@ class STNET(ModelBase):
'l2_weight_decay')
self.momentum = self.get_config_from_sec('train', 'momentum')
self.seg_num = self.get_config_from_sec(self.mode, 'seg_num', self.seg_num)
self.target_size = self.get_config_from_sec(self.mode, 'target_size')
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size')
......
# TSM 视频分类模型
---
## 内容
- [模型简介](#模型简介)
- [数据准备](#数据准备)
- [模型训练](#模型训练)
- [模型评估](#模型评估)
- [模型推断](#模型推断)
- [参考论文](#参考论文)
## 模型简介
Temporal Shift Module是由MIT和IBM Watson AI Lab的Ji Lin,Chuang Gan等人提出的通过时间位移来提高网络视频理解能力的模块,其位移操作原理如下图所示。
<p align="center">
<img src="../../images/temporal_shift.png" height=250 width=800 hspace='10'/> <br />
Temporal shift module
</p>
上图中矩阵表示特征图中的temporal和channel维度,通过将一部分的channel在temporal维度上向前位移一步,一部分的channel在temporal维度上向后位移一步,位移后的空缺补零。通过这种方式在特征图中引入temporal维度上的上下文交互,提高在时间维度上的视频理解能力。
The TSM model is built by inserting the Temporal Shift Module into a ResNet network; the version implemented in this model zoo uses ResNet-50 as the backbone.
For details, please refer to the paper [Temporal Shift Module for Efficient Video Understanding](https://arxiv.org/abs/1811.08383v1).
## Data Preparation
TSM is trained on the Kinetics-400 action recognition dataset released by DeepMind. For data download and preparation, please refer to the [data instructions](../../dataset/README.md).
## Model Training
After the data is ready, training can be started in either of the following two ways:
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fraction_of_gpu_memory_to_use=0.98
python train.py --model_name=TSM
--config=./configs/tsm.txt
--save_dir=checkpoints
--log_interval=10
--valid_interval=1
bash scripts/train/train_tsm.sh
- You can download the released [model](https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz) and point `--resume` at the extracted weights for fine-tuning and further development.
**Data reader notes:** The model reads `mp4` videos from the Kinetics-400 dataset. Each video is divided into `seg_num` segments, one frame is sampled from each segment, random augmentation is applied to each frame, and the frames are resized to `target_size`. A rough sketch of the segment sampling is shown below.
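The following is only an illustrative sketch of the per-video segment sampling described above, with decoding and augmentation omitted; `seg_num` comes from the config, while the function and variable names are assumptions.
```
import random

def sample_frame_indices(num_frames, seg_num, is_training=True):
    # Split the video into seg_num equal segments and pick one frame per segment:
    # a random frame during training, the segment center at evaluation time.
    seg_len = num_frames // seg_num
    indices = []
    for i in range(seg_num):
        start = i * seg_len
        if is_training:
            indices.append(start + random.randint(0, max(seg_len - 1, 0)))
        else:
            indices.append(start + seg_len // 2)
    return indices
```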
**Training strategy:**
* Trained with the Momentum optimizer, momentum=0.9
* Weight decay coefficient of 1e-4
## Model Evaluation
Model evaluation can be performed in either of the following two ways:
python test.py --model_name=TSM
--config=configs/tsm.txt
--log_interval=1
--weights=$PATH_TO_WEIGHTS
bash scripts/test/test_tsm.sh
- When evaluating with `scripts/test/test_tsm.sh`, modify the `--weights` argument in the script to point to the weights to be evaluated.
- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz) for evaluation.
With the following parameters, the evaluation accuracy on the Kinetics-400 validation set is:
| seg\_num | target\_size | Top-1 |
| :------: | :----------: | :----: |
| 8 | 224 | 0.70 |
## Model Inference
Model inference can be run with the following command:
python infer.py --model_name=TSM
--config=configs/tsm.txt
--log_interval=1
--weights=$PATH_TO_WEIGHTS
--filelist=$FILELIST
- Inference results are stored in `TSM_infer_result` in `pickle` format; a loading snippet follows this list.
- If `--weights` is not specified, the script downloads the released [model](https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz) for inference.
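A minimal way to read those results back; the file name comes from the note above, while the structure of the loaded object is an assumption and may differ between models.
```
import pickle

# Load the pickled inference results written by infer.py.
with open('TSM_infer_result', 'rb') as f:
    results = pickle.load(f)
print(type(results))
```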
## Reference Papers
- [Temporal Shift Module for Efficient Video Understanding](https://arxiv.org/abs/1811.08383v1), Ji Lin, Chuang Gan, Song Han
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import ParamAttr
from ..model import ModelBase
from .tsm_res_model import TSM_ResNet
import logging
logger = logging.getLogger(__name__)
__all__ = ["TSM"]
class TSM(ModelBase):
def __init__(self, name, cfg, mode='train'):
super(TSM, self).__init__(name, cfg, mode=mode)
self.get_config()
def get_config(self):
self.num_classes = self.get_config_from_sec('model', 'num_classes')
self.seg_num = self.get_config_from_sec('model', 'seg_num')
self.seglen = self.get_config_from_sec('model', 'seglen')
self.image_mean = self.get_config_from_sec('model', 'image_mean')
self.image_std = self.get_config_from_sec('model', 'image_std')
self.num_layers = self.get_config_from_sec('model', 'num_layers')
self.num_epochs = self.get_config_from_sec('train', 'epoch')
self.total_videos = self.get_config_from_sec('train', 'total_videos')
self.base_learning_rate = self.get_config_from_sec('train',
'learning_rate')
self.learning_rate_decay = self.get_config_from_sec(
'train', 'learning_rate_decay')
self.decay_epochs = self.get_config_from_sec('train', 'decay_epochs')
self.l2_weight_decay = self.get_config_from_sec('train',
'l2_weight_decay')
self.momentum = self.get_config_from_sec('train', 'momentum')
self.target_size = self.get_config_from_sec(self.mode, 'target_size')
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size')
def build_input(self, use_pyreader=True):
image_shape = [3, self.target_size, self.target_size]
image_shape[0] = image_shape[0] * self.seglen
image_shape = [self.seg_num] + image_shape
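# Final per-sample shape: [seg_num, 3 * seglen, target_size, target_size]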
self.use_pyreader = use_pyreader
if use_pyreader:
assert self.mode != 'infer', \
'pyreader is not recommended in infer mode, please set use_pyreader to False.'
py_reader = fluid.layers.py_reader(
capacity=100,
shapes=[[-1] + image_shape, [-1] + [1]],
dtypes=['float32', 'int64'],
name='train_py_reader'
if self.is_training else 'test_py_reader',
use_double_buffer=True)
image, label = fluid.layers.read_file(py_reader)
self.py_reader = py_reader
else:
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
if self.mode != 'infer':
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
else:
label = None
self.feature_input = [image]
self.label_input = label
def build_model(self):
videomodel = TSM_ResNet(
layers=self.num_layers,
seg_num=self.seg_num,
is_training=self.is_training)
out = videomodel.net(input=self.feature_input[0],
class_dim=self.num_classes)
self.network_outputs = [out]
def optimizer(self):
assert self.mode == 'train', "optimizer is only available in train mode"
total_videos = self.total_videos
step = int(total_videos / self.batch_size + 1)
bd = [e * step for e in self.decay_epochs]
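# 'step' above is the number of iterations per epoch, so 'bd' converts the
# epoch indices in decay_epochs into iteration boundaries; the lr list below
# needs len(bd) + 1 values (this assumes exactly two entries in decay_epochs).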
base_lr = self.base_learning_rate
lr_decay = self.learning_rate_decay
lr = [base_lr, base_lr * lr_decay, base_lr * lr_decay * lr_decay]
l2_weight_decay = self.l2_weight_decay
momentum = self.momentum
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum,
regularization=fluid.regularizer.L2Decay(l2_weight_decay))
return optimizer
def loss(self):
assert self.mode != 'infer', "invalid loss calculation in infer mode"
cost = fluid.layers.cross_entropy(input=self.network_outputs[0], \
label=self.label_input, ignore_index=-1)
self.loss_ = fluid.layers.mean(x=cost)
return self.loss_
def outputs(self):
return self.network_outputs
def feeds(self):
return self.feature_input if self.mode == 'infer' else self.feature_input + [
self.label_input
]
def pretrain_info(self):
return ('ResNet50_pretrained', 'https://paddlemodels.bj.bcebos.com/video_classification/ResNet50_pretrained.tar.gz')
def weights_info(self):
return ('tsm_kinetics',
'https://paddlemodels.bj.bcebos.com/video_classification/tsm_kinetics.tar.gz')
def load_pretrain_params(self, exe, pretrain, prog, place):
def is_parameter(var):
return isinstance(var, fluid.framework.Parameter) and (not ("fc_0" in var.name))
logger.info("Load pretrain weights from {}, exclude fc layer.".format(pretrain))
vars = list(filter(is_parameter, prog.list_vars()))
fluid.io.load_vars(exe, pretrain, vars=vars, main_program=prog)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import time
import sys
import paddle.fluid as fluid
import math
class TSM_ResNet():
def __init__(self, layers=50, seg_num=8, is_training=False):
self.layers = layers
self.seg_num = seg_num
self.is_training = is_training
def shift_module(self, input):
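# Shift 1/8 of the channels one step forward and 1/8 one step backward along
# the temporal (segment) dimension; vacated positions are zero-padded
# (see fluid.layers.temporal_shift).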
output = fluid.layers.temporal_shift(input, self.seg_num, 1.0 / 8)
return output
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
act=None,
name=None):
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
param_attr=fluid.param_attr.ParamAttr(name=name+"_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
return fluid.layers.batch_norm(input=conv, act=act,
is_test=(not self.is_training),
param_attr=fluid.param_attr.ParamAttr(name=bn_name+"_scale"),
bias_attr=fluid.param_attr.ParamAttr(bn_name+'_offset'),
moving_mean_name=bn_name+"_mean",
moving_variance_name=bn_name+'_variance')
def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
else:
return input
def bottleneck_block(self, input, num_filters, stride, name):
shifted = self.shift_module(input)
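# The shift is applied only to the residual branch; the shortcut below still
# uses the unshifted input (the residual-shift variant from the paper).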
conv0 = self.conv_bn_layer(
input=shifted, num_filters=num_filters, filter_size=1, act='relu',
name=name+"_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu', name=name+"_branch2b")
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c")
short = self.shortcut(input, num_filters * 4, stride, name=name+"_branch1")
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def net(self, input, class_dim=101):
layers = self.layers
seg_num = self.seg_num
supported_layers = [50, 101, 152]
if layers not in supported_layers:
print("supported layers are", supported_layers, \
"but input layer is ", layers)
exit()
# reshape input: fold the segment dim into the batch dim so 2D convs run on every sampled frame
channels = input.shape[2]
short_size = input.shape[3]
input = fluid.layers.reshape(
x=input, shape=[-1, channels, short_size, short_size])
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input, num_filters=64, filter_size=7, stride=2, act='relu', name='conv1')
conv = fluid.layers.pool2d(
input=conv,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
for block in range(len(depth)):
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block+2) + "a"
else:
conv_name = "res" + str(block+2) + "b" + str(i)
else:
conv_name = "res" + str(block+2) + chr(97+i)
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
dropout = fluid.layers.dropout(x=pool, dropout_prob=0.5, is_test=(not self.is_training))
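# Segment consensus: restore the segment dimension and average the
# per-segment features before the final classifier.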
feature = fluid.layers.reshape(
x=dropout, shape=[-1, seg_num, pool.shape[1]])
out = fluid.layers.reduce_mean(feature, dim=1)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(input=out,
size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv)),
bias_attr=fluid.param_attr.ParamAttr(learning_rate=2.0,
regularizer=fluid.regularizer.L2Decay(0.)))
return out
......@@ -25,11 +25,11 @@ TSN is trained on the Kinetics-400 action recognition dataset released by DeepMind.
After the data is ready, training can be started in either of the following two ways:
python train.py --model-name=TSN
python train.py --model_name=TSN
--config=./configs/tsn.txt
--save-dir=checkpoints
--log-interval=10
--valid-interval=1
--save_dir=checkpoints
--log_interval=10
--valid_interval=1
bash scripts/train/train_tsn.sh
......@@ -47,9 +47,9 @@ TSN is trained on the Kinetics-400 action recognition dataset released by DeepMind.
Model evaluation can be performed in either of the following two ways:
python test.py --model-name=TSN
python test.py --model_name=TSN
--config=configs/tsn.txt
--log-interval=1
--log_interval=1
--weights=$PATH_TO_WEIGHTS
bash scripts/test/test_tsn.sh
......@@ -69,10 +69,10 @@ TSN is trained on the Kinetics-400 action recognition dataset released by DeepMind.
Model inference can be run with the following command:
python infer.py --model-name=TSN
python infer.py --model_name=TSN
--config=configs/tsn.txt
--log-interval=1
--weights=$PATH_TO_WEIGHTS
--log_interval=1
--weights=$PATH_TO_WEIGHTS
--filelist=$FILELIST
- Inference results are stored in `TSN_infer_result` in `pickle` format.
......
......@@ -47,6 +47,7 @@ class TSN(ModelBase):
'l2_weight_decay')
self.momentum = self.get_config_from_sec('train', 'momentum')
self.seg_num = self.get_config_from_sec(self.mode, 'seg_num', self.seg_num)
self.target_size = self.get_config_from_sec(self.mode, 'target_size')
self.batch_size = self.get_config_from_sec(self.mode, 'batch_size')
......
File mode changed from 100755 to 100644
python infer.py --model-name="AttentionCluster" --config=./configs/attention_cluster.txt \
--filelist=./data/youtube8m/infer.list \
python infer.py --model_name="AttentionCluster" --config=./configs/attention_cluster.txt \
--filelist=./dataset/youtube8m/infer.list \
--weights=./checkpoints/AttentionCluster_epoch0 \
--save-dir="./save"
--save_dir="./save"
python infer.py --model-name="AttentionLSTM" --config=./configs/attention_lstm.txt \
--filelist=./data/youtube8m/infer.list \
python infer.py --model_name="AttentionLSTM" --config=./configs/attention_lstm.txt \
--filelist=./dataset/youtube8m/infer.list \
--weights=./checkpoints/AttentionLSTM_epoch0 \
--save-dir="./save"
--save_dir="./save"
python infer.py --model-name="NEXTVLAD" --config=./configs/nextvlad.txt --filelist=./data/youtube8m/infer.list \
python infer.py --model_name="NEXTVLAD" --config=./configs/nextvlad.txt --filelist=./dataset/youtube8m/infer.list \
--weights=./checkpoints/NEXTVLAD_epoch0 \
--save-dir="./save"
--save_dir="./save"
python infer.py --model_name="NONLOCAL" --config=./configs/nonlocal.txt --filelist=./dataset/nonlocal/inferlist.txt \
--log_interval=10 --weights=./checkpoints/NONLOCAL_epoch0 --save_dir=./save
python infer.py --model-name="STNET" --config=./configs/stnet.txt --filelist=./data/kinetics/infer.list \
--log-interval=10 --weights=./checkpoints/STNET_epoch0 --save-dir=./save
python infer.py --model_name="STNET" --config=./configs/stnet.txt --filelist=./dataset/kinetics/infer.list \
--log_interval=10 --weights=./checkpoints/STNET_epoch0 --save_dir=./save
python infer.py --model_name="TSM" --config=./configs/tsm.txt --filelist=./dataset/kinetics/infer.list \
--log_interval=10 --weights=./checkpoints/TSM_epoch0 --save_dir=./save
python infer.py --model-name="TSN" --config=./configs/tsn.txt --filelist=./data/kinetics/infer.list \
--log-interval=10 --weights=./checkpoints/TSN_epoch0 --save-dir=./save
python infer.py --model_name="TSN" --config=./configs/tsn.txt --filelist=./dataset/kinetics/infer.list \
--log_interval=10 --weights=./checkpoints/TSN_epoch0 --save_dir=./save
python test.py --model-name="AttentionCluster" --config=./configs/attention_cluster.txt \
--log-interval=5 --weights=./checkpoints/AttentionCluster_epoch0
python test.py --model_name="AttentionCluster" --config=./configs/attention_cluster.txt \
--log_interval=5 --weights=./checkpoints/AttentionCluster_epoch0
python test.py --model-name="AttentionLSTM" --config=./configs/attention_lstm.txt \
--log-interval=5 --weights=./checkpoints/AttentionLSTM_epoch0
python test.py --model_name="AttentionLSTM" --config=./configs/attention_lstm.txt \
--log_interval=5 --weights=./checkpoints/AttentionLSTM_epoch0
python test.py --model-name="NEXTVLAD" --config=./configs/nextvlad.txt \
--log-interval=10 --weights=./checkpoints/NEXTVLAD_epoch0
python test.py --model_name="NEXTVLAD" --config=./configs/nextvlad.txt \
--log_interval=10 --weights=./checkpoints/NEXTVLAD_epoch0
python -i test.py --model_name="NONLOCAL" --config=./configs/nonlocal.txt \
--log_interval=1 --weights=./checkpoints/NONLOCAL_epoch0
python test.py --model-name="STNET" --config=./configs/stnet.txt \
--log-interval=10 --weights=./checkpoints/STNET_epoch0
python test.py --model_name="STNET" --config=./configs/stnet.txt \
--log_interval=10 --weights=./checkpoints/STNET_epoch0
python test.py --model_name="TSM" --config=./configs/tsm.txt \
--log_interval=10 --weights=./checkpoints/TSM_epoch0
python test.py --model-name="TSN" --config=./configs/tsn.txt \
--log-interval=10 --weights=./checkpoints/TSN_epoch0
python test.py --model_name="TSN" --config=./configs/tsn.txt \
--log_interval=10 --weights=./checkpoints/TSN_epoch0
python train.py --model-name="AttentionCluster" --config=./configs/attention_cluster.txt --epoch-num=5 \
--valid-interval=1 --log-interval=10
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python train.py --model_name="AttentionCluster" --config=./configs/attention_cluster.txt --epoch_num=5 \
--valid_interval=1 --log_interval=10
python train.py --model-name="AttentionLSTM" --config=./configs/attention_lstm.txt --epoch-num=10 \
--valid-interval=1 --log-interval=10
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python train.py --model_name="AttentionLSTM" --config=./configs/attention_lstm.txt --epoch_num=10 \
--valid_interval=1 --log_interval=10
export CUDA_VISIBLE_DEVICES=0,1,2,3
python train.py --model-name="NEXTVLAD" --config=./configs/nextvlad.txt --epoch-num=6 \
--valid-interval=1 --log-interval=10
python train.py --model_name="NEXTVLAD" --config=./configs/nextvlad.txt --epoch_num=6 \
--valid_interval=1 --log_interval=10