Merge branch 'master' of https://github.com/PaddlePaddle/hapi into lenet

f2212a71 · LielinJiang · 4c3effee · 072bedd1 · f2212a71 · 4c3effee
44 changed files
--- a/examples/bert/bert_classifier.py
+++ b/examples/bert/bert_classifier.py
@@ -16,14 +16,60 @@
 import paddle.fluid as fluid
 from hapi.metrics import Accuracy
 from hapi.configure import Config
+from hapi.text.bert import BertEncoder
+from paddle.fluid.dygraph import Linear, Layer
 from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
-from cls import ClsModelLayer
 import hapi.text.tokenizer.tokenization as tokenization
 from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
-def train():
+class ClsModelLayer(Model):
+    """
+    Classification model: a BertEncoder followed by a linear layer that
+    produces one output per label.
+    """
+    def __init__(self,
+                 args,
+                 config,
+                 num_labels,
+                 return_pooled_out=True,
+                 use_fp16=False):
+        # args: only `args.loss_scaling` is read here.
+        # config: passed to BertEncoder; config["hidden_size"] is the input
+        #     dimension of the classification layer.
+        # num_labels: output dimension of the classification layer.
+        # use_fp16: forwarded to BertEncoder.
+        # NOTE(review): `return_pooled_out` is accepted but not used below;
+        #     BertEncoder is always built with return_pooled_out=True.
+        super(ClsModelLayer, self).__init__()
+        self.config = config
+        self.use_fp16 = use_fp16
+        self.loss_scaling = args.loss_scaling
+        self.bert_layer = BertEncoder(
+            config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
+        self.cls_fc = Linear(
+            input_dim=self.config["hidden_size"],
+            output_dim=num_labels,
+            param_attr=fluid.ParamAttr(
+                name="cls_out_w",
+                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
+            bias_attr=fluid.ParamAttr(
+                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
+    def forward(self, src_ids, position_ids, sentence_ids, input_mask):
+        """
+        Run the encoder, apply dropout (probability 0.1) to its second
+        output and return the result of the linear classification layer.
+        """
+        # enc_output is not used afterwards; only next_sent_feat feeds the
+        # classification layer.
+        enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
+                                                     sentence_ids, input_mask)
+        cls_feats = fluid.layers.dropout(
+            x=next_sent_feat,
+            dropout_prob=0.1,
+            dropout_implementation="upscale_in_train")
+        pred = self.cls_fc(cls_feats)
+        return pred
+def main():
    config = Config(yaml_file="./bert.yaml")
    config.build()
@@ -35,8 +81,6 @@ def train():
    bert_config = BertConfig(config.bert_config_path)
    bert_config.print_config()
-    trainer_count = fluid.dygraph.parallel.Env().nranks
    tokenizer = tokenization.FullTokenizer(
        vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)
@@ -52,14 +96,24 @@ def train():
        return BertInputExample(
            uid=uid, text_a=text_a, text_b=text_b, label=label)
-    bert_dataloader = BertDataLoader(
+    train_dataloader = BertDataLoader(
        "./data/glue_data/MNLI/train.tsv",
        tokenizer, ["contradiction", "entailment", "neutral"],
-        max_seq_length=64,
+        max_seq_length=config.max_seq_len,
-        batch_size=32,
+        batch_size=config.batch_size,
        line_processor=mnli_line_processor)
-    num_train_examples = len(bert_dataloader.dataset)
+    test_dataloader = BertDataLoader(
+        "./data/glue_data/MNLI/dev_matched.tsv",
+        tokenizer, ["contradiction", "entailment", "neutral"],
+        max_seq_length=config.max_seq_len,
+        batch_size=config.batch_size,
+        line_processor=mnli_line_processor,
+        shuffle=False,
+        phase="predict")
+    trainer_count = fluid.dygraph.parallel.Env().nranks
+    num_train_examples = len(train_dataloader.dataset)
    max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
    warmup_steps = int(max_train_steps * config.warmup_proportion)
@@ -82,7 +136,6 @@ def train():
        config,
        bert_config,
        len(["contradiction", "entailment", "neutral"]),
-        is_training=True,
        return_pooled_out=True)
    optimizer = Optimizer(
@@ -106,10 +159,15 @@ def train():
    cls_model.bert_layer.init_parameters(
        config.init_pretraining_params, verbose=config.verbose)
-    cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch)
+    # do train
+    cls_model.fit(train_data=train_dataloader.dataloader,
+                  epochs=config.epoch,
+                  save_dir=config.checkpoints)
-    return cls_model
+    # do eval
+    cls_model.evaluate(
+        eval_data=test_dataloader.dataloader, batch_size=config.batch_size)
 if __name__ == '__main__':
-    cls_model = train()
+    main()
--- a/examples/bert/cls.py
+++ b/examples/bert/cls.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"dygraph transformer layers"
-import six
-import json
-import numpy as np
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.dygraph import Linear, Layer
-from hapi.text.bert import BertEncoder
-from hapi.model import Model
-class ClsModelLayer(Model):
-    """
-    classify model
-    """
-    def __init__(self,
-                 args,
-                 config,
-                 num_labels,
-                 is_training=True,
-                 return_pooled_out=True,
-                 use_fp16=False):
-        super(ClsModelLayer, self).__init__()
-        self.config = config
-        self.is_training = is_training
-        self.use_fp16 = use_fp16
-        self.loss_scaling = args.loss_scaling
-        self.bert_layer = BertEncoder(
-            config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
-        self.cls_fc = Linear(
-            input_dim=self.config["hidden_size"],
-            output_dim=num_labels,
-            param_attr=fluid.ParamAttr(
-                name="cls_out_w",
-                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
-            bias_attr=fluid.ParamAttr(
-                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
-    def forward(self, src_ids, position_ids, sentence_ids, input_mask):
-        """
-        forward
-        """
-        enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
-                                                     sentence_ids, input_mask)
-        cls_feats = fluid.layers.dropout(
-            x=next_sent_feat,
-            dropout_prob=0.1,
-            dropout_implementation="upscale_in_train")
-        logits = self.cls_fc(cls_feats)
-        return logits
--- a/examples/bert_leveldb/bert.yaml
+++ b/examples/bert_leveldb/bert.yaml
@@ -18,7 +18,7 @@ batch_size: 32
 in_tokens: False
 do_lower_case: True
 random_seed: 5512
-use_cuda: False
+use_cuda: True
 shuffle: True
 do_train: True
 do_test: True

--- a/examples/bert_leveldb/bert_classifier.py
+++ b/examples/bert_leveldb/bert_classifier.py
@@ -16,14 +16,60 @@
 import paddle.fluid as fluid
 from hapi.metrics import Accuracy
 from hapi.configure import Config
+from hapi.text.bert import BertEncoder
+from paddle.fluid.dygraph import Linear, Layer
 from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
-from cls import ClsModelLayer
 import hapi.text.tokenizer.tokenization as tokenization
 from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
-def train():
+class ClsModelLayer(Model):
+    """
+    Classification model: a BertEncoder followed by a linear layer that
+    produces one output per label.
+    """
+    def __init__(self,
+                 args,
+                 config,
+                 num_labels,
+                 return_pooled_out=True,
+                 use_fp16=False):
+        # args: only `args.loss_scaling` is read here.
+        # config: passed to BertEncoder; config["hidden_size"] is the input
+        #     dimension of the classification layer.
+        # num_labels: output dimension of the classification layer.
+        # use_fp16: forwarded to BertEncoder.
+        # NOTE(review): `return_pooled_out` is accepted but not used below;
+        #     BertEncoder is always built with return_pooled_out=True.
+        super(ClsModelLayer, self).__init__()
+        self.config = config
+        self.use_fp16 = use_fp16
+        self.loss_scaling = args.loss_scaling
+        self.bert_layer = BertEncoder(
+            config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
+        self.cls_fc = Linear(
+            input_dim=self.config["hidden_size"],
+            output_dim=num_labels,
+            param_attr=fluid.ParamAttr(
+                name="cls_out_w",
+                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
+            bias_attr=fluid.ParamAttr(
+                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
+    def forward(self, src_ids, position_ids, sentence_ids, input_mask):
+        """
+        Run the encoder, apply dropout (probability 0.1) to its second
+        output and return the result of the linear classification layer.
+        """
+        # enc_output is not used afterwards; only next_sent_feat feeds the
+        # classification layer.
+        enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
+                                                     sentence_ids, input_mask)
+        cls_feats = fluid.layers.dropout(
+            x=next_sent_feat,
+            dropout_prob=0.1,
+            dropout_implementation="upscale_in_train")
+        pred = self.cls_fc(cls_feats)
+        return pred
+def main():
    config = Config(yaml_file="./bert.yaml")
    config.build()
@@ -35,8 +81,6 @@ def train():
    bert_config = BertConfig(config.bert_config_path)
    bert_config.print_config()
-    trainer_count = fluid.dygraph.parallel.Env().nranks
    tokenizer = tokenization.FullTokenizer(
        vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)
@@ -52,15 +96,26 @@ def train():
        return BertInputExample(
            uid=uid, text_a=text_a, text_b=text_b, label=label)
-    bert_dataloader = BertDataLoader(
+    train_dataloader = BertDataLoader(
        "./data/glue_data/MNLI/train.tsv",
        tokenizer, ["contradiction", "entailment", "neutral"],
-        max_seq_length=64,
+        max_seq_length=config.max_seq_len,
-        batch_size=32,
+        batch_size=config.batch_size,
        line_processor=mnli_line_processor,
-        mode="leveldb")
+        mode="leveldb",
+        phase="train")
-    num_train_examples = len(bert_dataloader.dataset)
+    test_dataloader = BertDataLoader(
+        "./data/glue_data/MNLI/dev_matched.tsv",
+        tokenizer, ["contradiction", "entailment", "neutral"],
+        max_seq_length=config.max_seq_len,
+        batch_size=config.batch_size,
+        line_processor=mnli_line_processor,
+        shuffle=False,
+        phase="predict")
+    trainer_count = fluid.dygraph.parallel.Env().nranks
+    num_train_examples = len(train_dataloader.dataset)
    max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
    warmup_steps = int(max_train_steps * config.warmup_proportion)
@@ -83,7 +138,6 @@ def train():
        config,
        bert_config,
        len(["contradiction", "entailment", "neutral"]),
-        is_training=True,
        return_pooled_out=True)
    optimizer = Optimizer(
@@ -107,10 +161,15 @@ def train():
    cls_model.bert_layer.init_parameters(
        config.init_pretraining_params, verbose=config.verbose)
-    cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch)
+    # do train
+    cls_model.fit(train_data=train_dataloader.dataloader,
+                  epochs=config.epoch,
+                  save_dir=config.checkpoints)
-    return cls_model
+    # do eval
+    cls_model.evaluate(
+        eval_data=test_dataloader.dataloader, batch_size=config.batch_size)
 if __name__ == '__main__':
-    cls_model = train()
+    main()
--- a/examples/bert_leveldb/cls.py
+++ b/examples/bert_leveldb/cls.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"dygraph transformer layers"
-import six
-import json
-import numpy as np
-import paddle
-import paddle.fluid as fluid
-from paddle.fluid.dygraph import Linear, Layer
-from hapi.text.bert import BertEncoder
-from hapi.model import Model
-class ClsModelLayer(Model):
-    """
-    classify model
-    """
-    def __init__(self,
-                 args,
-                 config,
-                 num_labels,
-                 is_training=True,
-                 return_pooled_out=True,
-                 use_fp16=False):
-        super(ClsModelLayer, self).__init__()
-        self.config = config
-        self.is_training = is_training
-        self.use_fp16 = use_fp16
-        self.loss_scaling = args.loss_scaling
-        self.bert_layer = BertEncoder(
-            config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
-        self.cls_fc = Linear(
-            input_dim=self.config["hidden_size"],
-            output_dim=num_labels,
-            param_attr=fluid.ParamAttr(
-                name="cls_out_w",
-                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
-            bias_attr=fluid.ParamAttr(
-                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
-    def forward(self, src_ids, position_ids, sentence_ids, input_mask):
-        """
-        forward
-        """
-        enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
-                                                     sentence_ids, input_mask)
-        cls_feats = fluid.layers.dropout(
-            x=next_sent_feat,
-            dropout_prob=0.1,
-            dropout_implementation="upscale_in_train")
-        logits = self.cls_fc(cls_feats)
-        return logits
--- a/examples/bert_leveldb/nohup.out
+++ b/examples/bert_leveldb/nohup.out
--- a/examples/bert_leveldb/run_classifier_multi_gpu.sh
+++ b/examples/bert_leveldb/run_classifier_multi_gpu.sh
+#!/bin/bash
+BERT_BASE_PATH="./data/pretrained_models/uncased_L-12_H-768_A-12/"
+TASK_NAME='MNLI'
+DATA_PATH="./data/glue_data/MNLI/"
+CKPT_PATH="./data/saved_model/mnli_models"
+# start fine-tuning
+python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py\
+    --use_cuda true \
+    --do_train true \
+    --do_test true \
+    --batch_size 64 \
+    --init_pretraining_params ${BERT_BASE_PATH}/dygraph_params/ \
+    --data_dir ${DATA_PATH} \
+    --vocab_path ${BERT_BASE_PATH}/vocab.txt \
+    --checkpoints ${CKPT_PATH} \
+    --save_steps 1000 \
+    --weight_decay  0.01 \
+    --warmup_proportion 0.1 \
+    --validation_steps 100 \
+    --epoch 3 \
+    --max_seq_len 128 \
+    --bert_config_path ${BERT_BASE_PATH}/bert_config.json \
+    --learning_rate 5e-5 \
+    --skip_steps 10 \
+    --shuffle true
--- a/examples/bert_leveldb/run_classifier_single_gpu.sh
+++ b/examples/bert_leveldb/run_classifier_single_gpu.sh
@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
 DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"
-export CUDA_VISIBLE_DEVICES=7
+export CUDA_VISIBLE_DEVICES=0
 # start fine-tuning
 python3.7 bert_classifier.py\

--- a/examples/bmn/bmn_metric.py
+++ b/examples/bmn/bmn_metric.py
@@ -47,10 +47,15 @@ class BmnMetric(Metric):
            if not os.path.isdir(self.cfg.INFER.result_path):
                os.makedirs(self.cfg.INFER.result_path)
-    def add_metric_op(self, preds, label):
+    def add_metric_op(self, *args):
-        pred_bm, pred_start, pred_en = preds
+        if self.mode == 'test':
-        video_index = label[-1]
+            # only extract pred_bm, pred_start, pred_en from outputs
-        return [pred_bm, pred_start, pred_en, video_index]  #return list
+            # and video_index from label here
+            pred_bm, pred_start, pred_en, _, _, _, video_index = args
+        else:
+            # in infer mode, labels only contains video_index
+            pred_bm, pred_start, pred_en, video_index = args
+        return pred_bm, pred_start, pred_en, video_index
    def update(self, pred_bm, pred_start, pred_end, fid):
        # generate proposals

--- a/examples/bmn/eval.py
+++ b/examples/bmn/eval.py
@@ -19,7 +19,8 @@ import logging
 import paddle.fluid as fluid
 from hapi.model import set_device, Input
-from hapi.vision.models import bmn, BmnLoss
+from modeling import bmn, BmnLoss
 from bmn_metric import BmnMetric
 from reader import BmnDataset
 from config_utils import *

--- a/hapi/vision/models/bmn_model.py
+++ b/hapi/vision/models/bmn_model.py
@@ -26,7 +26,7 @@ DATATYPE = 'float32'
 pretrain_infos = {
    'bmn': ('https://paddlemodels.bj.bcebos.com/hapi/bmn.pdparams',
-            '9286c821acc4cad46d6613b931ba468c')
+            'aa84e3386e1fbd117fb96fa572feeb94')
 }
@@ -462,5 +462,5 @@ def bmn(tscale,
        weight_path = get_weights_path(*(pretrain_infos['bmn']))
        assert weight_path.endswith('.pdparams'), \
                "suffix of weight must be .pdparams"
-        model.load(weight_path[:-9])
+        model.load(weight_path)
    return model
--- a/examples/bmn/predict.py
+++ b/examples/bmn/predict.py
@@ -19,7 +19,8 @@ import logging
 import paddle.fluid as fluid
 from hapi.model import set_device, Input
-from hapi.vision.models import bmn, BmnLoss
+from modeling import bmn, BmnLoss
 from bmn_metric import BmnMetric
 from reader import BmnDataset
 from config_utils import *

--- a/examples/bmn/train.py
+++ b/examples/bmn/train.py
@@ -19,9 +19,10 @@ import sys
 import os
 from hapi.model import set_device, Input
-from hapi.vision.models import bmn, BmnLoss
 from reader import BmnDataset
 from config_utils import *
+from modeling import bmn, BmnLoss
 DATATYPE = 'float32'

--- a/examples/ocr/README.md
+++ b/examples/ocr/README.md
+简介
+--------
+本OCR任务是识别图片单行的字母信息，基于attention的seq2seq结构。 运行本目录下的程序示例需要使用PaddlePaddle develop最新版本。
+## 代码结构
+```
+.
+|-- data.py          # 数据读取
+|-- eval.py          # 评估脚本
+|-- images           # 测试图片
+|-- predict.py       # 预测脚本
+|-- seq2seq_attn.py  # 模型
+|-- train.py         # 训练脚本
+`-- utility.py       # 公共模块
+```
+## 训练/评估/预测流程
+- 设置GPU环境:
+```
+export CUDA_VISIBLE_DEVICES=0
+```
+- 训练
+```
+python train.py
+```
+更多参数可以通过`--help`查看。
+- 动静切换
+```
+python train.py --dynamic=True
+```
+- 评估
+```
+python eval.py --init_model=checkpoint/final
+```
+- 预测
+目前不支持动态图预测
+```
+python predict.py --init_model=checkpoint/final --image_path=images/ --dynamic=False --beam_size=3
+```
+预测结果如下:
+```
+Image 1: images/112_chubbiness_13557.jpg
+0: chubbines
+1: chubbiness
+2: chubbinesS
+Image 2: images/177_Interfiled_40185.jpg
+0: Interflied
+1: Interfiled
+2: InterfIled
+Image 3: images/325_dame_19109.jpg
+0: da
+1: damo
+2: dame
+Image 4: images/368_fixtures_29232.jpg
+0: firtures
+1: Firtures
+2: fixtures
+```
--- a/examples/ocr/data.py
+++ b/examples/ocr/data.py
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+from os import path
+import random
+import traceback
+import copy
+import math
+import tarfile
+from PIL import Image
+import logging
+logger = logging.getLogger(__name__)
+import paddle
+from paddle import fluid
+from paddle.fluid.dygraph.parallel import ParallelEnv
+DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5"
+DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz"
+CACHE_DIR_NAME = "attention_data"
+SAVED_FILE_NAME = "data.tar.gz"
+DATA_DIR_NAME = "data"
+TRAIN_DATA_DIR_NAME = "train_images"
+TEST_DATA_DIR_NAME = "test_images"
+TRAIN_LIST_FILE_NAME = "train.list"
+TEST_LIST_FILE_NAME = "test.list"
+class Resize(object):
+    def __init__(self, height=48):
+        self.interp = Image.NEAREST  # Image.ANTIALIAS
+        self.height = height
+    def __call__(self, samples):
+        shape = samples[0][0].size
+        for i in range(len(samples)):
+            im = samples[i][0]
+            im = im.resize((shape[0], self.height), self.interp)
+            samples[i][0] = im
+        return samples
+class Normalize(object):
+    def __init__(self,
+                 mean=[127.5],
+                 std=[1.0],
+                 scale=False,
+                 channel_first=True):
+        self.mean = mean
+        self.std = std
+        self.scale = scale
+        self.channel_first = channel_first
+        if not (isinstance(self.mean, list) and isinstance(self.std, list) and
+                isinstance(self.scale, bool)):
+            raise TypeError("{}: input type is invalid.".format(self))
+    def __call__(self, samples):
+        for i in range(len(samples)):
+            im = samples[i][0]
+            im = np.array(im).astype(np.float32, copy=False)
+            im = im[np.newaxis, ...]
+            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+            std = np.array(self.std)[np.newaxis, np.newaxis, :]
+            if self.scale:
+                im = im / 255.0
+            #im -= mean
+            im -= 127.5
+            #im /= std
+            samples[i][0] = im
+        return samples
+class PadTarget(object):
+    def __init__(self, SOS=0, EOS=1):
+        self.SOS = SOS
+        self.EOS = EOS
+    def __call__(self, samples):
+        lens = np.array([len(s[1]) for s in samples], dtype="int64")
+        max_len = np.max(lens)
+        for i in range(len(samples)):
+            label = samples[i][1]
+            if max_len > len(label):
+                pad_label = label + [self.EOS] * (max_len - len(label))
+            else:
+                pad_label = label
+            samples[i][1] = np.array([self.SOS] + pad_label, dtype='int64')
+            # label_out
+            samples[i].append(np.array(pad_label + [self.EOS], dtype='int64'))
+            mask = np.zeros((max_len + 1)).astype('float32')
+            mask[:len(label) + 1] = 1.0
+            # mask
+            samples[i].append(np.array(mask, dtype='float32'))
+        return samples
+class BatchSampler(fluid.io.BatchSampler):
+    # Batch sampler that yields lists of sample indices for the current
+    # rank; nranks / local_rank / dev_id are read from ParallelEnv().
+    # NOTE(review): __init__ does not call the base-class constructor.
+    def __init__(self,
+                 dataset,
+                 batch_size,
+                 shuffle=False,
+                 drop_last=True,
+                 seed=None):
+        self._dataset = dataset
+        self._batch_size = batch_size
+        self._shuffle = shuffle
+        self._drop_last = drop_last
+        # NOTE(review): this is the numpy.random module itself, so seed()
+        # below reseeds numpy's module-level generator.
+        self._random = np.random
+        self._random.seed(seed)
+        self._nranks = ParallelEnv().nranks
+        self._local_rank = ParallelEnv().local_rank
+        self._device_id = ParallelEnv().dev_id
+        # Samples per rank, rounded up; _total_size is that count times the
+        # number of ranks, so it is never smaller than len(dataset).
+        self._num_samples = int(
+            math.ceil(len(self._dataset) * 1.0 / self._nranks))
+        self._total_size = self._num_samples * self._nranks
+        self._epoch = 0
+    def __iter__(self):
+        # Yields one list of SampleInfo.idx values per batch for this rank.
+        infos = copy.copy(self._dataset._sample_infos)
+        skip_num = 0
+        if self._shuffle:
+            if self._batch_size == 1:
+                # Full shuffle, seeded with the epoch counter.
+                self._random.RandomState(self._epoch).shuffle(infos)
+            else:  # partial shuffle
+                # Sort by the `w` attribute, then rotate the order by a
+                # random offset in [1, 100] (applied just below).
+                infos = sorted(infos, key=lambda x: x.w)
+                skip_num = random.randint(1, 100)
+        infos = infos[skip_num:] + infos[:skip_num]
+        # Pad with entries from the front so the length reaches _total_size.
+        infos += infos[:(self._total_size - len(infos))]
+        # Trailing entries that do not fill one full batch on every rank.
+        last_size = self._total_size % (self._batch_size * self._nranks)
+        batches = []
+        # This rank takes one batch out of every group of nranks batches,
+        # starting at its own offset.
+        for i in range(self._local_rank * self._batch_size,
+                       len(infos) - last_size,
+                       self._batch_size * self._nranks):
+            batches.append(infos[i:i + self._batch_size])
+        if (not self._drop_last) and last_size != 0:
+            # Split the leftover entries evenly between ranks.
+            last_local_size = last_size // self._nranks
+            last_infos = infos[len(infos) - last_size:]
+            start = self._local_rank * last_local_size
+            batches.append(last_infos[start:start + last_local_size])
+        if self._shuffle:
+            # Shuffle the order of the batches; the seed is the epoch counter,
+            # which advances once per iteration pass.
+            self._random.RandomState(self._epoch).shuffle(batches)
+            self._epoch += 1
+        for batch in batches:
+            batch_indices = [info.idx for info in batch]
+            yield batch_indices
+    def __len__(self):
+        # NOTE(review): derived from _total_size (all ranks together), not
+        # from the number of batches __iter__ yields on one rank - confirm
+        # this is intended.
+        if self._drop_last:
+            return self._total_size // self._batch_size
+        else:
+            return math.ceil(self._total_size / float(self._batch_size))
+class SampleInfo(object):
+    def __init__(self, idx, h, w, im_name, labels):
+        self.idx = idx
+        self.h = h
+        self.w = w
+        self.im_name = im_name
+        self.labels = labels
+class OCRDataset(paddle.io.Dataset):
+    def __init__(self, image_dir, anno_file):
+        self.image_dir = image_dir
+        self.anno_file = anno_file
+        self._sample_infos = []
+        with open(anno_file, 'r') as f:
+            for i, line in enumerate(f):
+                w, h, im_name, labels = line.strip().split(' ')
+                h, w = int(h), int(w)
+                labels = [int(c) for c in labels.split(',')]
+                self._sample_infos.append(SampleInfo(i, h, w, im_name, labels))
+    def __getitem__(self, idx):
+        info = self._sample_infos[idx]
+        im_name, labels = info.im_name, info.labels
+        image = Image.open(path.join(self.image_dir, im_name)).convert('L')
+        return [image, labels]
+    def __len__(self):
+        return len(self._sample_infos)
+def train(
+        root_dir=None,
+        images_dir=None,
+        anno_file=None,
+        shuffle=True, ):
+    if root_dir is None:
+        root_dir = download_data()
+    if images_dir is None:
+        images_dir = TRAIN_DATA_DIR_NAME
+    images_dir = path.join(root_dir, TRAIN_DATA_DIR_NAME)
+    if anno_file is None:
+        anno_file = TRAIN_LIST_FILE_NAME
+    anno_file = path.join(root_dir, TRAIN_LIST_FILE_NAME)
+    return OCRDataset(images_dir, anno_file)
+def test(
+        root_dir=None,
+        images_dir=None,
+        anno_file=None,
+        shuffle=True, ):
+    if root_dir is None:
+        root_dir = download_data()
+    if images_dir is None:
+        images_dir = TEST_DATA_DIR_NAME
+    images_dir = path.join(root_dir, TEST_DATA_DIR_NAME)
+    if anno_file is None:
+        anno_file = TEST_LIST_FILE_NAME
+    anno_file = path.join(root_dir, TEST_LIST_FILE_NAME)
+    return OCRDataset(images_dir, anno_file)
+def download_data():
+    '''Download train and test data.
+    '''
+    tar_file = paddle.dataset.common.download(
+        DATA_URL, CACHE_DIR_NAME, DATA_MD5, save_name=SAVED_FILE_NAME)
+    data_dir = path.join(path.dirname(tar_file), DATA_DIR_NAME)
+    if not path.isdir(data_dir):
+        t = tarfile.open(tar_file, "r:gz")
+        t.extractall(path=path.dirname(tar_file))
+        t.close()
+    return data_dir
--- a/examples/ocr/eval.py
+++ b/examples/ocr/eval.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import argparse
+import functools
+import paddle.fluid.profiler as profiler
+import paddle.fluid as fluid
+from hapi.model import Input, set_device
+from hapi.vision.transforms import BatchCompose
+from utility import add_arguments, print_arguments
+from utility import SeqAccuracy, LoggerCallBack, SeqBeamAccuracy
+from utility import postprocess
+from seq2seq_attn import Seq2SeqAttModel, Seq2SeqAttInferModel, WeightCrossEntropy
+import data
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size',        int,   32,                 "Minibatch size.")
+add_arg('test_images',       str,   None,               "The directory of images to be used for test.")
+add_arg('test_list',         str,   None,               "The list file of images to be used for training.")
+add_arg('init_model',        str,   'checkpoint/final', "The init model file of directory.")
+add_arg('use_gpu',           bool,  True,               "Whether use GPU to train.")
+add_arg('encoder_size',      int,   200,                "Encoder size.")
+add_arg('decoder_size',      int,   128,                "Decoder size.")
+add_arg('embedding_dim',     int,   128,                "Word vector dim.")
+add_arg('num_classes',       int,   95,                 "Number classes.")
+add_arg('beam_size',         int,   0,                  "If set beam size, will use beam search.")
+add_arg('dynamic',           bool,  False,              "Whether to use dygraph.")
+# yapf: enable
+def main(FLAGS):
+    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
+    fluid.enable_dygraph(device) if FLAGS.dynamic else None
+    model = Seq2SeqAttModel(
+        encoder_size=FLAGS.encoder_size,
+        decoder_size=FLAGS.decoder_size,
+        emb_dim=FLAGS.embedding_dim,
+        num_classes=FLAGS.num_classes)
+    # yapf: disable
+    inputs = [
+        Input([None, 1, 48, 384], "float32", name="pixel"),
+        Input([None, None], "int64", name="label_in")
+    ]
+    labels = [
+        Input([None, None], "int64", name="label_out"),
+        Input([None, None], "float32", name="mask")
+    ]
+    # yapf: enable
+    model.prepare(
+        loss_function=WeightCrossEntropy(),
+        metrics=SeqAccuracy(),
+        inputs=inputs,
+        labels=labels,
+        device=device)
+    model.load(FLAGS.init_model)
+    test_dataset = data.test()
+    test_collate_fn = BatchCompose(
+        [data.Resize(), data.Normalize(), data.PadTarget()])
+    test_sampler = data.BatchSampler(
+        test_dataset,
+        batch_size=FLAGS.batch_size,
+        drop_last=False,
+        shuffle=False)
+    test_loader = fluid.io.DataLoader(
+        test_dataset,
+        batch_sampler=test_sampler,
+        places=device,
+        num_workers=0,
+        return_list=True,
+        collate_fn=test_collate_fn)
+    model.evaluate(
+        eval_data=test_loader,
+        callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
+def beam_search(FLAGS):
+    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
+    fluid.enable_dygraph(device) if FLAGS.dynamic else None
+    model = Seq2SeqAttInferModel(
+        encoder_size=FLAGS.encoder_size,
+        decoder_size=FLAGS.decoder_size,
+        emb_dim=FLAGS.embedding_dim,
+        num_classes=FLAGS.num_classes,
+        beam_size=FLAGS.beam_size)
+    inputs = [
+        Input(
+            [None, 1, 48, 384], "float32", name="pixel"), Input(
+                [None, None], "int64", name="label_in")
+    ]
+    labels = [
+        Input(
+            [None, None], "int64", name="label_out"), Input(
+                [None, None], "float32", name="mask")
+    ]
+    model.prepare(
+        loss_function=None,
+        metrics=SeqBeamAccuracy(),
+        inputs=inputs,
+        labels=labels,
+        device=device)
+    model.load(FLAGS.init_model)
+    test_dataset = data.test()
+    test_collate_fn = BatchCompose(
+        [data.Resize(), data.Normalize(), data.PadTarget()])
+    test_sampler = data.BatchSampler(
+        test_dataset,
+        batch_size=FLAGS.batch_size,
+        drop_last=False,
+        shuffle=False)
+    test_loader = fluid.io.DataLoader(
+        test_dataset,
+        batch_sampler=test_sampler,
+        places=device,
+        num_workers=0,
+        return_list=True,
+        collate_fn=test_collate_fn)
+    model.evaluate(
+        eval_data=test_loader,
+        callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
+if __name__ == '__main__':
+    FLAGS = parser.parse_args()
+    print_arguments(FLAGS)
+    # A non-zero beam size selects beam_search(); otherwise main() runs.
+    (beam_search if FLAGS.beam_size else main)(FLAGS)
--- a/examples/ocr/images/112_chubbiness_13557.jpg
+++ b/examples/ocr/images/112_chubbiness_13557.jpg
--- a/examples/ocr/images/177_Interfiled_40185.jpg
+++ b/examples/ocr/images/177_Interfiled_40185.jpg
--- a/examples/ocr/images/325_dame_19109.jpg
+++ b/examples/ocr/images/325_dame_19109.jpg
--- a/examples/ocr/images/368_fixtures_29232.jpg
+++ b/examples/ocr/images/368_fixtures_29232.jpg
--- a/examples/ocr/predict.py
+++ b/examples/ocr/predict.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import os
+import sys
+import random
+import numpy as np
+import argparse
+import functools
+from PIL import Image
+import paddle.fluid.profiler as profiler
+import paddle.fluid as fluid
+from hapi.model import Input, set_device
+from hapi.datasets.folder import ImageFolder
+from hapi.vision.transforms import BatchCompose
+from utility import add_arguments, print_arguments
+from utility import postprocess, index2word
+from seq2seq_attn import Seq2SeqAttInferModel, WeightCrossEntropy
+import data
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# Each row is (flag name, type, default, help text); rows are registered
+# in the order listed.
+# yapf: disable
+for _spec in (
+    ('batch_size',    int,  1,     "Minibatch size."),
+    ('image_path',    str,  None,  "The directory of images to be used for test."),
+    ('init_model',    str,  None,  "The init model file of directory."),
+    ('use_gpu',       bool, True,  "Whether use GPU to train."),
+    # model hyper-parameters
+    ('encoder_size',  int,  200,   "Encoder size."),
+    ('decoder_size',  int,  128,   "Decoder size."),
+    ('embedding_dim', int,  128,   "Word vector dim."),
+    ('num_classes',   int,  95,    "Number classes."),
+    ('beam_size',     int,  3,     "Beam size for beam search."),
+    ('dynamic',       bool, False, "Whether to use dygraph."),
+):
+    add_arg(*_spec)
+del _spec  # do not leave the loop variable in the module namespace
+# yapf: enable
+def main(FLAGS):
+    """Decode every image under ``FLAGS.image_path`` and print the results.
+    Builds a Seq2SeqAttInferModel, loads the weights at
+    ``FLAGS.init_model`` and, for each image, prints its path followed by
+    one decoded string per beam.
+    :param FLAGS: parsed command-line namespace; reads use_gpu, dynamic,
+        encoder_size, decoder_size, embedding_dim, num_classes, beam_size,
+        init_model, image_path and batch_size.
+    """
+    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
+    if FLAGS.dynamic:
+        fluid.enable_dygraph(device)
+    model = Seq2SeqAttInferModel(
+        encoder_size=FLAGS.encoder_size,
+        decoder_size=FLAGS.decoder_size,
+        emb_dim=FLAGS.embedding_dim,
+        num_classes=FLAGS.num_classes,
+        beam_size=FLAGS.beam_size)
+    inputs = [Input([None, 1, 48, 384], "float32", name="pixel")]
+    model.prepare(inputs=inputs, device=device)
+    model.load(FLAGS.init_model)
+    def load_grayscale(path):
+        # Mode 'L' is PIL's single-channel 8-bit grayscale.
+        return Image.open(path).convert('L')
+    test_dataset = ImageFolder(FLAGS.image_path, loader=load_grayscale)
+    test_collate_fn = BatchCompose([data.Resize(), data.Normalize()])
+    # Honour the --batch_size flag; it was previously parsed but never
+    # passed on, so the loader always used its own default.
+    test_loader = fluid.io.DataLoader(
+        test_dataset,
+        batch_size=FLAGS.batch_size,
+        places=device,
+        num_workers=0,
+        return_list=True,
+        collate_fn=test_collate_fn)
+    samples = test_dataset.samples
+    ins_id = 0
+    for image, in test_loader:
+        image = image if FLAGS.dynamic else image[0]
+        pred = model.test_batch([image])[0]
+        # Give 2-D output a trailing axis, then swap the last two axes so
+        # that iterating one instance yields one row per beam.
+        if len(pred.shape) == 2:
+            pred = pred[:, :, np.newaxis]
+        pred = np.transpose(pred, [0, 2, 1])
+        for ins in pred:
+            impath = samples[ins_id]
+            ins_id += 1
+            print('Image {}: {}'.format(ins_id, impath))
+            for beam_idx, beam in enumerate(ins):
+                sequence = "".join(index2word(postprocess(beam)))
+                print('{}: {}'.format(beam_idx, sequence))
+# Script entry point: parse the command-line flags, echo them, run main().
+if __name__ == '__main__':
+    FLAGS = parser.parse_args()
+    print_arguments(FLAGS)
+    main(FLAGS)
--- a/examples/ocr/seq2seq_attn.py
+++ b/examples/ocr/seq2seq_attn.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import numpy as np
+import paddle.fluid as fluid
+import paddle.fluid.layers as layers
+from paddle.fluid.layers import BeamSearchDecoder
+from hapi.text import RNNCell, RNN, DynamicDecode
+from hapi.model import Model, Loss
+class ConvBNPool(fluid.dygraph.Layer):
+    """Two 3x3 conv + batch-norm stages with optional 2x2 max pooling.
+    :param in_ch: number of input channels of the first convolution.
+    :param out_ch: number of output channels of both convolutions.
+    :param act: activation applied by the two batch-norm layers.
+    :param is_test: accepted but not used by this layer.
+    :param pool: when true, append a stride-2 max pool (ceil mode).
+    :param use_cudnn: forwarded to the convolutions and the pooling layer.
+    """
+    def __init__(self,
+                 in_ch,
+                 out_ch,
+                 act="relu",
+                 is_test=False,
+                 pool=True,
+                 use_cudnn=True):
+        super(ConvBNPool, self).__init__()
+        self.pool = pool
+        kernel = 3
+        def normal_attr(channels):
+            # Normal(0, sqrt(2 / (kernel^2 * channels))) weight initializer.
+            std = (2.0 / (kernel**2 * channels))**0.5
+            return fluid.ParamAttr(
+                initializer=fluid.initializer.Normal(0.0, std))
+        first_attr = normal_attr(in_ch)
+        second_attr = normal_attr(out_ch)
+        self.conv0 = fluid.dygraph.Conv2D(
+            in_ch,
+            out_ch,
+            3,
+            padding=1,
+            param_attr=first_attr,
+            bias_attr=False,
+            act=None,
+            use_cudnn=use_cudnn)
+        self.bn0 = fluid.dygraph.BatchNorm(out_ch, act=act)
+        self.conv1 = fluid.dygraph.Conv2D(
+            out_ch,
+            out_ch,
+            filter_size=3,
+            padding=1,
+            param_attr=second_attr,
+            bias_attr=False,
+            act=None,
+            use_cudnn=use_cudnn)
+        self.bn1 = fluid.dygraph.BatchNorm(out_ch, act=act)
+        # When pooling is requested the boolean flag stored above is
+        # replaced by the pooling layer itself; forward() tests its truthiness.
+        if self.pool:
+            self.pool = fluid.dygraph.Pool2D(
+                pool_size=2,
+                pool_type='max',
+                pool_stride=2,
+                use_cudnn=use_cudnn,
+                ceil_mode=True)
+    def forward(self, inputs):
+        """Apply conv0 -> bn0 -> conv1 -> bn1 and, if enabled, the pool."""
+        feat = self.bn0(self.conv0(inputs))
+        feat = self.bn1(self.conv1(feat))
+        return self.pool(feat) if self.pool else feat
+class CNN(fluid.dygraph.Layer):
+    """Backbone of four ConvBNPool stages with 16, 32, 64 and 128 channels.
+    The first three stages pool; the last one is built with ``pool=False``.
+    ``is_test`` is accepted but not used.
+    """
+    def __init__(self, in_ch=1, is_test=False):
+        super(CNN, self).__init__()
+        self.conv_bn1 = ConvBNPool(in_ch, 16)
+        self.conv_bn2 = ConvBNPool(16, 32)
+        self.conv_bn3 = ConvBNPool(32, 64)
+        self.conv_bn4 = ConvBNPool(64, 128, pool=False)
+    def forward(self, inputs):
+        """Run the four stages in sequence and return the last feature map."""
+        feat = inputs
+        for stage in (self.conv_bn1, self.conv_bn2, self.conv_bn3,
+                      self.conv_bn4):
+            feat = stage(feat)
+        return feat
+class GRUCell(RNNCell):
+    """GRU step cell: a bias-free linear projection followed by a GRUUnit.
+    :param input_size: size of the step input.
+    :param hidden_size: size of the hidden state.
+    :param param_attr: weight attribute shared by the projection and the unit.
+    :param bias_attr: bias attribute of the GRU unit only.
+    :param gate_activation: activation of the GRU gates.
+    :param candidate_activation: activation of the candidate hidden state.
+    :param origin_mode: forwarded unchanged to ``GRUUnit``.
+    """
+    def __init__(self,
+                 input_size,
+                 hidden_size,
+                 param_attr=None,
+                 bias_attr=None,
+                 gate_activation='sigmoid',
+                 candidate_activation='tanh',
+                 origin_mode=False):
+        super(GRUCell, self).__init__()
+        self.hidden_size = hidden_size
+        gate_width = hidden_size * 3
+        self.fc_layer = fluid.dygraph.Linear(
+            input_size, gate_width, param_attr=param_attr, bias_attr=False)
+        self.gru_unit = fluid.dygraph.GRUUnit(
+            gate_width,
+            param_attr=param_attr,
+            bias_attr=bias_attr,
+            activation=candidate_activation,
+            gate_activation=gate_activation,
+            origin_mode=origin_mode)
+    def forward(self, inputs, states):
+        """Advance one step; the new hidden state is both output and state."""
+        projected = self.fc_layer(inputs)
+        new_hidden, _, _ = self.gru_unit(projected, states)
+        return new_hidden, new_hidden
+    @property
+    def state_shape(self):
+        """Shape of the state, as a one-element list holding hidden_size."""
+        return [self.hidden_size]
+class Encoder(fluid.dygraph.Layer):
+    """CNN backbone followed by forward and reverse GRU passes.
+    :param in_channel: number of input image channels.
+    :param rnn_hidden_size: hidden size of each GRU direction.
+    :param decoder_size: output size of the projection of the encoded vector.
+    :param is_test: forwarded to the CNN backbone.
+    """
+    def __init__(
+            self,
+            in_channel=1,
+            rnn_hidden_size=200,
+            decoder_size=128,
+            is_test=False, ):
+        super(Encoder, self).__init__()
+        self.rnn_hidden_size = rnn_hidden_size
+        self.backbone = CNN(in_ch=in_channel, is_test=is_test)
+        para_attr = fluid.ParamAttr(
+            initializer=fluid.initializer.Normal(0.0, 0.02))
+        bias_attr = fluid.ParamAttr(
+            initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
+        def build_gru(reverse):
+            # 128 * 6 has to equal c * h of the reshaped backbone output
+            # (presumably 128 channels x height 6 -- TODO confirm).
+            step_cell = GRUCell(
+                input_size=128 * 6,
+                hidden_size=rnn_hidden_size,
+                param_attr=para_attr,
+                bias_attr=bias_attr,
+                candidate_activation='relu')
+            return RNN(cell=step_cell, is_reverse=reverse, time_major=False)
+        self.gru_fwd = build_gru(False)
+        self.gru_bwd = build_gru(True)
+        self.encoded_proj_fc = fluid.dygraph.Linear(
+            rnn_hidden_size * 2, decoder_size, bias_attr=False)
+    def forward(self, inputs):
+        """Encode ``inputs``.
+        :returns: ``(gru_bwd, encoded_vector, encoded_proj)``: the reverse
+            GRU output, both GRU outputs concatenated on axis 2, and the
+            linear projection of that concatenation.
+        """
+        features = fluid.layers.transpose(
+            self.backbone(inputs), perm=[0, 3, 1, 2])
+        # After the transpose the last two axes are merged into one feature
+        # axis; the second axis becomes the sequence axis.
+        _, _, c, h = features.shape
+        seq_feature = fluid.layers.reshape(features, [0, -1, c * h])
+        gru_fwd, _ = self.gru_fwd(seq_feature)
+        gru_bwd, _ = self.gru_bwd(seq_feature)
+        encoded_vector = fluid.layers.concat(input=[gru_fwd, gru_bwd], axis=2)
+        return gru_bwd, encoded_vector, self.encoded_proj_fc(encoded_vector)
+class Attention(fluid.dygraph.Layer):
+    """
+    Additive attention, following
+    Neural Machine Translation by Jointly Learning to Align and Translate.
+    https://arxiv.org/abs/1409.0473
+    """
+    def __init__(self, decoder_size):
+        super(Attention, self).__init__()
+        self.fc1 = fluid.dygraph.Linear(
+            decoder_size, decoder_size, bias_attr=False)
+        self.fc2 = fluid.dygraph.Linear(decoder_size, 1, bias_attr=False)
+    def forward(self, encoder_vec, encoder_proj, decoder_state):
+        """Return ``encoder_vec`` weighted by attention and summed over dim 1.
+        Scores come from tanh(encoder_proj + fc1(decoder_state)) passed
+        through ``fc2`` and a softmax.
+        """
+        # Project the decoder state and add an axis at position 1 before
+        # adding it to the encoder projection.
+        query = fluid.layers.unsqueeze(self.fc1(decoder_state), [1])
+        energy = fluid.layers.tanh(
+            fluid.layers.elementwise_add(encoder_proj, query))
+        scores = fluid.layers.squeeze(self.fc2(energy), [2])
+        weights = fluid.layers.softmax(scores)
+        weighted = fluid.layers.elementwise_mul(
+            x=encoder_vec, y=weights, axis=0)
+        return fluid.layers.reduce_sum(weighted, dim=1)
+class DecoderCell(RNNCell):
+    """Decoder step cell: attention over the encoder outputs, then a GRUCell.
+    The GRU input is the current word concatenated with the attention
+    context, hence ``encoder_size * 2 + decoder_size`` input features.
+    """
+    def __init__(self, encoder_size=200, decoder_size=128):
+        super(DecoderCell, self).__init__()
+        self.attention = Attention(decoder_size)
+        self.gru_cell = GRUCell(
+            input_size=encoder_size * 2 + decoder_size,
+            hidden_size=decoder_size)
+    def forward(self, current_word, states, encoder_vec, encoder_proj):
+        """Advance one step; the new hidden state is both output and state."""
+        attended = self.attention(encoder_vec, encoder_proj, states)
+        step_input = fluid.layers.concat([current_word, attended], axis=1)
+        new_hidden, _ = self.gru_cell(step_input, states)
+        return new_hidden, new_hidden
+class Decoder(fluid.dygraph.Layer):
+    """Runs DecoderCell over the target sequence, then a softmax classifier.
+    The classifier has ``num_classes + 2`` outputs. ``emb_dim`` is accepted
+    but not used by this layer.
+    """
+    def __init__(self, num_classes, emb_dim, encoder_size, decoder_size):
+        super(Decoder, self).__init__()
+        self.decoder_attention = RNN(DecoderCell(encoder_size, decoder_size))
+        self.fc = fluid.dygraph.Linear(
+            decoder_size, num_classes + 2, act='softmax')
+    def forward(self, target, initial_states, encoder_vec, encoder_proj):
+        """Return the softmax output of ``fc`` for every decoder step."""
+        step_outputs, _ = self.decoder_attention(
+            target,
+            initial_states=initial_states,
+            encoder_vec=encoder_vec,
+            encoder_proj=encoder_proj)
+        return self.fc(step_outputs)
+class Seq2SeqAttModel(Model):
+    """Attention-based sequence-to-sequence model used for training.
+    :param in_channle: number of input image channels (parameter name kept
+        as spelled, since callers may pass it by keyword).
+    :param encoder_size: hidden size of each encoder GRU direction.
+    :param decoder_size: hidden size of the decoder.
+    :param emb_dim: embedding width of the target tokens.
+    :param num_classes: number of classes; the embedding table and the
+        classifier both use ``num_classes + 2`` entries.
+    """
+    def __init__(
+            self,
+            in_channle=1,
+            encoder_size=200,
+            decoder_size=128,
+            emb_dim=128,
+            num_classes=None, ):
+        super(Seq2SeqAttModel, self).__init__()
+        self.encoder = Encoder(in_channle, encoder_size, decoder_size)
+        # Maps the reverse-GRU output at step 0 to the decoder's first state.
+        self.fc = fluid.dygraph.Linear(
+            input_dim=encoder_size,
+            output_dim=decoder_size,
+            bias_attr=False,
+            act='relu')
+        vocab_size = num_classes + 2
+        self.embedding = fluid.dygraph.Embedding(
+            [vocab_size, emb_dim], dtype='float32')
+        self.decoder = Decoder(num_classes, emb_dim, encoder_size,
+                               decoder_size)
+    def forward(self, inputs, target):
+        """Return the decoder prediction for ``inputs`` given ``target``."""
+        gru_backward, encoded_vector, encoded_proj = self.encoder(inputs)
+        initial_state = self.fc(gru_backward[:, 0])
+        embedded_target = self.embedding(target)
+        return self.decoder(embedded_target, initial_state, encoded_vector,
+                            encoded_proj)
+class Seq2SeqAttInferModel(Seq2SeqAttModel):
+    """Inference variant that decodes with a BeamSearchDecoder.
+    Reuses the parent's decoder cell, embedding and output layer.
+    :param beam_size: beam width; a falsy value skips the encoder tiling
+        in ``forward``.
+    :param bos_id: start token passed to the decoder.
+    :param eos_id: end token passed to the decoder.
+    :param max_out_len: maximum number of decoding steps.
+    """
+    def __init__(
+            self,
+            in_channle=1,
+            encoder_size=200,
+            decoder_size=128,
+            emb_dim=128,
+            num_classes=None,
+            beam_size=0,
+            bos_id=0,
+            eos_id=1,
+            max_out_len=20, ):
+        super(Seq2SeqAttInferModel, self).__init__(
+            in_channle, encoder_size, decoder_size, emb_dim, num_classes)
+        self.beam_size = beam_size
+        # Beam-search decoder wrapped for dynamic decoding at inference time.
+        beam_decoder = BeamSearchDecoder(
+            self.decoder.decoder_attention.cell,
+            start_token=bos_id,
+            end_token=eos_id,
+            beam_size=beam_size,
+            embedding_fn=self.embedding,
+            output_fn=self.decoder.fc)
+        self.infer_decoder = DynamicDecode(
+            beam_decoder, max_step_num=max_out_len, is_test=True)
+    def forward(self, inputs, *args):
+        """Decode ``inputs``; any extra positional arguments are ignored."""
+        gru_backward, encoded_vector, encoded_proj = self.encoder(inputs)
+        initial_state = self.fc(gru_backward[:, 0])
+        if self.beam_size:
+            # Tile the batch dimension with beam_size
+            tile = BeamSearchDecoder.tile_beam_merge_with_batch
+            encoded_vector = tile(encoded_vector, self.beam_size)
+            encoded_proj = tile(encoded_proj, self.beam_size)
+        # The second value returned by the decoder is not used.
+        decoded, _ = self.infer_decoder(
+            inits=initial_state,
+            encoder_vec=encoded_vector,
+            encoder_proj=encoded_proj)
+        return decoded
+class WeightCrossEntropy(Loss):
+    """Cross entropy weighted by a mask and summed (``average=False``)."""
+    def __init__(self):
+        super(WeightCrossEntropy, self).__init__(average=False)
+    def forward(self, outputs, labels):
+        """Return the mask-weighted cross-entropy sum.
+        :param outputs: model outputs; only ``outputs[0]`` is used.
+        :param labels: a ``(label, mask)`` pair.
+        """
+        label, mask = labels
+        per_token = layers.cross_entropy(outputs[0], label=label)
+        weighted = layers.elementwise_mul(per_token, mask, axis=0)
+        return layers.reduce_sum(weighted)
--- a/examples/ocr/train.py
+++ b/examples/ocr/train.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import os
+import sys
+import random
+import numpy as np
+import argparse
+import functools
+import paddle.fluid.profiler as profiler
+import paddle.fluid as fluid
+from hapi.model import Input, set_device
+from hapi.vision.transforms import BatchCompose
+from utility import add_arguments, print_arguments
+from utility import SeqAccuracy, LoggerCallBack
+from seq2seq_attn import Seq2SeqAttModel, WeightCrossEntropy
+import data
+# Command-line flags of the training script; each add_arg call is
+# (flag name, type, default, help text).
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size',        int,   32,           "Minibatch size.")
+add_arg('epoch',             int,   30,           "Epoch number.")
+add_arg('num_workers',       int,   0,            "workers number.")
+add_arg('lr',                float, 0.001,        "Learning rate.")
+add_arg('lr_decay_strategy', str,   "",           "Learning rate decay strategy.")
+add_arg('checkpoint_path',   str,   "checkpoint", "The directory the model to be saved to.")
+add_arg('train_images',      str,   None,         "The directory of images to be used for training.")
+add_arg('train_list',        str,   None,         "The list file of images to be used for training.")
+add_arg('test_images',       str,   None,         "The directory of images to be used for test.")
+# The help text used to repeat the train_list wording ("for training").
+add_arg('test_list',         str,   None,         "The list file of images to be used for test.")
+add_arg('resume_path',       str,   None,         "The init model file of directory.")
+add_arg('use_gpu',           bool,  True,         "Whether use GPU to train.")
+# model hyper-parameters
+add_arg('encoder_size',      int,   200,          "Encoder size.")
+add_arg('decoder_size',      int,   128,          "Decoder size.")
+add_arg('embedding_dim',     int,   128,          "Word vector dim.")
+add_arg('num_classes',       int,   95,           "Number classes.")
+add_arg('gradient_clip',     float, 5.0,          "Gradient clip value.")
+add_arg('dynamic',           bool,  False,        "Whether to use dygraph.")
+# yapf: enable
+def main(FLAGS):
+    """Train a Seq2SeqAttModel, evaluating on ``data.test()`` during fit.
+    Uses Adam with global-norm gradient clipping, WeightCrossEntropy as
+    the loss and SeqAccuracy as the metric; checkpoints are saved under
+    ``FLAGS.checkpoint_path``.
+    :param FLAGS: parsed command-line namespace; reads use_gpu, dynamic,
+        encoder_size, decoder_size, embedding_dim, num_classes, lr,
+        lr_decay_strategy, gradient_clip, batch_size, num_workers, epoch
+        and checkpoint_path.
+    """
+    place = set_device("gpu" if FLAGS.use_gpu else "cpu")
+    if FLAGS.dynamic:
+        fluid.enable_dygraph(place)
+    net = Seq2SeqAttModel(
+        encoder_size=FLAGS.encoder_size,
+        decoder_size=FLAGS.decoder_size,
+        emb_dim=FLAGS.embedding_dim,
+        num_classes=FLAGS.num_classes)
+    # Constant learning rate unless piecewise decay is requested, in which
+    # case it drops to 0.1x and 0.01x at the two step boundaries.
+    base_lr = FLAGS.lr
+    schedule = base_lr
+    if FLAGS.lr_decay_strategy == "piecewise_decay":
+        schedule = fluid.layers.piecewise_decay(
+            [200000, 250000], [base_lr, base_lr * 0.1, base_lr * 0.01])
+    clip = fluid.clip.GradientClipByGlobalNorm(FLAGS.gradient_clip)
+    adam = fluid.optimizer.Adam(
+        learning_rate=schedule,
+        parameter_list=net.parameters(),
+        grad_clip=clip)
+    # Input and label specifications handed to model.prepare.
+    pixel_spec = Input([None, 1, 48, 384], "float32", name="pixel")
+    label_in_spec = Input([None, None], "int64", name="label_in")
+    label_out_spec = Input([None, None], "int64", name="label_out")
+    mask_spec = Input([None, None], "float32", name="mask")
+    net.prepare(
+        adam,
+        WeightCrossEntropy(),
+        SeqAccuracy(),
+        inputs=[pixel_spec, label_in_spec],
+        labels=[label_out_spec, mask_spec])
+    # Training data: shuffled batches, loaded with FLAGS.num_workers workers.
+    train_set = data.train()
+    train_collate = BatchCompose(
+        [data.Resize(), data.Normalize(), data.PadTarget()])
+    train_batches = data.BatchSampler(
+        train_set, batch_size=FLAGS.batch_size, shuffle=True)
+    train_loader = fluid.io.DataLoader(
+        train_set,
+        batch_sampler=train_batches,
+        places=place,
+        num_workers=FLAGS.num_workers,
+        return_list=True,
+        collate_fn=train_collate)
+    # Evaluation data: dataset order, no dropped tail, loaded in-process.
+    eval_set = data.test()
+    eval_collate = BatchCompose(
+        [data.Resize(), data.Normalize(), data.PadTarget()])
+    eval_batches = data.BatchSampler(
+        eval_set,
+        batch_size=FLAGS.batch_size,
+        drop_last=False,
+        shuffle=False)
+    eval_loader = fluid.io.DataLoader(
+        eval_set,
+        batch_sampler=eval_batches,
+        places=place,
+        num_workers=0,
+        return_list=True,
+        collate_fn=eval_collate)
+    progress = LoggerCallBack(10, 2, FLAGS.batch_size)
+    net.fit(train_data=train_loader,
+            eval_data=eval_loader,
+            epochs=FLAGS.epoch,
+            save_dir=FLAGS.checkpoint_path,
+            callbacks=[progress])
+# Script entry point: parse the command-line flags, echo them, run main().
+if __name__ == '__main__':
+    FLAGS = parser.parse_args()
+    print_arguments(FLAGS)
+    main(FLAGS)
--- a/examples/ocr/utility.py
+++ b/examples/ocr/utility.py
+"""Contains common utility functions."""
+#  Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import distutils.util
+import numpy as np
+import paddle.fluid as fluid
+import six
+from hapi.metrics import Metric
+from hapi.callbacks import ProgBarLogger
+def print_arguments(args):
+    """Print every argparse argument as ``name: value``, sorted by name.
+    Usage:
+    .. code-block:: python
+        parser = argparse.ArgumentParser()
+        parser.add_argument("name", default="John", type=str, help="User name.")
+        args = parser.parse_args()
+        print_arguments(args)
+    :param args: Input argparse.Namespace for printing.
+    :type args: argparse.Namespace
+    """
+    settings = vars(args)
+    print("-----------  Configuration Arguments -----------")
+    for key in sorted(settings):
+        print("%s: %s" % (key, settings[key]))
+    print("------------------------------------------------")
+def add_arguments(argname, type, default, help, argparser, **kwargs):
+    """Register ``--<argname>`` on ``argparser``.
+    The help text gets `` Default: %(default)s.`` appended, and ``bool``
+    arguments are parsed with ``distutils.util.strtobool``.
+    Usage:
+    .. code-block:: python
+        parser = argparse.ArgumentParser()
+        add_arguments("name", str, "John", "User name.", parser)
+        args = parser.parse_args()
+    """
+    # argparse's type=bool would turn any non-empty string into True.
+    converter = type
+    if type == bool:
+        converter = distutils.util.strtobool
+    flag = "--" + argname
+    full_help = help + ' Default: %(default)s.'
+    argparser.add_argument(
+        flag, default=default, type=converter, help=full_help, **kwargs)
+class SeqAccuracy(Metric):
+    """Sequence-level accuracy: a sample counts only if its whole compared
+    prefix matches the label.
+    :param name: metric name; defaults to ``'seq_acc'``.
+    """
+    def __init__(self, name=None, *args, **kwargs):
+        super(SeqAccuracy, self).__init__(*args, **kwargs)
+        # Honour an explicit name; it used to be accepted and then ignored.
+        self._name = name or 'seq_acc'
+        self.reset()
+    def add_metric_op(self, output, label, mask, *args, **kwargs):
+        """Return the top-1 indices of ``output`` with the label and mask."""
+        pred = fluid.layers.flatten(output, axis=2)
+        _, topk = fluid.layers.topk(pred, 1)
+        return topk, label, mask
+    def update(self, topk, label, mask, *args, **kwargs):
+        """Accumulate one batch and return that batch's accuracy.
+        Sequence lengths are taken as the sum of ``mask`` over its last axis.
+        """
+        topk = topk.reshape(label.shape[0], -1)
+        seq_len = np.sum(mask, -1)
+        acc = 0
+        for i in range(label.shape[0]):
+            l = int(seq_len[i] - 1)
+            # NOTE(review): the slice below stops at l - 1, so only the first
+            # seq_len - 2 tokens are compared -- confirm this is intended.
+            pred = topk[i][:l - 1]
+            ref = label[i][:l - 1]
+            if np.array_equal(pred, ref):
+                self.total += 1
+                acc += 1
+            self.count += 1
+        return float(acc) / label.shape[0]
+    def reset(self):
+        """Clear the running totals."""
+        self.total = 0.
+        self.count = 0.
+    def accumulate(self):
+        """Return matches / samples seen, or 0.0 before any update."""
+        if not self.count:
+            return 0.0
+        return float(self.total) / self.count
+    def name(self):
+        """Return the metric name."""
+        return self._name
+class LoggerCallBack(ProgBarLogger):
+    """ProgBarLogger that divides the logged loss by the batch size.
+    NOTE(review): presumably needed because the loss is a sum over the
+    batch (WeightCrossEntropy uses ``average=False``) -- confirm.
+    :param log_freq: forwarded to ``ProgBarLogger``.
+    :param verbose: forwarded to ``ProgBarLogger``.
+    :param train_bs: divisor for losses logged during training.
+    :param eval_bs: divisor for losses logged during evaluation; falls
+        back to ``train_bs`` when falsy.
+    """
+    def __init__(self, log_freq=1, verbose=2, train_bs=None, eval_bs=None):
+        super(LoggerCallBack, self).__init__(log_freq, verbose)
+        self.train_bs = train_bs
+        self.eval_bs = eval_bs if eval_bs else train_bs
+    @staticmethod
+    def _scale_loss(logs, batch_size):
+        """Return ``logs`` (or a new dict) with ``'loss'`` divided in place.
+        Logs without a ``'loss'`` entry are passed through untouched rather
+        than raising KeyError.
+        """
+        logs = logs or {}
+        if 'loss' in logs:
+            logs['loss'] = [l / batch_size for l in logs['loss']]
+        return logs
+    def on_train_batch_end(self, step, logs=None):
+        logs = self._scale_loss(logs, self.train_bs)
+        super(LoggerCallBack, self).on_train_batch_end(step, logs)
+    def on_epoch_end(self, epoch, logs=None):
+        logs = self._scale_loss(logs, self.train_bs)
+        super(LoggerCallBack, self).on_epoch_end(epoch, logs)
+    def on_eval_batch_end(self, step, logs=None):
+        logs = self._scale_loss(logs, self.eval_bs)
+        super(LoggerCallBack, self).on_eval_batch_end(step, logs)
+    def on_eval_end(self, logs=None):
+        logs = self._scale_loss(logs, self.eval_bs)
+        super(LoggerCallBack, self).on_eval_end(logs)
+def index2word(ids):
+    """Map each id to the character with code point ``id + 33`` (0 -> '!')."""
+    chars = []
+    for token_id in ids:
+        chars.append(chr(int(token_id + 33)))
+    return chars
+def postprocess(seq, bos_idx=0, eos_idx=1):
+    """Cut ``seq`` at the first ``eos_idx`` and drop bos/eos tokens.
+    Without an ``eos_idx`` the whole sequence is kept. Always returns a list.
+    """
+    tokens = seq.tolist() if type(seq) is np.ndarray else seq
+    # Position of the first end token, or the last position if there is none.
+    cut = next(
+        (pos for pos, tok in enumerate(tokens) if tok == eos_idx),
+        len(tokens) - 1)
+    kept = []
+    for tok in tokens[:cut + 1]:
+        if tok != bos_idx and tok != eos_idx:
+            kept.append(tok)
+    return kept
+class SeqBeamAccuracy(Metric):
+    """Sequence accuracy over beams: a sample counts as correct if any of
+    its beams equals the label after ``postprocess``.
+    :param name: metric name; defaults to ``'seq_acc'``.
+    """
+    def __init__(self, name=None, *args, **kwargs):
+        super(SeqBeamAccuracy, self).__init__(*args, **kwargs)
+        # Honour an explicit name; it used to be accepted and then ignored.
+        self._name = name or 'seq_acc'
+        self.reset()
+    def add_metric_op(self, output, label, mask, *args, **kwargs):
+        """Pass the output, label and mask through unchanged."""
+        return output, label, mask
+    def update(self, preds, labels, masks, *args, **kwargs):
+        """Accumulate one batch and return that batch's accuracy.
+        ``masks`` is accepted for interface compatibility; the reference
+        sequence is derived from the label via ``postprocess``.
+        """
+        # Give 2-D predictions a trailing axis, then swap the last two axes
+        # so that iterating one sample yields one row per beam.
+        if len(preds.shape) == 2:
+            preds = preds[:, :, np.newaxis]
+        preds = np.transpose(preds, [0, 2, 1])
+        acc = 0
+        for i, label in enumerate(labels):
+            ref = np.array(postprocess(label))
+            if any(
+                    np.array_equal(np.array(postprocess(beam)), ref)
+                    for beam in preds[i]):
+                self.total += 1
+                acc += 1
+            self.count += 1
+        return float(acc) / labels.shape[0]
+    def reset(self):
+        """Clear the running totals."""
+        self.total = 0.
+        self.count = 0.
+    def accumulate(self):
+        """Return matches / samples seen, or 0.0 before any update."""
+        if not self.count:
+            return 0.0
+        return float(self.total) / self.count
+    def name(self):
+        """Return the metric name."""
+        return self._name
--- a/examples/tsm/infer.py
+++ b/examples/tsm/infer.py
@@ -20,9 +20,9 @@ import argparse
 import numpy as np
 from hapi.model import Input, set_device
-from hapi.vision.models import tsm_resnet50
 from check import check_gpu, check_version
+from modeling import tsm_resnet50
 from kinetics_dataset import KineticsDataset
 from transforms import *

--- a/examples/tsm/main.py
+++ b/examples/tsm/main.py
@@ -24,8 +24,8 @@ from paddle.fluid.dygraph.parallel import ParallelEnv
 from hapi.model import Model, CrossEntropy, Input, set_device
 from hapi.metrics import Accuracy
-from hapi.vision.models import tsm_resnet50
+from modeling import tsm_resnet50
 from check import check_gpu, check_version
 from kinetics_dataset import KineticsDataset
 from transforms import *

--- a/hapi/vision/models/tsm.py
+++ b/hapi/vision/models/tsm.py
@@ -196,7 +196,7 @@ def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True):
        weight_path = get_weights_path(*(pretrain_infos[num_layers]))
        assert weight_path.endswith('.pdparams'), \
                "suffix of weight must be .pdparams"
-        model.load(weight_path[:-9])
+        model.load(weight_path)
    return model

--- a/examples/yolov3/README.md
+++ b/examples/yolov3/README.md
@@ -99,18 +99,12 @@ YOLOv3 的网络结构由基础特征提取网络、multi-scale特征融合层
  |   ...
  ```
-```bash
-sh pretrain_weights/download.sh
-```
 ### 模型训练
 数据准备完成后，可使用`main.py`脚本启动训练和评估，如下脚本会自动每epoch交替进行训练和模型评估，并将checkpoint默认保存在`yolo_checkpoint`目录下。
 YOLOv3模型训练总batch_size为64训练，以下以使用4卡Tesla P40每卡batch_size为16训练介绍训练方式。对于静态图和动态图，多卡训练中`--batch_size`为每卡上的batch_size，即总batch_size为`--batch_size`乘以卡数。
-YOLOv3模型训练须加载骨干网络[DarkNet53]()的预训练权重，可在训练时通过`--pretrain_weights`指定，若指定为URL，将自动下载权重至`~/.cache/paddle/weights`目录并加载。
 `main.py`脚本参数可通过如下命令查询
 ```bash
@@ -122,7 +116,7 @@ python main.py --help
 使用如下方式进行多卡训练:
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16 --pretrain_weights=https://paddlemodels.bj.bcebos.com/hapi/darknet53_pretrained.pdparams
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16
 ```
 #### 动态图训练
@@ -132,7 +126,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=
 使用如下方式进行多卡训练:
 ```bash
-CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py -m paddle.distributed.launch --data=<path/to/dataset> --batch_size=16 -d --pretrain_weights=https://paddlemodels.bj.bcebos.com/hapi/darknet53_pretrained.pdparams
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --data=<path/to/dataset> --batch_size=16 -d
 ```

--- a/hapi/datasets/coco.py
+++ b/hapi/datasets/coco.py
--- a/examples/yolov3/infer.py
+++ b/examples/yolov3/infer.py
@@ -25,8 +25,9 @@ from paddle.fluid.optimizer import Momentum
 from paddle.io import DataLoader
 from hapi.model import Model, Input, set_device
-from hapi.vision.models import yolov3_darknet53, YoloLoss
-from hapi.vision.transforms import *
+from modeling import yolov3_darknet53, YoloLoss
+from transforms import *
 from visualizer import draw_bbox

--- a/examples/yolov3/main.py
+++ b/examples/yolov3/main.py
@@ -27,12 +27,12 @@ from paddle.io import DataLoader
 from hapi.model import Model, Input, set_device
 from hapi.distributed import DistributedBatchSampler
-from hapi.download import is_url, get_weights_path
+from hapi.vision.transforms import Compose, BatchCompose
-from hapi.datasets import COCODataset
-from hapi.vision.transforms import *
-from hapi.vision.models import yolov3_darknet53, YoloLoss
+from modeling import yolov3_darknet53, YoloLoss
+from coco import COCODataset
 from coco_metric import COCOMetric
+from transforms import *
 NUM_MAX_BOXES = 50
@@ -126,10 +126,7 @@ def main():
                   pretrained=pretrained)
    if FLAGS.pretrain_weights and not FLAGS.eval_only:
-        pretrain_weights = FLAGS.pretrain_weights
+        model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
-        if is_url(pretrain_weights):
-            pretrain_weights = get_weights_path(pretrain_weights)
-        model.load(pretrain_weights, skip_mismatch=True, reset_optimizer=True)
    optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters())
@@ -168,7 +165,7 @@ def main():
              save_dir="yolo_checkpoint/mixup",
              save_freq=10)
-    # do not use image mixup transfrom in laste FLAGS.no_mixup_epoch epoches
+    # do not use image mixup transform in the last FLAGS.no_mixup_epoch epochs
    dataset.mixup = False
    model.fit(train_data=loader,
              epochs=FLAGS.no_mixup_epoch,
@@ -200,8 +197,7 @@ if __name__ == '__main__':
    parser.add_argument(
        "-j", "--num_workers", default=4, type=int, help="reader worker number")
    parser.add_argument(
-        "-p", "--pretrain_weights",
+        "-p", "--pretrain_weights", default=None, type=str,
-        default="./pretrain_weights/darknet53_pretrained", type=str,
        help="path to pretrained weights")
    parser.add_argument(
        "-r", "--resume", default=None, type=str,

--- a/hapi/vision/models/yolov3.py
+++ b/hapi/vision/models/yolov3.py
@@ -16,13 +16,13 @@ from __future__ import division
 from __future__ import print_function
 import paddle.fluid as fluid
-from paddle.fluid.dygraph.nn import Conv2D
+from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
 from paddle.fluid.param_attr import ParamAttr
 from paddle.fluid.regularizer import L2Decay
 from hapi.model import Model, Loss
 from hapi.download import get_weights_path
-from .darknet import darknet53, ConvBNLayer
+from hapi.vision.models import darknet53
 __all__ = ['YoloLoss', 'YOLOv3', 'yolov3_darknet53']
@@ -33,6 +33,46 @@ pretrain_infos = {
 }
+class ConvBNLayer(fluid.dygraph.Layer):
+    """Conv2D followed by BatchNorm, with an optional leaky-ReLU on top.
+    Args:
+        ch_in: forwarded to Conv2D as ``num_channels``.
+        ch_out: forwarded to Conv2D as ``num_filters`` and to BatchNorm
+            as ``num_channels``.
+        filter_size: Conv2D ``filter_size``. Default: 3.
+        stride: Conv2D ``stride``. Default: 1.
+        groups: Conv2D ``groups``. Default: 1.
+        padding: Conv2D ``padding``. Default: 0.
+        act: when equal to ``'leaky'``, ``forward`` applies
+            ``leaky_relu`` with ``alpha=0.1``; any other value leaves the
+            BatchNorm output untouched. Default: "leaky".
+    """
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size=3,
+                 stride=1,
+                 groups=1,
+                 padding=0,
+                 act="leaky"):
+        super(ConvBNLayer, self).__init__()
+        # Convolution weights use Normal(0, 0.02); the conv itself carries
+        # no activation (act=None) and bias_attr is False.
+        conv_weight_attr = ParamAttr(
+            initializer=fluid.initializer.Normal(0., 0.02))
+        self.conv = Conv2D(
+            num_channels=ch_in,
+            num_filters=ch_out,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            param_attr=conv_weight_attr,
+            bias_attr=False,
+            act=None)
+        # Both BatchNorm parameters are given an L2Decay(0.) regularizer.
+        bn_scale_attr = ParamAttr(
+            initializer=fluid.initializer.Normal(0., 0.02),
+            regularizer=L2Decay(0.))
+        bn_offset_attr = ParamAttr(
+            initializer=fluid.initializer.Constant(0.0),
+            regularizer=L2Decay(0.))
+        self.batch_norm = BatchNorm(
+            num_channels=ch_out,
+            param_attr=bn_scale_attr,
+            bias_attr=bn_offset_attr)
+        self.act = act
+    def forward(self, inputs):
+        """Run conv -> batch norm, then leaky-ReLU if ``self.act == 'leaky'``."""
+        normed = self.batch_norm(self.conv(inputs))
+        if self.act != 'leaky':
+            return normed
+        return fluid.layers.leaky_relu(x=normed, alpha=0.1)
 class YoloDetectionBlock(fluid.dygraph.Layer):
    def __init__(self, ch_in, channel):
        super(YoloDetectionBlock, self).__init__()
@@ -118,7 +158,7 @@ class YOLOv3(Model):
        self.nms_posk = 100
        self.draw_thresh = 0.5
-        self.backbone = darknet53(pretrained=False)
+        self.backbone = darknet53(pretrained=(model_mode=='train'))
        self.block_outputs = []
        self.yolo_blocks = []
        self.route_blocks = []
@@ -254,7 +294,7 @@ def _yolov3_darknet(num_layers=53, num_classes=80,
        weight_path = get_weights_path(*(pretrain_infos[num_layers]))
        assert weight_path.endswith('.pdparams'), \
                "suffix of weight must be .pdparams"
-        model.load(weight_path[:-9])
+        model.load(weight_path)
    return model

--- a/hapi/vision/transforms/detection_transforms.py
+++ b/hapi/vision/transforms/detection_transforms.py
--- a/hapi/callbacks.py
+++ b/hapi/callbacks.py
@@ -218,8 +218,6 @@ class ProgBarLogger(Callback):
            # if steps is not None, last step will update in on_epoch_end
            if self.steps and self.train_step < self.steps:
                self._updates(logs, 'train')
-            else:
-                self._updates(logs, 'train')
    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
@@ -238,7 +236,7 @@ class ProgBarLogger(Callback):
    def on_eval_batch_end(self, step, logs=None):
        logs = logs or {}
-        self.eval_step = step
+        self.eval_step += 1
        samples = logs.get('batch_size', 1)
        self.evaled_samples += samples

--- a/hapi/datasets/__init__.py
+++ b/hapi/datasets/__init__.py
@@ -12,7 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from . import folder
+from . import mnist
+from . import flowers
 from .folder import *
 from .mnist import *
 from .flowers import *
-from .coco import *
+__all__ = folder.__all__ \
+        + mnist.__all__ \
+        + flowers.__all__
--- a/hapi/datasets/folder.py
+++ b/hapi/datasets/folder.py
@@ -18,7 +18,7 @@ import cv2
 from paddle.io import Dataset
-__all__ = ["DatasetFolder"]
+__all__ = ["DatasetFolder", "ImageFolder"]
 def has_valid_extension(filename, extensions):
@@ -164,3 +164,80 @@ IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif',
 def cv2_loader(path):
    return cv2.imread(path)
+class ImageFolder(Dataset):
+    """A generic data loader where the samples are arranged in this way:
+        root/1.ext
+        root/2.ext
+        root/sub_dir/3.ext
+    Args:
+        root (string): Root directory path.
+        loader (callable, optional): A function to load a sample given its
+            path. ``cv2_loader`` is used when it is None.
+        extensions (tuple[string], optional): Allowed file extensions.
+            ``IMG_EXTENSIONS`` is used when neither extensions nor
+            is_valid_file is given. extensions and is_valid_file must not
+            be passed together.
+        transform (callable, optional): A function/transform that takes in
+            a sample and returns a transformed version.
+        is_valid_file (callable, optional): A function that takes the path
+            of a file and returns whether it is a valid file (e.g. to skip
+            corrupt files). extensions and is_valid_file must not be
+            passed together.
+    Raises:
+        ValueError: if both extensions and is_valid_file are given.
+        RuntimeError: if no valid file is found under root.
+    Attributes:
+        samples (list): List of sample paths
+    """
+    def __init__(self,
+                 root,
+                 loader=None,
+                 extensions=None,
+                 transform=None,
+                 is_valid_file=None):
+        self.root = root
+        # The two filters are mutually exclusive. The check has to run
+        # before the default extensions are filled in, otherwise a
+        # user-supplied is_valid_file would always be rejected.
+        if extensions is not None and is_valid_file is not None:
+            raise ValueError(
+                "extensions and is_valid_file cannot be passed at the same time"
+            )
+        if is_valid_file is None:
+            if extensions is None:
+                extensions = IMG_EXTENSIONS
+            def is_valid_file(x):
+                return has_valid_extension(x, extensions)
+        samples = []
+        path = os.path.expanduser(root)
+        # Walk in sorted order so the sample list is deterministic.
+        for dirpath, _, fnames in sorted(os.walk(path, followlinks=True)):
+            for fname in sorted(fnames):
+                f = os.path.join(dirpath, fname)
+                if is_valid_file(f):
+                    samples.append(f)
+        if len(samples) == 0:
+            msg = "Found 0 files in subfolders of: " + self.root
+            # extensions is None when a custom is_valid_file is in use.
+            if extensions is not None:
+                msg += "\n" "Supported extensions are: " + ",".join(extensions)
+            raise RuntimeError(msg)
+        self.loader = cv2_loader if loader is None else loader
+        self.extensions = extensions
+        self.samples = samples
+        self.transform = transform
+    def __getitem__(self, index):
+        """
+        Args:
+            index (int): Index
+        Returns:
+            list: a single-element list ``[sample]``; no target is returned.
+        """
+        path = self.samples[index]
+        sample = self.loader(path)
+        if self.transform is not None:
+            sample = self.transform(sample)
+        return [sample]
+    def __len__(self):
+        """Return the number of collected sample paths."""
+        return len(self.samples)
--- a/hapi/model.py
+++ b/hapi/model.py
@@ -816,7 +816,7 @@ class Model(fluid.dygraph.Layer):
            except ValueError as err:
                if skip_mismatch:
                    warnings.warn(
-                        ("Skip loading for {}. ".format(key) + err.message))
+                        ("Skip loading for {}. ".format(key) + str(err)))
                    # reset optimizer when mismatch happens
                    reset_optimizer = True
                else:
@@ -1161,7 +1161,7 @@ class Model(fluid.dygraph.Layer):
        if fluid.in_dygraph_mode():
            feed_list = None
        else:
-            feed_list = [x.forward() for x in self._inputs + self._labels]
+            feed_list = [x.forward() for x in self._inputs]
        if test_data is not None and isinstance(test_data, Dataset):
            test_sampler = DistributedBatchSampler(
@@ -1281,7 +1281,7 @@ class Model(fluid.dygraph.Layer):
        if mode == 'train':
            assert epoch is not None, 'when mode is train, epoch must be given'
-            callbacks.on_epoch_end(epoch)
+            callbacks.on_epoch_end(epoch, logs)
        return logs

--- a/hapi/text/bert/dataloader.py
+++ b/hapi/text/bert/dataloader.py
@@ -30,6 +30,7 @@ from hapi.distributed import DistributedBatchSampler
 from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor
 from hapi.text.bert.batching import prepare_batch_data
 import hapi.text.tokenizer.tokenization as tokenization
+from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
 __all__ = [
    'BertInputExample', 'BertInputFeatures', 'SingleSentenceDataset',
@@ -227,6 +228,9 @@ class SingleSentenceDataset(Dataset):
        if line_processor is None:
            line_processor = default_line_processor
+        if ParallelEnv().nranks > 1:
+            leveldb_file = leveldb_file + "_" + str(ParallelEnv().local_rank)
        if not os.path.exists(leveldb_file):
            print("putting data %s into leveldb %s" %
                  (input_file, leveldb_file))
@@ -384,7 +388,12 @@ class BertDataLoader(object):
                 quotechar=None,
                 device=fluid.CPUPlace(),
                 num_workers=0,
-                 return_list=True):
+                 return_list=True,
+                 phase="train"):
+        assert phase in [
+            "train", "predict", "test"
+        ], "phase of BertDataLoader should be in [train, predict, test], but get %s" % phase
        self.dataset = SingleSentenceDataset(tokenizer, label_list,
                                             max_seq_length, mode)
@@ -394,15 +403,21 @@ class BertDataLoader(object):
                input_file, label_list, max_seq_length, tokenizer,
                line_processor, delimiter, quotechar)
        elif mode == "leveldb":
-            #prepare_leveldb(self, input_file, leveldb_file, label_list, max_seq_length, tokenizer, line_processor=None, delimiter="\t", quotechar=None):
            self.dataset.prepare_leveldb(input_file, leveldb_file, label_list,
                                         max_seq_length, tokenizer,
                                         line_processor, delimiter, quotechar)
        else:
            raise ValueError("mode should be in [all_in_memory, leveldb]")
-        self.sampler = DistributedBatchSampler(
+        if phase == "train":
-            self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
+            self.sampler = DistributedBatchSampler(
+                self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
+        elif phase == "test" or phase == "predict":
+            self.sampler = BatchSampler(
+                dataset=self.dataset,
+                batch_size=batch_size,
+                shuffle=shuffle,
+                drop_last=drop_last)
        self.dataloader = DataLoader(
            dataset=self.dataset,

--- a/hapi/text/bert/optimization.py
+++ b/hapi/text/bert/optimization.py
@@ -130,6 +130,18 @@ class Optimizer(object):
                return True
        return False
+    def state_dict(self):
+        """Return the result of ``self.optimizer.state_dict()`` (pure delegation)."""
+        return self.optimizer.state_dict()
+    def set_dict(self, state_dict):
+        """Forward ``state_dict`` to ``self.optimizer.set_dict`` and return its result."""
+        return self.optimizer.set_dict(state_dict)
+    def get_opti_var_name_list(self):
+        """Return the result of ``self.optimizer.get_opti_var_name_list()`` (pure delegation)."""
+        return self.optimizer.get_opti_var_name_list()
+    def current_step_lr(self):
+        """Return the result of ``self.optimizer.current_step_lr()`` (pure delegation).
+        NOTE(review): presumably the learning rate at the current step, as
+        defined by the wrapped optimizer -- confirm against its API.
+        """
+        return self.optimizer.current_step_lr()
    def minimize(self, loss, use_data_parallel=False, model=None):
        param_list = dict()

--- a/hapi/text/text.py
+++ b/hapi/text/text.py
--- a/hapi/vision/models/__init__.py
+++ b/hapi/vision/models/__init__.py
@@ -17,24 +17,15 @@ from . import vgg
 from . import mobilenetv1
 from . import mobilenetv2
 from . import darknet
-from . import yolov3
-from . import tsm
-from . import bmn_model
 from .resnet import *
 from .mobilenetv1 import *
 from .mobilenetv2 import *
 from .vgg import *
 from .darknet import *
-from .yolov3 import *
-from .tsm import *
-from .bmn_model import *
 __all__ = resnet.__all__ \
        + vgg.__all__ \
        + mobilenetv1.__all__ \
        + mobilenetv2.__all__ \
-        + darknet.__all__ \
+        + darknet.__all__
-        + yolov3.__all__ \
-        + tsm.__all__ \
-        + bmn_model.__all__
--- a/hapi/vision/models/darknet.py
+++ b/hapi/vision/models/darknet.py
@@ -22,7 +22,7 @@ from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Pool2D, Linear
 from hapi.model import Model
 from hapi.download import get_weights_path
-__all__ = ['DarkNet', 'ConvBNLayer', 'darknet53']
+__all__ = ['DarkNet', 'darknet53']
 # {num_layers: (url, md5)}
 pretrain_infos = {

--- a/hapi/vision/transforms/__init__.py
+++ b/hapi/vision/transforms/__init__.py
@@ -12,6 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from . import transforms
+from . import functional
 from .transforms import *
 from .functional import *
-from .detection_transforms import *
+__all__ = transforms.__all__ \
+        + functional.__all__
--- a/hapi/vision/transforms/functional.py
+++ b/hapi/vision/transforms/functional.py
@@ -26,6 +26,8 @@ else:
    Sequence = collections.abc.Sequence
    Iterable = collections.abc.Iterable
+__all__ = ['flip', 'resize']
 def flip(image, code):
    """