diff --git a/.tools/build_docker.sh b/.tools/build_docker.sh
index 242f8de16639f6c333d99f2ff4c7e24f86788c98..a403b710eedbc8c70c35f4bb27c10bf523143a4a 100755
--- a/.tools/build_docker.sh
+++ b/.tools/build_docker.sh
@@ -59,6 +59,10 @@ fi
 
 cat >> Dockerfile <<EOF
 RUN pip install -U nltk \
+    && python3 -m pip install --user --upgrade pip==9.0.3 \
+    && pip3 install -U nltk \
+    && pip3.6 install -U nltk \
+    && pip3.7 install -U nltk \
     && python /book/.tools/cache_dataset.py
 
 RUN ${update_mirror_cmd}
@@ -67,8 +71,17 @@ RUN ${update_mirror_cmd}
     apt-get -y install gcc curl git vim && \
     apt-get -y clean && \
     localedef -f UTF-8 -i en_US en_US.UTF-8 && \
-    pip install --upgrade pip && \
-    pip install -U notedown pillow matplotlib jupyter numpy requests scipy
+    pip install -U notedown pillow matplotlib jupyter numpy requests scipy && \
+    pip3 install -U notedown pillow matplotlib numpy requests scipy && \
+    pip3.6 install -U notedown pillow matplotlib numpy requests scipy && \
+    pip3.7 install -U notedown pillow matplotlib numpy requests scipy
+
+RUN pip3.6 install ipykernel && \
+    pip3.7 install ipykernel && \
+    python3.6 -m ipykernel install --name python3.6 && \
+    python3.7 -m ipykernel install --name python3.7 && \
+    pip3 install ipykernel && \
+    python3 -m ipykernel install --name python3
 
 RUN curl https://storage.googleapis.com/golang/go1.8.linux-amd64.tar.gz -o go1.8.linux-amd64.tar.gz && \
     tar -zxvf go1.8.linux-amd64.tar.gz -C /usr/local/ && \
diff --git a/0.coding_guideline/README.MD b/0.coding_guideline/README.MD
new file mode 100644
index 0000000000000000000000000000000000000000..884185fe0cb6a24e7130edc716f52533792267bc
--- /dev/null
+++ b/0.coding_guideline/README.MD
@@ -0,0 +1,994 @@
+# Paddle官方模型库开发及API使用规范
+
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://github.com/PaddlePaddle/models)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+
+## 目录
+- [**整体目标和原则**](#整体目标和原则)
+
+- [**必选规范**](#必选规范)
+
+  - [**目录结构**](#目录结构)
+  
+  - [**功能实现**](#功能实现)
+  
+  - [**书写示例**](#书写示例)
+
+  - [**重点关注事项**](#整体需要注意的问题)
+  
+  - [**命名规范**](#命名规范和使用规范)
+  
+  - [**README**](#README)
+  
+  - [**多环境支持**](#多环境支持)
+  
+  - [**注释和Licenses**](#注释和Licenses)
+
+- [**建议规范**](#建议规范)
+
+  - [**显存优化**](#显存优化)  
+  
+- [**合入规则**](#合入规则)
+  
+  - [**代码提交**](#代码提交)
+  
+  - [**code_review**](#code_review)
+
+- [**升级维护**](#升级维护)
+
+## 整体目标和原则
+
+### 整体目标
+为PaddlePaddle/models用户，提供功能一致，风格统一，可插拔的NLP，CV，Speech等领域的重要模型。
+提高PaddlePaddle用户的学习和开发效率。
+加速部分老旧有bug的API离场，并推进新API的快速落地。
+
+### 原则
+1. 不同领域在逻辑上统一功能，在具体落实上（如configure的使用方式），尊重并拥抱各个领域的主流做法。但在具体大方向上，不可进一步拆分，需形成统一。
+2. 区分规范为必选和可选，原则上要求所有模型遵守必选规范，建议遵守可选规范。
+3. 原则上要求模型可以在**GPU单卡多卡**，**CPU多核**上执行**训练**，**预测**，**评估**和**准备部署环境**，并可以支持轻松**使用自定义数据**。
+4. 如果有特殊情况，需要在readme中的显眼位置做出说明，避免用户踩坑。
+
+## 必选规范
+
+### 目录结构
+
+遵照1.5 Paddle-models结构，将整体模型库拆分为3级目录结构，如下图所示：
+
+#### 一级目录，按照大方向进行细分，如PaddleNLP，PaddleCV等：
+
+![avatar](./appendix/dir-1.png)
+
+#### 二级目录，按照具体场景进行拆分，如在PaddleNLP下，有neural_machine_translation，reading_comprehension等：
+
+![avatar](./appendix/dir-2.png)
+
+#### 三级目录，按照具体任务进行拆分，如在reading_comprehension下，有squad任务，mrqa任务等：
+
+![avatar](./appendix/dir-3.png)
+
+#### 叶子目录，具体到某个具体的任务，如SQuAD，则建议包含以下内容：
+
+![avatar](./appendix/dir-4.png)
+
+- **data**：存储所有跟数据相关的内容，如configure，dict，input，output，saved_models，inference_models等。
+- **squad**：该文件夹命名可以根据具体任务进行命名，保存该任务下一些特定的代码，如reader.py，batching.py，tokenizer.py等。
+- **README.MD**: 当前任务库的使用手册，规范参见[**使用手册规范**](#README)
+- **XXX_net.py**: 保存网络定义的脚本，可以有多个XXX_net.py，不同net之间在网络声明阶段需要统一output，也需要尽量统一input。
+- **eval.py**：执行评估的脚本。
+- **inference_model.py**：执行保存inference_model的脚本，用于准备上线部署环境。
+- **main.py**：主程序入口，通过命令行或者configure文件，执行不同逻辑。
+- **predict.py**：执行预测功能的脚本，需要可以单独运行。
+- **train.py**：执行训练功能的脚本，需要可以单独运行。
+- **run.sh**：示例脚本，需要给出不同环境下（GPU单卡多卡，CPU多核），如何进行训练，预测，评估，保存部署环境等功能。
+
+### 功能实现
+
+原则上，官方提供的模型需要提供：
+1. **训练**：可以在GPU单卡/多卡，CPU多核的环境下执行训练，至少有一个环境可以打平竞品效果或者复现原有paper效果（如果存在竞品），官方可以给出一种推荐训练的方式。
+2. **预测**：可以在GPU单卡和CPU单核下执行预测。
+3. **评估**：要求同预测。
+4. **保存部署环境**：可以保存可部署的环境，支持模型部署。
+5. **使用自定义数据**：要求官方模型可以灵活支持/适配用户自定义数据，可以通过在readme中加入数据格式描述部分和**如何使用自定义数据**章节解决。
+
+### 书写示例
+
+下面以SQUAD下面的ERNIE-based MRC模型具体说明一些代码书写方面的问题。
+
+#### net.py书写示例
+```python
+#encoding=utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+import transformer_encoder
+from bert import BertModel
+
+def create_net(
+  is_training, 
+  model_input, 
+  args):
+  
+    """
+    create the network of BERT-based Machine Reading Comprehension Network.
+    is_training: a boolean value, indicating if the model to be created is for training or predicting.
+    model_input: stores the input of the model
+      for simple task like mnist: model_input can be either a single/tuple/list of fluid.layers.data.
+      for complicated task like ERNIE-based machine reading comprehension, the model_input is an object like:
+      
+      class model_input:
+        def __init__(self):
+          self.image = fluid.layers.data(shape = [-1, 8, 8, 1], dtype = "int64", name = "image")
+          
+    """
+
+    # declare the model input here
+    if is_training:
+        src_ids = model_input.src_ids
+        ...
+        is_null_answer = model_input.is_null_answer
+    else:
+        src_ids = model_input.src_ids
+        ...
+        unique_id = model_input.unique_id
+
+    # define the model bert first
+
+    assert isinstance(args.bert_config_path, str)
+
+    # define the model here
+    bert_conf = JsonConfig(args.bert_config_path)
+    base_model = BertModel(
+        src_ids=src_ids,
+        position_ids=pos_ids,
+        sentence_ids=sent_ids,
+        input_mask=input_mask,
+        config=bert_conf)
+
+    sequence_output = base_model.get_sequence_output()
+
+    ...
+
+    # if is_training, then return the loss, otherwise return the prediction
+
+    if is_training:
+
+        def compute_loss(logits, positions):
+            loss = fluid.layers.softmax_with_cross_entropy(
+                logits=logits, label=positions)
+            loss = fluid.layers.mean(x=loss)
+            return loss
+
+        start_loss = compute_loss(start_logits, start_positions)
+        end_logits = fluid.layers.reshape(
+            x=end_logits,
+            shape=[-1, args.start_top_k, list(end_logits.shape)[-1]])
+        end_logits = fluid.layers.slice(
+            end_logits, axes=[1], starts=[0], ends=[1])
+        end_logits = fluid.layers.reshape(
+            x=end_logits, shape=[-1, list(end_logits.shape)[-1]])
+        end_loss = compute_loss(end_logits, end_positions)
+
+        total_loss = (start_loss + end_loss) / 2.0
+
+        if args.use_fp16 and args.loss_scaling > 1.0:
+            total_loss = total_loss * args.loss_scaling
+
+        return total_loss
+
+    else:
+
+        predict = [unique_id, top_k_start_log_probs, top_k_start_indexes, \
+            top_k_end_log_probs, top_k_end_indexes]
+
+        return predict
+
+```
+
+#### train.py书写示例
+
+```python
+#encoding=utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import os
+import sys
+import numpy as np
+import argparse
+import paddle
+import paddle.fluid as fluid
+
+# include task-specific libs
+from bert_mrc_net import create_net
+from squad.reader import DataProcessor, write_predictions
+
+# Load a pretrained model
+def init_from_pretrain_model(args, exe, program):
+    """Load the params stored under args.init_from_pretrain_model into program; return True."""
+    assert isinstance(args.init_from_pretrain_model, str)
+
+    if not os.path.exists(args.init_from_pretrain_model):
+        raise Warning("The pretrained params do not exist.")
+
+    # predicate: keep only Parameters that have a file of the same name in the directory
+    def existed_params(var):
+        if not isinstance(var, fluid.framework.Parameter):
+            return False
+        return os.path.exists(
+            os.path.join(args.init_from_pretrain_model, var.name))
+
+    fluid.io.load_vars(
+        exe,
+        args.init_from_pretrain_model,
+        main_program=program,
+        predicate=existed_params)
+
+    print("finish initing model from pretrained params from %s" %
+          (args.init_from_pretrain_model))
+
+    return True
+
+# Load a checkpoint to resume training
+def init_from_checkpoint(args, exe, program):
+    """Restore the persistables saved under args.init_from_checkpoint into program; return True."""
+    assert isinstance(args.init_from_checkpoint, str)
+
+    if not os.path.exists(args.init_from_checkpoint):
+        raise Warning("the checkpoint path does not exist.")
+
+    # the file name must match the one used when the checkpoint was saved
+    fluid.io.load_persistables(
+        executor=exe,
+        dirname=args.init_from_checkpoint,
+        main_program=program,
+        filename="checkpoint.pdckpt")
+
+    print("finish initing model from checkpoint from %s" %
+          (args.init_from_checkpoint))
+
+    return True
+
+# Save a checkpoint so that training can be resumed later
+def save_checkpoint(args, exe, program, dirname):
+    """Save the persistables of program under <save_model_path>/<save_checkpoint>/<dirname>."""
+    assert isinstance(args.save_model_path, str)
+
+    checkpoint_dir = os.path.join(args.save_model_path, args.save_checkpoint)
+
+    if not os.path.exists(checkpoint_dir):
+        os.makedirs(checkpoint_dir)  # also creates a missing args.save_model_path
+
+    fluid.io.save_persistables(
+        exe,
+        os.path.join(checkpoint_dir, dirname),
+        main_program=program,
+        filename="checkpoint.pdckpt")  # same name that init_from_checkpoint loads
+
+    print("save checkpoint at %s" % (os.path.join(checkpoint_dir, dirname)))
+
+    return True
+
+# Save the model parameters for later prediction / deployment
+def save_param(args, exe, program, dirname):
+    """Save the params of program under <save_model_path>/<save_param>/<dirname>."""
+    assert isinstance(args.save_model_path, str)
+
+    param_dir = os.path.join(args.save_model_path, args.save_param)
+
+    if not os.path.exists(param_dir):
+        os.makedirs(param_dir)  # also creates a missing args.save_model_path
+
+    fluid.io.save_params(
+        exe,
+        os.path.join(param_dir, dirname),
+        main_program=program,
+        filename="params.pdparams")
+    print("save parameters at %s" % (os.path.join(param_dir, dirname)))
+
+    return True
+
+# Hyper-parameter manager class (paddle-config).
+# It can be used directly under paddle/palm.
+class PDConfig(object):
+    """
+    A high-level API for managing configuration files in PaddlePaddle.
+    Can jointly work with command-line arguments, json files and yaml files.
+    """
+
+    def __init__(self, json_file="", yaml_file="", fuse_args=True):
+        """
+            Init function for PDConfig.
+            json_file: the path to the json configure file.
+            yaml_file: the path to the yaml configure file.
+            fuse_args: if fuse the json/yaml configs with argparse.
+        """
+        assert isinstance(json_file, str)
+        assert isinstance(yaml_file, str)
+
+        if json_file != "" and yaml_file != "":
+            raise Warning(
+                "json_file and yaml_file can not co-exist for now. please only use one configure file type."
+            )
+
+        self.args = None
+        self.arg_config = {}
+        self.json_config = {}
+        self.yaml_config = {}
+
+        parser = argparse.ArgumentParser()
+
+        self.default_g = ArgumentGroup(parser, "default", "default options.")
+        self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
+        self.json_g = ArgumentGroup(parser, "json", "options from json.")
+        self.com_g = ArgumentGroup(parser, "custom", "customized options.")
+
+        self.default_g.add_arg("epoch", int, 2,
+                               "Number of epoches for training.")
+        self.default_g.add_arg("learning_rate", float, 1e-2,
+                               "Learning rate used to train.")
+        self.default_g.add_arg("do_train", bool, False,
+                               "Whether to perform training.")
+        self.default_g.add_arg("do_predict", bool, False,
+                               "Whether to perform predicting.")
+        self.default_g.add_arg("do_eval", bool, False,
+                               "Whether to perform evaluating.")
+
+        self.parser = parser
+
+        if json_file:
+            self.load_json(json_file, fuse_args=fuse_args)
+
+        if yaml_file:
+            self.load_yaml(yaml_file, fuse_args=fuse_args)
+
+    def load_json(self, file_path, fuse_args=True):
+        """
+            Load a json configure file into self.json_config.
+            fuse_args: if True, register its int/float/str/bool entries as options.
+        """
+        import json  # imported here so that the method does not rely on a file-level import
+
+        if not os.path.exists(file_path):
+            raise Warning("the json file %s does not exist." % file_path)
+
+        with open(file_path, "r") as fin:
+            self.json_config = json.loads(fin.read())
+
+        if fuse_args:
+            for name in self.json_config:
+                value = self.json_config[name]
+                if not isinstance(value, (int, float, str, bool)):
+                    continue
+
+                self.json_g.add_arg(name, type(value), value,
+                                    "This is from %s" % file_path)
+
+    def load_yaml(self, file_path, fuse_args=True):
+        """
+            Load a yaml configure file into self.yaml_config.
+            fuse_args: if True, register its int/float/str/bool entries as options.
+        """
+        import yaml  # imported here so that only yaml users need the yaml package
+
+        if not os.path.exists(file_path):
+            raise Warning("the yaml file %s does not exist." % file_path)
+
+        with open(file_path, "r") as fin:
+            self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
+
+        if fuse_args:
+            for name in self.yaml_config:
+                value = self.yaml_config[name]
+                if not isinstance(value, (int, float, str, bool)):
+                    continue
+
+                self.yaml_g.add_arg(name, type(value), value,
+                                    "This is from %s" % file_path)
+
+    def build(self):
+        """Parse the command line and cache the result as self.args / self.arg_config."""
+        self.args = self.parser.parse_args()
+        self.arg_config = vars(self.args)
+
+    def __add__(self, new_arg):
+        """
+            Register a customized option; only allowed before build() is called.
+            new_arg: a list/tuple of (name, dtype, default_value[, description]).
+        """
+        assert isinstance(new_arg, (list, tuple))
+        assert len(new_arg) >= 3
+        assert self.args is None
+
+        name = new_arg[0]
+        dtype = new_arg[1]
+        dvalue = new_arg[2]
+        desc = new_arg[3] if len(
+            new_arg) > 3 else "Description is not provided."
+
+        self.com_g.add_arg(name, dtype, dvalue, desc)
+
+        return self
+
+    def __getattr__(self, name):
+        """
+            Look `name` up in the parsed arguments, then the json config, then the yaml config.
+            The dicts are read through __dict__ so a missing dict can not re-enter __getattr__.
+        """
+        for source in ("arg_config", "json_config", "yaml_config"):
+            config = self.__dict__.get(source, {})
+            if name in config:
+                return config[name]
+
+        raise Warning("The argument %s is not defined." % name)
+
+    def Print(self):
+        """Print every known option once; parsed arguments shadow json/yaml entries."""
+        print("-" * 70)
+        for name in self.arg_config:
+            print("%s:\t\t\t\t%s" % (str(name), str(self.arg_config[name])))
+
+        for name in self.json_config:
+            if name not in self.arg_config:
+                print("%s:\t\t\t\t%s" %
+                      (str(name), str(self.json_config[name])))
+
+        for name in self.yaml_config:
+            if name not in self.arg_config:
+                print("%s:\t\t\t\t%s" %
+                      (str(name), str(self.yaml_config[name])))
+
+        print("-" * 70)
+
+
+# If the model is very complex and has many inputs, it is recommended to manage them with a class.
+class InputField(object):
+    """
+    A high-level API for handling inputs in PaddlePaddle.
+    """
+
+    def __init__(self, input_slots=None):
+        """input_slots: optional list of slots, each in a form accepted by __add__."""
+        self.shapes = []
+        self.dtypes = []
+        self.names = []
+        self.lod_levels = []
+
+        self.input_slots = {}
+        self.feed_list_str = []
+        self.feed_list = []
+
+        self.reader = None
+
+        # None (not []) is the default so that no list object is shared between calls
+        for input_slot in input_slots or []:
+            self += input_slot
+
+    def __add__(self, input_slot):
+        """Register a slot given as (name, shape, dtype[, lod_level]) or as a dict with these keys."""
+        if isinstance(input_slot, (list, tuple)):
+            name = input_slot[0]
+            shape = input_slot[1]
+            dtype = input_slot[2]
+            lod_level = input_slot[3] if len(input_slot) == 4 else 0
+        elif isinstance(input_slot, dict):
+            name = input_slot["name"]
+            shape = input_slot["shape"]
+            dtype = input_slot["dtype"]
+            lod_level = input_slot.get("lod_level", 0)
+        else:
+            raise TypeError("input_slot must be a list, a tuple or a dict.")
+
+        self.shapes.append(shape)
+        self.dtypes.append(dtype)
+        self.names.append(name)
+        self.lod_levels.append(lod_level)
+
+        self.feed_list_str.append(name)
+
+        return self
+
+    def __getattr__(self, name):
+        """Return the data layer built for slot `name`; only called when normal lookup fails."""
+        slots = self.__dict__.get("input_slots", {})  # avoids re-entering __getattr__
+        if name not in slots:
+            raise Warning("the attr %s has not been defined yet." % name)
+
+        return slots[name]
+
+    def build(self, build_pyreader=False, capacity=100, iterable=False):
+        """Create one fluid.layers.data per registered slot and, if asked, a PyReader over them."""
+        for _name, _shape, _dtype, _lod_level in zip(
+                self.names, self.shapes, self.dtypes, self.lod_levels):
+            self.input_slots[_name] = fluid.layers.data(
+                name=_name, shape=_shape, dtype=_dtype, lod_level=_lod_level)
+
+        for name in self.feed_list_str:
+            self.feed_list.append(self.input_slots[name])
+
+        if build_pyreader:
+            self.reader = fluid.io.PyReader(
+                feed_list=self.feed_list, capacity=capacity, iterable=iterable)
+
+    def start(self, generator=None):
+        """Optionally attach a batch generator to the reader, then start the reader."""
+        if generator is not None:
+            self.reader.decorate_batch_generator(generator)
+
+        self.reader.start()
+
+# 训练函数入口，args默认使用argparse
+def do_train(args):
+
+    train_prog = fluid.default_main_program()
+    startup_prog = fluid.default_startup_program()
+
+    with fluid.program_guard(train_prog, startup_prog):
+        train_prog.random_seed = args.random_seed
+        startup_prog.random_seed = args.random_seed
+
+        with fluid.unique_name.guard():
+
+            # define input and reader
+            # 对于比较简单的任务可以直接写 input_data = fluid.layers.data()
+
+            input_slots = [{
+                "name": "src_ids",
+                "shape": (-1, args.max_seq_len, 1),
+                "dtype": "int64"
+            }, 
+            ...
+            {
+                "name": "is_null_answer",
+                "shape": (-1, 1),
+                "dtype": "int64"
+            }]
+
+            input_field = InputField(input_slots)
+            input_field.build(build_pyreader=True)
+
+            # define the network
+            # 调用 XXX_net.py 声明网络定义，如果有多个网络，可以通过args进行分支判断
+            
+            loss = create_net(
+                is_training=True, model_input=input_field, args=args)
+            
+            # 如果使用了GC，则需要保留persistable的定义（for 1.5），1.6后移除
+            loss.persistable = True
+
+            # define the optimizer
+
+            optimizor = optimization(
+                loss=loss)
+
+    # prepare training
+
+    ## decorate the pyreader with batch_generator
+    ## 定义和装配reader，reader统一采用PyReader（注意并非py_reader）
+    input_field.reader.decorate_batch_generator(batch_generator)
+
+    ## define the executor and program for training
+    
+    # 根据args.use_cuda定义模型的place
+    # 后续如果框架支持自动适配place则不需加入
+    
+    if args.use_cuda:
+        place = fluid.CUDAPlace(0)
+    else:
+        place = fluid.CPUPlace()
+
+    exe = fluid.Executor(place)
+
+    exe.run(startup_prog)
+
+    assert (args.init_from_checkpoint == "") or (
+        args.init_from_pretrain_model == "")
+
+    ## init from some checkpoint, to resume the previous training
+    if args.init_from_checkpoint:
+        init_from_checkpoint(args, exe, train_prog)
+
+    ## init from some pretrain models, to better solve the current task
+    if args.init_from_pretrain_model:
+        init_from_pretrain_model(args, exe, train_prog)
+
+    # 统一通过 compile_program + with_data_parallel 开启GPU多卡和CPU多核训练
+    # 注意这里要区分一下CPU和GPU运行环境的配置，建议对每个场景都尽量能达到最优配置，防止用户吐槽
+    
+    build_strategy = fluid.compiler.BuildStrategy()
+    build_strategy.enable_inplace = True
+
+    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
+        loss_name=loss.name, build_strategy=build_strategy)
+
+    # start training
+
+    step = 0
+    for epoch_step in range(args.epoch):
+        input_field.reader.start()
+        while True:
+            try:
+
+                # this is for minimizing the fetching op, saving the training speed.
+                if step % args.print_step == 0:
+                    fetch_list = [loss.name]
+                else:
+                    fetch_list = []
+
+                output = exe.run(compiled_train_prog, fetch_list=fetch_list)
+
+                if step % args.print_step == 0:
+                    print("step: %d, loss: %.4f" % (step, np.sum(output[0])))
+
+                if step % args.save_step == 0 and step != 0:
+
+                    if args.save_checkpoint:
+                        save_checkpoint(args, exe, train_prog,
+                                        "step_" + str(step))
+
+                    if args.save_param:
+                        save_param(args, exe, train_prog, "step_" + str(step))
+
+                step += 1
+
+            except fluid.core.EOFException:
+                input_field.reader.reset()
+                break
+
+    if args.save_checkpoint:
+        save_checkpoint(args, exe, train_prog, "step_final")
+
+    if args.save_param:
+        save_param(args, exe, train_prog, "step_final")
+
+
+if __name__ == "__main__":
+    # 参数控制可以根据需求使用argparse，yaml或者json
+    
+    args = PDConfig(yaml_file="./data/config/squad1.yaml")
+    args.build()
+    args.Print()
+
+    do_train(args)
+    
+```
+predict.py 和 eval.py 的定义可以参考train.py进行。
+
+#### main.py书写示例
+
+```python
+#encoding=utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+
+from train import do_train
+from predict import do_predict
+from eval import do_eval
+from inference_model import do_save_inference_model
+
+if __name__ == "__main__":
+
+    # 可选，载入预定义参数
+    args = PDConfig(yaml_file="./data/config/squad1.yaml")
+    args.build()
+    
+    # 打印参数，帮助用户提早发现模型中的问题
+    args.Print()
+
+    if args.do_train:
+        do_train(args)
+
+    if args.do_predict:
+        do_predict(args)
+
+    if args.do_eval:
+        do_eval(args)
+
+    if args.do_save_inference_model:
+        do_save_inference_model(args)
+
+
+```
+
+#### 整体需要注意的问题
+
+- 如果模型依赖paddlepaddle未涵盖的依赖（如 pandas），则需要在README中显式提示用户安装对应依赖。
+
+- reader部分统一采用PyReader(http://paddlepaddle.org/documentation/docs/zh/1.4/api_cn/io_cn.html#pyreader)
+  iterable=True or False还在验证中，目前不做具体要求。
+
+- executor统一采用executor + compile_program，不再使用parallel_executor
+
+- 参数控制使用双轨制：argparse + json （或者argparse + yaml），根据具体场景而定
+
+- 对于非常复杂的模型（输入数据很多（fluid.layers.data），如BERT），需要书写模型的人具体定义一个数据类，用于管理数据（可以参考PALM的实现方式）
+
+- create_model需要统一区分is_training，以便于处理训练和预测网络结构不同的情况
+
+- program_guard 和 unique_name.guard()是否需要显式写明，视具体任务而定，对于简单任务，可以不写，对于复杂任务推荐写明。
+
+- 模型IO部分，区分以下场景：
+  - checkpoint，用于恢复训练，使用API save/load_persistables，save和load的时候需要手动指定文件后缀".pdckpt"
+  - 参数文件，用于进行预测或者保存预训练模型，使用API save/load_params，save的时候需要手动指定文件后缀".pdparams"，对load不做要求（为了适配过去的预训练模型）
+  - inference_model，用于保存部署模型，使用API save/load_inference_model，需要手动指定文件后缀为".pdparams"和".pdmodel"。
+
+- 显存优化，统一废弃 fluid.memory_optimize，统一改为开启GC，参见[**显存优化**](#显存优化)
+
+- 随机控制，需要尽量固定program，numpy等含有随机因素模块的随机种子，保证模型可以正常复现。
+
+- 超参数：模型内部超参数禁止写死，尽量都可以通过配置文件进行配置。
+
+
+### 命名规范和使用规范
+1. 文件和文件夹命名中，尽量使用下划线`_`代替空格，不要使用`-`。
+2. 模型定义过程中，需要有一个统一的变量（parameter）命名管理手段，如尽量手动声明每个变量的名字并支持名称可变，禁止将名称定义为一个常数（如"embedding"），避免用户在复用代码阶段出现各种诡异的问题。
+3. 重要文件，变量的名称定义过程中需要能够通过名字表明含义，禁止使用含混不清的名称，如net.py, aaa.py等。
+4. 在代码中定义path时，需要使用os.path.join完成，禁止使用string加的方式，导致模型对windows环境缺乏支持。
+5. 其他代码规范需要遵守paddlepaddle python规范。
+
+### README
+
+```
+写在前面
+
+（1） 文档用.md格式撰写，请在github上对应模型的目录下将文档命名为README.md，默认为中文；如果需要添加英文请使用README_en.md文件，并在README.md中添加"English"超链接。
+
+（2） 如果文档中包含图片，请单独建立images子目录并将图片放置在该目录下。
+
+（3） 请使用正确的语法和句法撰写文档。如果提交的是英文文档，请尽量使用简单句描述，减少长难句和从句，推荐用grammarly检查过英文语法后再提交。
+
+（4） 写文档的目的是为用户提供使用说明书，如果一个模型/产品做的很棒，但说明书写的不全/表述不清，用户理解起来会比较困难。请大家认真对待文档工作。
+
+
+
+——————以下为Models文档撰写标准——————
+
+# 模型名称
+
+## 目录（可选）
+* [模型简介](#模型简介)
+* [快速开始](#快速开始)
+* [进阶使用](#进阶使用)
+* [参考论文](#参考论文)
+* [版本更新](#版本更新)
+* [作者](#作者)
+
+## 1、模型简介
+
+简要介绍本算法，这是什么，有什么用，发布了哪些内容，对比竞品有哪些亮点。
+
+
+
+## 2、快速开始
+
+### 安装说明
+
+声明该模型对PaddlePaddle版本的要求；python版本要求
+
+### 任务简介
+
+任务介绍，比如Transformer选用wmt2016任务
+
+### 数据准备
+
+自定义数据：数据格式说明和如何使用自定义数据
+
+公开数据集：1-3句概要介绍，提供公开数据集百度云的下载链接，数据目录结构简要介绍，如有接口封装可简要介绍
+
+建议提供一键式的数据下载、预处理脚本
+
+
+
+### 单机训练
+
+在PaddlePaddle上是如何启动单机多卡训练模型的，介绍训练命令，给出训练结果
+
+模型输出统一设为该模型所在的目录。需要写清楚关键的步骤，并附上代码
+
+### 模型预估
+
+模型评估方式简要介绍
+
+模型Benchmarks
+实验配置 任务、数据规模、参数设置、机器配置、cuda、cudnn版本
+实验结果 精度、训练速度、显存占用、训练时长
+
+### 模型推断
+
+在PaddlePaddle上是如何启动模型推断，介绍推断命令，给出推断结果，如有可视化结果需要给出可视化结果。
+
+实验配置 任务、数据规模、参数设置、机器配置、cuda、cudnn版本
+实验结果 精度、预测速度
+
+### 预训练模型
+
+要求：不同数据集、不同配置的预训练模型介绍及下载地址
+
+### 服务部署
+
+简要介绍帮助用户在自己产品中使用训练好的模型
+
+1）服务器部署
+
+如何在服务器端进行部署使用，通用操作参考预测部署文档，特殊操作在这里说明
+
+2）模型压缩（可选）
+
+如何做模型压缩
+
+3）移动端部署（可选）
+
+如何使用Paddle-Mobile进行移动端部署
+
+
+
+## 3、进阶使用
+
+### 背景介绍
+
+“背景介绍”的写法可参考一般学术论文中的概览
+
+可参考以下结构：
+
+处理xxx问题，传统的做法是xxx，但存在缺点，如xxxx。在本模型中，用了xxx方法，它的特点是xxx，和传统方法相比，优势是xxx等。
+
+也可以根据实际情况自由组织语言。
+
+### 模型概览
+
+模型3-5句概要介绍，模型的网络结构简要介绍，附上结构图。
+
+### 模型特点 / 关键概念
+
+a. 图文结合展开介绍本算法的原理、网络结构详解、算法先进性等，突出亮点。
+
+b. 图文结合展开介绍训练阶段、预测阶段做的特色优化。
+
+### 分布式训练（可选）
+
+在PaddlePaddle上是如何启动分布式训练模型的，介绍训练命令，给出训练结果
+
+模型输出统一设为该模型所在的目录。需要写清楚关键的步骤，并附上代码
+
+### 增量训练（可选）
+
+要求：简要介绍如何使用用户自有数据训练、如何在预训练模型上进行Fine-Tuning
+
+## FAQ
+
+常见问题及解答
+
+
+
+## 参考论文
+
+参考论文列表
+
+
+
+## 版本更新
+
+重要历史版本更新信息（重要的功能）
+
+
+
+## 作者
+
+模型建设参与团队与个人介绍
+
+
+
+## 如何贡献代码
+
+如果你可以修复某个issue或者增加一个新功能，欢迎给我们提交PR。如果对应的PR被接受了，我们将根据贡献的质量和难度进行打分（0-5分，越高越好）。如果你累计获得了10分，可以联系我们获得面试机会或者为你写推荐信。
+```
+
+### 多环境支持
+
+- 对于安装paddlepaddle-gpu的用户：
+  
+    训练命令：在run.sh中，给出GPU单卡，GPU多卡，CPU多核的训练命令样例，供用户选择
+    
+    预测/评估命令：在run.sh中，给出GPU单卡，CPU单核的预测命令样例，供用户选择
+
+- 对于安装paddlepaddle的用户（CPU版本）：
+
+  如果用户安装了CPU版本的paddle，但是执行了使用GPU的命令，则通过框架报错的方式提示用户使用CPU命令进行训练和预测。
+  
+  同时，模型的开发人员也要通过fluid.is_compiled_with_cuda()接口，判断安装paddle的类型（GPU or CPU），如果用户使用CPU版本paddle但是指定了CUDAPlace，模型需要给出warning，引导用户修改配置或者安装GPU版本paddle，并自动退出。（参考：https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/models/model_check.py）
+  
+  如果模型库提供的模型不支持CPU环境运行或者在CPU环境运行会有致命缺陷（如速度慢到无法接受），则需要在README显著位置提示用户安装paddle-GPU。
+
+- 硬件和操作系统环境： 请参考：http://agroup.baidu.com/paddlepaddle/view/office/1860862
+
+### 注释和Licenses
+对于代码中重要的部分，包括但不仅限于：
+- Reader的定义。
+- Data Processor 和 Data Generator的定义。
+- 整个模型定义，包括input，运算过程，loss等内容。
+- init，save，load，等io部分
+- 运行中间的关键状态，如print loss，save model等。
+
+对于整个模型代码，都需要在文件头内加入licenses，readme中加入licenses标识。
+
+需要加入注释介绍功能，帮助用户快速熟悉代码结构。
+
+## 建议规范
+
+以下为建议规则，不强制要求，但是建议模型库使用。
+
+### 显存优化
+
+对于过去使用或者需要使用显存优化功能的模型，建议使用GC开启显存优化，开启GC的最佳方法为：
+
+```shell
+
+设置环境变量：FLAGS_eager_delete_tensor_gb=0.0
+
+设置build_strategy.enable_inplace = True
+
+对需要fetch的vars: 设置 persistable = True
+
+```
+
+
+## 合入规则
+
+### 代码提交
+
+请遵守 [paddle官方代码提交规则](http://paddlepaddle.org/documentation/docs/zh/1.4/advanced_usage/development/contribute_to_paddle/index_cn.html)提交代码。
+
+
+## 升级维护
+
+（1） 官方库进入CE，部分核心模型进入CI单测。
+
+（2） 原则上，Paddle只维护官方库，对于非官方库（如一些基于paddle的paper work），由具体业务方负责。
+
+（3） 框架如有重大更新（包括reader，executor，save/load等），建议先在官方模型下回归一下效果，对比最终效果diff，效率diff，易用性（缩减的代码行数）等内容，用于指导发布和上线。
+
diff --git a/0.coding_guideline/appendix/README.MD b/0.coding_guideline/appendix/README.MD
new file mode 100644
index 0000000000000000000000000000000000000000..ed1e5e63cf9d064cc1da1b8c0ebff1c05fccec18
--- /dev/null
+++ b/0.coding_guideline/appendix/README.MD
@@ -0,0 +1 @@
+Stores some images here.
diff --git a/0.coding_guideline/appendix/dir-1.png b/0.coding_guideline/appendix/dir-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..05cfa4167bfedc83c327a9b49f8eb034b101f0e5
Binary files /dev/null and b/0.coding_guideline/appendix/dir-1.png differ
diff --git a/0.coding_guideline/appendix/dir-2.png b/0.coding_guideline/appendix/dir-2.png
new file mode 100644
index 0000000000000000000000000000000000000000..837ce1f7ace7f5bdc1d797ac9b48ee20255d6b7f
Binary files /dev/null and b/0.coding_guideline/appendix/dir-2.png differ
diff --git a/0.coding_guideline/appendix/dir-3.png b/0.coding_guideline/appendix/dir-3.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9431cfb7484a9d1d27bfb10dea2541955e571c7
Binary files /dev/null and b/0.coding_guideline/appendix/dir-3.png differ
diff --git a/0.coding_guideline/appendix/dir-4.png b/0.coding_guideline/appendix/dir-4.png
new file mode 100644
index 0000000000000000000000000000000000000000..fc5ddb8069402d6b1e28360c5285b37018795f6a
Binary files /dev/null and b/0.coding_guideline/appendix/dir-4.png differ
diff --git a/0.coding_guideline/appendix/dir-5.png b/0.coding_guideline/appendix/dir-5.png
new file mode 100644
index 0000000000000000000000000000000000000000..1182ecfa5b249c0c10f66d2c497b3d4262fb74fa
Binary files /dev/null and b/0.coding_guideline/appendix/dir-5.png differ
diff --git a/0.coding_guideline/appendix/readme.png b/0.coding_guideline/appendix/readme.png
new file mode 100644
index 0000000000000000000000000000000000000000..b0b257a237fd6ac9e5731d257cbffffe3921326c
Binary files /dev/null and b/0.coding_guideline/appendix/readme.png differ
diff --git a/01.fit_a_line/README.cn.md b/01.fit_a_line/README.cn.md
index e95337d76003269caa27786500e32b91e3a53a21..715bc98e5dcbbcfb340478c5de8d448fa4d4cc55 100644
--- a/01.fit_a_line/README.cn.md
+++ b/01.fit_a_line/README.cn.md
@@ -154,52 +154,48 @@ test_reader = paddle.batch(
         batch_size=BATCH_SIZE)
 ```
 
-如果想直接从txt文件中读取数据的话，可以参考以下方式。
-
+如果想直接从txt文件中读取数据的话，可以参考以下方式(需要自行准备txt文件)。
+```text
 feature_names = [
     'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
     'PTRATIO', 'B', 'LSTAT', 'convert'
 ]
-
 feature_num = len(feature_names)
-
 data = numpy.fromfile(filename, sep=' ') # 从文件中读取原始数据
-
 data = data.reshape(data.shape[0] // feature_num, feature_num)
-
 maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(axis=0)/data.shape[0]
 
 for i in six.moves.range(feature_num-1):
- data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) # six.moves可以兼容python2和python3
+   data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) # six.moves可以兼容python2和python3
 
 ratio = 0.8 # 训练集和验证集的划分比例
-
 offset = int(data.shape[0]*ratio)
-
 train_data = data[:offset]
-
 test_data = data[offset:]
 
-def reader(data):
-    for d in train_data:
-        yield d[:1], d[-1:]
+def reader_creator(train_data):  
+    def reader():  
+        for d in train_data:  
+            yield d[:-1], d[-1:]  
+    return reader
 
 train_reader = paddle.batch(
     paddle.reader.shuffle(
-        reader(train_data), buf_size=500),
+        reader_creator(train_data), buf_size=500),
         batch_size=BATCH_SIZE)
 
 test_reader = paddle.batch(
     paddle.reader.shuffle(
-        reader(test_data), buf_size=500),
+        reader_creator(test_data), buf_size=500),
         batch_size=BATCH_SIZE)
+```
 
 ### 配置训练程序
 训练程序的目的是定义一个训练模型的网络结构。对于线性回归来讲，它就是一个从输入到输出的简单的全连接层。更加复杂的结果，比如卷积神经网络，递归神经网络等会在随后的章节中介绍。训练程序必须返回`平均损失`作为第一个返回值，因为它会被后面反向传播算法所用到。
 
 ```python
-x = fluid.layers.data(name='x', shape=[13], dtype='float32') # 定义输入的形状和数据类型
-y = fluid.layers.data(name='y', shape=[1], dtype='float32') # 定义输出的形状和数据类型
+x = fluid.data(name='x', shape=[-1, 13], dtype='float32') # 定义输入的形状和数据类型
+y = fluid.data(name='y', shape=[-1, 1], dtype='float32') # 定义输出的形状和数据类型
 y_predict = fluid.layers.fc(input=x, size=1, act=None) # 连接输入和输出的全连接层
 
 main_program = fluid.default_main_program() # 获取默认/全局主函数
diff --git a/01.fit_a_line/README.md b/01.fit_a_line/README.md
index f8a6e8ee27dbff873f907776d824db26e5512167..29fdc1a6c72c4f3da0939688359fb98b296f87f6 100644
--- a/01.fit_a_line/README.md
+++ b/01.fit_a_line/README.md
@@ -156,48 +156,48 @@ test_reader = paddle.batch(
         batch_size=BATCH_SIZE)
 ```
 
-If you want to read data directly from \*.txt file, you can refer to the method as follows.
-
+If you want to read data directly from a \*.txt file, you can refer to the following method (you need to prepare the txt file yourself).
+```text
 feature_names = [
     'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
     'PTRATIO', 'B', 'LSTAT', 'convert'
 ]
-
 feature_num = len(feature_names)
-
 data = numpy.fromfile(filename, sep=' ') # Read primary data from file
-
 data = data.reshape(data.shape[0] // feature_num, feature_num)
-
 maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(axis=0)/data.shape[0]
 
 for i in six.moves.range(feature_num-1):
- data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) # six.moves is compatible to python2 and python3
+   data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) # six.moves is compatible to python2 and python3
 
 ratio = 0.8 # distribution ratio of train dataset and verification dataset
-
-offset = int(data.shape[0]\*ratio)
+offset = int(data.shape[0]*ratio)
-
 train_data = data[:offset]
-
 test_data = data[offset:]
 
+def reader_creator(train_data):  
+    def reader():  
+        for d in train_data:  
+            yield d[:-1], d[-1:]  
+    return reader
+
 train_reader = paddle.batch(
     paddle.reader.shuffle(
-        train_data, buf_size=500),
+        reader_creator(train_data), buf_size=500),
         batch_size=BATCH_SIZE)
 
 test_reader = paddle.batch(
     paddle.reader.shuffle(
-        test_data, buf_size=500),
+        reader_creator(test_data), buf_size=500),
         batch_size=BATCH_SIZE)
+```
 
 ### Configure Program for Training
 The aim of the program for training is to define a network structure of a training model. For linear regression, it is a simple fully connected layer from input to output. More complex result, such as Convolutional Neural Network and Recurrent Neural Network, will be introduced in later chapters. It must return `mean error` as the first return value in program for training, for that `mean error` will be used for BackPropagation.
 
 ```python
-x = fluid.layers.data(name='x', shape=[13], dtype='float32') # define shape and data type of input
-y = fluid.layers.data(name='y', shape=[1], dtype='float32') # define shape and data type of output
+x = fluid.data(name='x', shape=[-1, 13], dtype='float32') # define shape and data type of input
+y = fluid.data(name='y', shape=[-1, 1], dtype='float32') # define shape and data type of output
 y_predict = fluid.layers.fc(input=x, size=1, act=None) # fully connected layer connecting input and output
 
 main_program = fluid.default_main_program() # get default/global main function
diff --git a/01.fit_a_line/index.cn.html b/01.fit_a_line/index.cn.html
index 4c99b3e552f2bbaf0751f6a1f2034232dbfd26fa..40d91286ee06e375b056bdea95e5a2bbf5a43328 100644
--- a/01.fit_a_line/index.cn.html
+++ b/01.fit_a_line/index.cn.html
@@ -196,52 +196,48 @@ test_reader = paddle.batch(
         batch_size=BATCH_SIZE)
 ```
 
-如果想直接从txt文件中读取数据的话，可以参考以下方式。
-
+如果想直接从txt文件中读取数据的话，可以参考以下方式(需要自行准备txt文件)。
+```text
 feature_names = [
     'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
     'PTRATIO', 'B', 'LSTAT', 'convert'
 ]
-
 feature_num = len(feature_names)
-
 data = numpy.fromfile(filename, sep=' ') # 从文件中读取原始数据
-
 data = data.reshape(data.shape[0] // feature_num, feature_num)
-
 maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(axis=0)/data.shape[0]
 
 for i in six.moves.range(feature_num-1):
- data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) # six.moves可以兼容python2和python3
+   data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) # six.moves可以兼容python2和python3
 
 ratio = 0.8 # 训练集和验证集的划分比例
-
 offset = int(data.shape[0]*ratio)
-
 train_data = data[:offset]
-
 test_data = data[offset:]
 
-def reader(data):
-    for d in train_data:
-        yield d[:1], d[-1:]
+def reader_creator(train_data):  
+    def reader():  
+        for d in train_data:  
+            yield d[:-1], d[-1:]  
+    return reader
 
 train_reader = paddle.batch(
     paddle.reader.shuffle(
-        reader(train_data), buf_size=500),
+        reader_creator(train_data), buf_size=500),
         batch_size=BATCH_SIZE)
 
 test_reader = paddle.batch(
     paddle.reader.shuffle(
-        reader(test_data), buf_size=500),
+        reader_creator(test_data), buf_size=500),
         batch_size=BATCH_SIZE)
+```
 
 ### 配置训练程序
 训练程序的目的是定义一个训练模型的网络结构。对于线性回归来讲，它就是一个从输入到输出的简单的全连接层。更加复杂的结果，比如卷积神经网络，递归神经网络等会在随后的章节中介绍。训练程序必须返回`平均损失`作为第一个返回值，因为它会被后面反向传播算法所用到。
 
 ```python
-x = fluid.layers.data(name='x', shape=[13], dtype='float32') # 定义输入的形状和数据类型
-y = fluid.layers.data(name='y', shape=[1], dtype='float32') # 定义输出的形状和数据类型
+x = fluid.data(name='x', shape=[-1, 13], dtype='float32') # 定义输入的形状和数据类型
+y = fluid.data(name='y', shape=[-1, 1], dtype='float32') # 定义输出的形状和数据类型
 y_predict = fluid.layers.fc(input=x, size=1, act=None) # 连接输入和输出的全连接层
 
 main_program = fluid.default_main_program() # 获取默认/全局主函数
diff --git a/01.fit_a_line/index.html b/01.fit_a_line/index.html
index 5c29f8ed37f0159ea935da3684715ed8975e812d..184e8253bda3711afe6f3f2d60fc21aab2bcdb1c 100644
--- a/01.fit_a_line/index.html
+++ b/01.fit_a_line/index.html
@@ -198,48 +198,48 @@ test_reader = paddle.batch(
         batch_size=BATCH_SIZE)
 ```
 
-If you want to read data directly from \*.txt file, you can refer to the method as follows.
-
+If you want to read data directly from a \*.txt file, you can refer to the following method (you need to prepare the txt file yourself).
+```text
 feature_names = [
     'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX',
     'PTRATIO', 'B', 'LSTAT', 'convert'
 ]
-
 feature_num = len(feature_names)
-
 data = numpy.fromfile(filename, sep=' ') # Read primary data from file
-
 data = data.reshape(data.shape[0] // feature_num, feature_num)
-
 maximums, minimums, avgs = data.max(axis=0), data.min(axis=0), data.sum(axis=0)/data.shape[0]
 
 for i in six.moves.range(feature_num-1):
- data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) # six.moves is compatible to python2 and python3
+   data[:, i] = (data[:, i] - avgs[i]) / (maximums[i] - minimums[i]) # six.moves is compatible to python2 and python3
 
 ratio = 0.8 # distribution ratio of train dataset and verification dataset
-
-offset = int(data.shape[0]\*ratio)
+offset = int(data.shape[0]*ratio)
-
 train_data = data[:offset]
-
 test_data = data[offset:]
 
+def reader_creator(train_data):  
+    def reader():  
+        for d in train_data:  
+            yield d[:-1], d[-1:]  
+    return reader
+
 train_reader = paddle.batch(
     paddle.reader.shuffle(
-        train_data, buf_size=500),
+        reader_creator(train_data), buf_size=500),
         batch_size=BATCH_SIZE)
 
 test_reader = paddle.batch(
     paddle.reader.shuffle(
-        test_data, buf_size=500),
+        reader_creator(test_data), buf_size=500),
         batch_size=BATCH_SIZE)
+```
 
 ### Configure Program for Training
 The aim of the program for training is to define a network structure of a training model. For linear regression, it is a simple fully connected layer from input to output. More complex result, such as Convolutional Neural Network and Recurrent Neural Network, will be introduced in later chapters. It must return `mean error` as the first return value in program for training, for that `mean error` will be used for BackPropagation.
 
 ```python
-x = fluid.layers.data(name='x', shape=[13], dtype='float32') # define shape and data type of input
-y = fluid.layers.data(name='y', shape=[1], dtype='float32') # define shape and data type of output
+x = fluid.data(name='x', shape=[-1, 13], dtype='float32') # define shape and data type of input
+y = fluid.data(name='y', shape=[-1, 1], dtype='float32') # define shape and data type of output
 y_predict = fluid.layers.fc(input=x, size=1, act=None) # fully connected layer connecting input and output
 
 main_program = fluid.default_main_program() # get default/global main function
diff --git a/01.fit_a_line/train.py b/01.fit_a_line/train.py
index b2c21574f622a8ce0403454710cd8b90948ac779..2ebe21e6fd9319c18369d177684ed62186bedd82 100644
--- a/01.fit_a_line/train.py
+++ b/01.fit_a_line/train.py
@@ -87,8 +87,8 @@ def main():
             batch_size=batch_size)
 
     # feature vector of length 13
-    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
-    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+    x = fluid.data(name='x', shape=[-1, 13], dtype='float32')
+    y = fluid.data(name='y', shape=[-1, 1], dtype='float32')
 
     main_program = fluid.default_main_program()
     startup_program = fluid.default_startup_program()
diff --git a/02.recognize_digits/README.cn.md b/02.recognize_digits/README.cn.md
index 33b528fefd05ab3d8f58be909378e719596b3ca8..31c2adbb6b27fd53179643af099f336d713d3483 100644
--- a/02.recognize_digits/README.cn.md
+++ b/02.recognize_digits/README.cn.md
@@ -195,7 +195,7 @@ import paddle.fluid as fluid
 ### Program Functions 配置
 
 我们需要设置 `inference_program` 函数。我们想用这个程序来演示三个不同的分类器，每个分类器都定义为 Python 函数。
-我们需要将图像数据输入到分类器中。Paddle 为读取数据提供了一个特殊的层 `layer.data` 层。
+我们需要将图像数据输入到分类器中。Paddle 为读取数据提供了一个特殊的层 `fluid.data` 层。
 让我们创建一个数据层来读取图像并将其连接到分类网络。
 
 - Softmax回归：只通过一层简单的以softmax为激活函数的全连接层，就可以得到分类的结果。
@@ -209,7 +209,7 @@ def softmax_regression():
         predict_image -- 分类的结果
     """
     # 输入的原始图像数据，大小为28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # 以softmax为激活函数的全连接层，输出层的大小必须为数字的个数10
     predict = fluid.layers.fc(
         input=img, size=10, act='softmax')
@@ -229,7 +229,7 @@ def multilayer_perceptron():
         predict_image -- 分类的结果
     """
     # 输入的原始图像数据，大小为28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # 第一个全连接层，激活函数为ReLU
     hidden = fluid.layers.fc(input=img, size=200, act='relu')
     # 第二个全连接层，激活函数为ReLU
@@ -251,7 +251,7 @@ def convolutional_neural_network():
         predict -- 分类的结果
     """
     # 输入的原始图像数据，大小为28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # 第一个卷积-池化层
     # 使用20个5*5的滤波器，池化大小为2，池化步长为2，激活函数为Relu
     conv_pool_1 = fluid.nets.simple_img_conv_pool(
@@ -296,7 +296,7 @@ def train_program():
 
     """
     # 标签层，名称为label,对应输入图片的类别标签
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
 
     # predict = softmax_regression() # 取消注释将使用 Softmax回归
     # predict = multilayer_perceptron() # 取消注释将使用 多层感知器
diff --git a/02.recognize_digits/README.md b/02.recognize_digits/README.md
index b0832f537c0dc1da49a66dc023f5fc7aa17f0196..b3ca3c86f4b300841a6e0df7733453e0762c27a6 100644
--- a/02.recognize_digits/README.md
+++ b/02.recognize_digits/README.md
@@ -174,7 +174,7 @@ import paddle.fluid as fluid
 ### Program Functions Configuration
 
 We need to configure `inference_program` function. We want to use this program to show three different classifiers, each of which is defined as a Python function.
-We need to input the image data into the classifier. Paddle provides a special layer `layer.data` for reading data.
+We need to input the image data into the classifier. Paddle provides a special layer `fluid.data` for reading data.
 Let's create a data layer to read the image and connect it to the network of classification.
 
 -Softmax regression: The results of the classification can be obtained only through a simple layer of simple fully connected layer with softmax as the activation function.
@@ -188,7 +188,7 @@ def softmax_regression():
     predict_image -- result of classification
     """
     # input original image data in size of 28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # With softmax as the fully connected layer of the activation function, the size of the output layer must be 10
     predict = fluid.layers.fc(
     input=img, size=10, act='softmax')
@@ -208,7 +208,7 @@ def multilayer_perceptron():
     predict_image -- result of classification
     """
     # input raw image data in size of 28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # the first fully connected layer, whose activation function is ReLU
     hidden = fluid.layers.fc(input=img, size=200, act='relu')
     # the second fully connected layer, whose activation function is ReLU
@@ -230,7 +230,7 @@ def convolutional_neural_network():
     predict -- result of classification
     """
     # input raw image data in size of 28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # the first convolution-pooling layer
     # Use 20 5*5 filters, the pooling size is 2, the pooling step is 2, and the activation function is Relu.
     conv_pool_1 = fluid.nets.simple_img_conv_pool(
@@ -275,7 +275,7 @@ def train_program():
 
     """
     # label layer, called label, correspondent with label category of input picture
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
 
     # predict = softmax_regression() # cancel note and run Softmax regression
     # predict = multilayer_perceptron() # cancel note and run multiple perceptron
diff --git a/02.recognize_digits/index.cn.html b/02.recognize_digits/index.cn.html
index 5733f2db12e1f20d629becfcd95ac8c4ef3cafb4..328815220ae2521fdaf91e7bb4cf3ef3aa29698f 100644
--- a/02.recognize_digits/index.cn.html
+++ b/02.recognize_digits/index.cn.html
@@ -237,7 +237,7 @@ import paddle.fluid as fluid
 ### Program Functions 配置
 
 我们需要设置 `inference_program` 函数。我们想用这个程序来演示三个不同的分类器，每个分类器都定义为 Python 函数。
-我们需要将图像数据输入到分类器中。Paddle 为读取数据提供了一个特殊的层 `layer.data` 层。
+我们需要将图像数据输入到分类器中。Paddle 为读取数据提供了一个特殊的层 `fluid.data` 层。
 让我们创建一个数据层来读取图像并将其连接到分类网络。
 
 - Softmax回归：只通过一层简单的以softmax为激活函数的全连接层，就可以得到分类的结果。
@@ -251,7 +251,7 @@ def softmax_regression():
         predict_image -- 分类的结果
     """
     # 输入的原始图像数据，大小为28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # 以softmax为激活函数的全连接层，输出层的大小必须为数字的个数10
     predict = fluid.layers.fc(
         input=img, size=10, act='softmax')
@@ -271,7 +271,7 @@ def multilayer_perceptron():
         predict_image -- 分类的结果
     """
     # 输入的原始图像数据，大小为28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # 第一个全连接层，激活函数为ReLU
     hidden = fluid.layers.fc(input=img, size=200, act='relu')
     # 第二个全连接层，激活函数为ReLU
@@ -293,7 +293,7 @@ def convolutional_neural_network():
         predict -- 分类的结果
     """
     # 输入的原始图像数据，大小为28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # 第一个卷积-池化层
     # 使用20个5*5的滤波器，池化大小为2，池化步长为2，激活函数为Relu
     conv_pool_1 = fluid.nets.simple_img_conv_pool(
@@ -338,7 +338,7 @@ def train_program():
 
     """
     # 标签层，名称为label,对应输入图片的类别标签
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
 
     # predict = softmax_regression() # 取消注释将使用 Softmax回归
     # predict = multilayer_perceptron() # 取消注释将使用 多层感知器
diff --git a/02.recognize_digits/index.html b/02.recognize_digits/index.html
index 44c80a1644a2bc3f1f06c1eaab95221354d45003..9ab5b1eccc9c35a1364b486ede01571d780d7c4e 100644
--- a/02.recognize_digits/index.html
+++ b/02.recognize_digits/index.html
@@ -216,7 +216,7 @@ import paddle.fluid as fluid
 ### Program Functions Configuration
 
 We need to configure `inference_program` function. We want to use this program to show three different classifiers, each of which is defined as a Python function.
-We need to input the image data into the classifier. Paddle provides a special layer `layer.data` for reading data.
+We need to input the image data into the classifier. Paddle provides a special layer `fluid.data` for reading data.
 Let's create a data layer to read the image and connect it to the network of classification.
 
 -Softmax regression: The results of the classification can be obtained only through a simple layer of simple fully connected layer with softmax as the activation function.
@@ -230,7 +230,7 @@ def softmax_regression():
     predict_image -- result of classification
     """
     # input original image data in size of 28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # With softmax as the fully connected layer of the activation function, the size of the output layer must be 10
     predict = fluid.layers.fc(
     input=img, size=10, act='softmax')
@@ -250,7 +250,7 @@ def multilayer_perceptron():
     predict_image -- result of classification
     """
     # input raw image data in size of 28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # the first fully connected layer, whose activation function is ReLU
     hidden = fluid.layers.fc(input=img, size=200, act='relu')
     # the second fully connected layer, whose activation function is ReLU
@@ -272,7 +272,7 @@ def convolutional_neural_network():
     predict -- result of classification
     """
     # input raw image data in size of 28*28*1
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
     # the first convolution-pooling layer
     # Use 20 5*5 filters, the pooling size is 2, the pooling step is 2, and the activation function is Relu.
     conv_pool_1 = fluid.nets.simple_img_conv_pool(
@@ -317,7 +317,7 @@ def train_program():
 
     """
     # label layer, called label, correspondent with label category of input picture
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
 
     # predict = softmax_regression() # cancel note and run Softmax regression
     # predict = multilayer_perceptron() # cancel note and run multiple perceptron
diff --git a/02.recognize_digits/train.py b/02.recognize_digits/train.py
index 04fbc29cc74b69b1bfa513ebbc4725a9c20761df..27cb3c72ed20a63993c5fc1c11cd222e5d72b5ef 100644
--- a/02.recognize_digits/train.py
+++ b/02.recognize_digits/train.py
@@ -101,8 +101,8 @@ def train(nn_type,
         test_reader = paddle.batch(
             paddle.dataset.mnist.test(), batch_size=BATCH_SIZE)
 
-    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    img = fluid.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
+    label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
 
     if nn_type == 'softmax_regression':
         net_conf = softmax_regression
diff --git a/03.image_classification/README.cn.md b/03.image_classification/README.cn.md
index 35035a7dd20446de8adaddddc8623477b827ca98..d4fcef9cf0dcf44f8cab19916d36ee06483fce01 100644
--- a/03.image_classification/README.cn.md
+++ b/03.image_classification/README.cn.md
@@ -314,8 +314,8 @@ def resnet_cifar10(ipt, depth=32):
 ```python
 def inference_program():
     # The image is 32 * 32 with RGB representation.
-    data_shape = [3, 32, 32]
-    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+    data_shape = [None, 3, 32, 32]
+    images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
 
     predict = resnet_cifar10(images, 32)
     # predict = vgg_bn_drop(images) # un-comment to use vgg net
@@ -326,7 +326,7 @@ def inference_program():
 
 然后我们需要设置训练程序 `train_program`。它首先从推理程序中进行预测。
 在训练期间，它将从预测中计算 `avg_cost`。
-在有监督训练中需要输入图像对应的类别信息，同样通过`fluid.layers.data`来定义。训练中采用多类交叉熵作为损失函数，并作为网络的输出，预测阶段定义网络的输出为分类器得到的概率信息。
+在有监督训练中需要输入图像对应的类别信息，同样通过`fluid.data`来定义。训练中采用多类交叉熵作为损失函数，并作为网络的输出，预测阶段定义网络的输出为分类器得到的概率信息。
 
 **注意:** 训练程序应该返回一个数组，第一个返回参数必须是 `avg_cost`。训练器使用它来计算梯度。
 
@@ -334,7 +334,7 @@ def inference_program():
 def train_program():
     predict = inference_program()
 
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
     cost = fluid.layers.cross_entropy(input=predict, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=predict, label=label)
diff --git a/03.image_classification/README.md b/03.image_classification/README.md
index fa05900a23c82bd00a1b06f8175d22ecfd4c4d15..4704a4baea4b87f846253de4001e5494e702c34a 100644
--- a/03.image_classification/README.md
+++ b/03.image_classification/README.md
@@ -305,13 +305,13 @@ def resnet_cifar10(ipt, depth=32):
 
 ## Inference Program Configuration
 
-The input to the network is defined as `fluid.layers.data` , corresponding to image pixels in the context of image classification. The images in CIFAR10 are 32x32 coloured images with three channels. Therefore, the size of the input data is 3072 (3x32x32).
+The input to the network is defined as `fluid.data`, corresponding to image pixels in the context of image classification. The images in CIFAR10 are 32x32 coloured images with three channels. Therefore, the size of the input data is 3072 (3x32x32).
 
 ```python
 def inference_program():
     # The image is 32 * 32 with RGB representation.
-    data_shape = [3, 32, 32]
-    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+    data_shape = [None, 3, 32, 32]
+    images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
 
     predict = resnet_cifar10(images, 32)
     # predict = vgg_bn_drop(images) # un-comment to use vgg net
@@ -322,7 +322,7 @@ def inference_program():
 Then we need to set up the the `train_program`. It takes the prediction from the inference_program first.
 During the training, it will calculate the `avg_loss` from the prediction.
 
-In the context of supervised learning, labels of training images are defined in `fluid.layers.data` as well. During training, the multi-class cross-entropy is used as the loss function and becomes the output of the network. During testing, the outputs are the probabilities calculated in the classifier.
+In the context of supervised learning, labels of training images are defined in `fluid.data` as well. During training, the multi-class cross-entropy is used as the loss function and becomes the output of the network. During testing, the outputs are the probabilities calculated in the classifier.
 
 **NOTE:** A training program should return an array and the first returned argument has to be `avg_cost` .
 The trainer always uses it to calculate the gradients.
@@ -331,7 +331,7 @@ The trainer always uses it to calculate the gradients.
 def train_program():
     predict = inference_program()
 
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
     cost = fluid.layers.cross_entropy(input=predict, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=predict, label=label)
diff --git a/03.image_classification/index.cn.html b/03.image_classification/index.cn.html
index 910a837cafaf944fb24f67d7fefce2ddfab3a23d..a0d6d2231e04f0ff2b4431c1e151a8d9172b1225 100644
--- a/03.image_classification/index.cn.html
+++ b/03.image_classification/index.cn.html
@@ -356,8 +356,8 @@ def resnet_cifar10(ipt, depth=32):
 ```python
 def inference_program():
     # The image is 32 * 32 with RGB representation.
-    data_shape = [3, 32, 32]
-    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+    data_shape = [None, 3, 32, 32]
+    images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
 
     predict = resnet_cifar10(images, 32)
     # predict = vgg_bn_drop(images) # un-comment to use vgg net
@@ -368,7 +368,7 @@ def inference_program():
 
 然后我们需要设置训练程序 `train_program`。它首先从推理程序中进行预测。
 在训练期间，它将从预测中计算 `avg_cost`。
-在有监督训练中需要输入图像对应的类别信息，同样通过`fluid.layers.data`来定义。训练中采用多类交叉熵作为损失函数，并作为网络的输出，预测阶段定义网络的输出为分类器得到的概率信息。
+在有监督训练中需要输入图像对应的类别信息，同样通过`fluid.data`来定义。训练中采用多类交叉熵作为损失函数，并作为网络的输出，预测阶段定义网络的输出为分类器得到的概率信息。
 
 **注意:** 训练程序应该返回一个数组，第一个返回参数必须是 `avg_cost`。训练器使用它来计算梯度。
 
@@ -376,7 +376,7 @@ def inference_program():
 def train_program():
     predict = inference_program()
 
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
     cost = fluid.layers.cross_entropy(input=predict, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=predict, label=label)
diff --git a/03.image_classification/index.html b/03.image_classification/index.html
index c336ab7d61f759e494c385821b119e87a7806385..50ed16241ce0268ddb633eb883611845c51efcc7 100644
--- a/03.image_classification/index.html
+++ b/03.image_classification/index.html
@@ -347,13 +347,13 @@ def resnet_cifar10(ipt, depth=32):
 
 ## Inference Program Configuration
 
-The input to the network is defined as `fluid.layers.data` , corresponding to image pixels in the context of image classification. The images in CIFAR10 are 32x32 coloured images with three channels. Therefore, the size of the input data is 3072 (3x32x32).
+The input to the network is defined as `fluid.data`, corresponding to image pixels in the context of image classification. The images in CIFAR10 are 32x32 coloured images with three channels. Therefore, the size of the input data is 3072 (3x32x32).
 
 ```python
 def inference_program():
     # The image is 32 * 32 with RGB representation.
-    data_shape = [3, 32, 32]
-    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+    data_shape = [None, 3, 32, 32]
+    images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
 
     predict = resnet_cifar10(images, 32)
     # predict = vgg_bn_drop(images) # un-comment to use vgg net
@@ -364,7 +364,7 @@ def inference_program():
 Then we need to set up the the `train_program`. It takes the prediction from the inference_program first.
 During the training, it will calculate the `avg_loss` from the prediction.
 
-In the context of supervised learning, labels of training images are defined in `fluid.layers.data` as well. During training, the multi-class cross-entropy is used as the loss function and becomes the output of the network. During testing, the outputs are the probabilities calculated in the classifier.
+In the context of supervised learning, labels of training images are defined in `fluid.data` as well. During training, the multi-class cross-entropy is used as the loss function and becomes the output of the network. During testing, the outputs are the probabilities calculated in the classifier.
 
 **NOTE:** A training program should return an array and the first returned argument has to be `avg_cost` .
 The trainer always uses it to calculate the gradients.
@@ -373,7 +373,7 @@ The trainer always uses it to calculate the gradients.
 def train_program():
     predict = inference_program()
 
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
     cost = fluid.layers.cross_entropy(input=predict, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=predict, label=label)
diff --git a/03.image_classification/train.py b/03.image_classification/train.py
index 47df1809b055e0b90c126816c49521e4166cc3f3..c4e2941cd636a6179d9ea582c58b3f4a8ba488e4 100644
--- a/03.image_classification/train.py
+++ b/03.image_classification/train.py
@@ -40,8 +40,8 @@ def parse_args():
 
 def inference_network():
     # The image is 32 * 32 with RGB representation.
-    data_shape = [3, 32, 32]
-    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
+    data_shape = [None, 3, 32, 32]
+    images = fluid.data(name='pixel', shape=data_shape, dtype='float32')
 
     predict = resnet_cifar10(images, 32)
     # predict = vgg_bn_drop(images) # un-comment to use vgg net
@@ -49,7 +49,7 @@ def inference_network():
 
 
 def train_network(predict):
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
     cost = fluid.layers.cross_entropy(input=predict, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=predict, label=label)
diff --git a/05.recommender_system/README.cn.md b/05.recommender_system/README.cn.md
index ab4398a74fa2b1148f6bcedfd3fe58228c84143f..ffd07a1252e4ab58e81198d2dfbab78f9bd7c120 100644
--- a/05.recommender_system/README.cn.md
+++ b/05.recommender_system/README.cn.md
@@ -254,9 +254,9 @@ def get_usr_combined_features():
 
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
 
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
 
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -267,9 +267,9 @@ def get_usr_combined_features():
 
     USR_GENDER_DICT_SIZE = 2
 
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
 
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -278,9 +278,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
 
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
 
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -289,9 +289,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
 
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
 
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -320,9 +320,9 @@ def get_mov_combined_features():
 
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
 
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
 
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -333,10 +333,10 @@ def get_mov_combined_features():
 
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
 
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_categories_hidden = layers.sequence_pool(
@@ -344,10 +344,10 @@ def get_mov_combined_features():
 
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
 
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_title_conv = nets.sequence_conv_pool(
@@ -390,7 +390,7 @@ def train_program():
 
     scale_infer = inference_program()
 
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
 
@@ -416,12 +416,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 下一步是为训练和测试定义数据提供器。提供器读入一个大小为 `BATCH_SIZE`的数据。`paddle.dataset.movielens.train` 每次会在乱序化后提供一个大小为`BATCH_SIZE`的数据，乱序化的大小为缓存大小`buf_size`。
 
 ```python
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
+train_reader = fluid.io.batch(
+    fluid.io.shuffle(
         paddle.dataset.movielens.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
 
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 ```
 
@@ -533,13 +533,13 @@ train_loop()
 ```python
 infer_movie_id = 783
 infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
-user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place)
-job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place)
-movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place) # Hunchback of Notre Dame
-category_id = fluid.create_lod_tensor([np.array([10, 8, 9], dtype='int64')], [[3]], place) # Animation, Children's, Musical
-movie_title = fluid.create_lod_tensor([np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]],
+user_id = np.array([1]).astype("int64").reshape(-1)
+gender_id = np.array([1]).astype("int64").reshape(-1)
+age_id = np.array([0]).astype("int64").reshape(-1)
+job_id = np.array([10]).astype("int64").reshape(-1)
+movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
+category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
+movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
                                       place) # 'hunchback','of','notre','dame','the'
 ```
 
diff --git a/05.recommender_system/README.md b/05.recommender_system/README.md
index a25fa14cc9f361c8d1ce836ae874921444374604..abf032c6a513c1f7a95a687d38ca355a578c22e8 100644
--- a/05.recommender_system/README.md
+++ b/05.recommender_system/README.md
@@ -206,9 +206,8 @@ mov_id = train_sample[len(user_info[uid].value())]
 print("User %s rates Movie %s with Score %s"%(user_info[uid], movie_info[mov_id], train_sample[-1]))
 ```
 
-```python
-User <UserInfo id(1), gender(F), age(1), job(10)> rates Movie <MovieInfo id(1193), title(One Flew Over the Cuckoo's Nest ), categories(['Drama'])> with Score [5.0]
-```
+    User <UserInfo id(1), gender(F), age(1), job(10)> rates Movie <MovieInfo id(1193), title(One Flew Over the Cuckoo's Nest ), categories(['Drama'])> with Score [5.0]
+
 
 That is, the user 1 evaluates the movie 1193 as 5 points.
 
@@ -242,9 +241,9 @@ def get_usr_combined_features():
 
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
 
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
 
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -255,9 +254,9 @@ def get_usr_combined_features():
 
     USR_GENDER_DICT_SIZE = 2
 
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
 
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -266,9 +265,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
 
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
 
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -277,9 +276,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
 
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
 
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -308,9 +307,9 @@ def get_mov_combined_features():
 
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
 
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
 
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -321,10 +320,10 @@ def get_mov_combined_features():
 
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
 
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_categories_hidden = layers.sequence_pool(
@@ -332,10 +331,10 @@ def get_mov_combined_features():
 
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
 
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_title_conv = nets.sequence_conv_pool(
@@ -379,7 +378,7 @@ def train_program():
 
     scale_infer = inference_program()
 
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
 
@@ -405,12 +404,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 The next step is to define a data provider for training and testing. The provider reads in a data of size `BATCH_SIZE`. `paddle.dataset.movielens.train` will provide a data of size `BATCH_SIZE` after each scribbling, and the size of the out-of-order is the cache size `buf_size`.
 
 ```python
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
+train_reader = fluid.io.batch(
+    fluid.io.shuffle(
         paddle.dataset.movielens.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
 
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 ```
 
@@ -522,13 +521,13 @@ In this prediction example, we try to predict the score given by user with ID1 f
 ```python
 infer_movie_id = 783
 infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
-user_id = fluid.create_lod_tensor([[1]], [[1]], place)
-gender_id = fluid.create_lod_tensor([[1]], [[1]], place)
-age_id = fluid.create_lod_tensor([[0]], [[1]], place)
-job_id = fluid.create_lod_tensor([[10]], [[1]], place)
-movie_id = fluid.create_lod_tensor([[783]], [[1]], place) # Hunchback of Notre Dame
-category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) # Animation, Children's, Musical
-movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]],
+user_id = np.array([1]).astype("int64").reshape(-1)
+gender_id = np.array([1]).astype("int64").reshape(-1)
+age_id = np.array([0]).astype("int64").reshape(-1)
+job_id = np.array([10]).astype("int64").reshape(-1)
+movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
+category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
+movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
                                       place) # 'hunchback','of','notre','dame','the'
 ```
 
diff --git a/05.recommender_system/index.cn.html b/05.recommender_system/index.cn.html
index 7c00581afee7086eba3e5a691aff28404ac687c5..17191a58795ea4c0879958fd0ca5ce639877f678 100644
--- a/05.recommender_system/index.cn.html
+++ b/05.recommender_system/index.cn.html
@@ -296,9 +296,9 @@ def get_usr_combined_features():
 
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
 
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
 
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -309,9 +309,9 @@ def get_usr_combined_features():
 
     USR_GENDER_DICT_SIZE = 2
 
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
 
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -320,9 +320,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
 
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
 
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -331,9 +331,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
 
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
 
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -362,9 +362,9 @@ def get_mov_combined_features():
 
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
 
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
 
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -375,10 +375,10 @@ def get_mov_combined_features():
 
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
 
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_categories_hidden = layers.sequence_pool(
@@ -386,10 +386,10 @@ def get_mov_combined_features():
 
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
 
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_title_conv = nets.sequence_conv_pool(
@@ -432,7 +432,7 @@ def train_program():
 
     scale_infer = inference_program()
 
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
 
@@ -458,12 +458,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 下一步是为训练和测试定义数据提供器。提供器读入一个大小为 `BATCH_SIZE`的数据。`paddle.dataset.movielens.train` 每次会在乱序化后提供一个大小为`BATCH_SIZE`的数据，乱序化的大小为缓存大小`buf_size`。
 
 ```python
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
+train_reader = fluid.io.batch(
+    fluid.io.shuffle(
         paddle.dataset.movielens.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
 
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 ```
 
@@ -575,13 +575,13 @@ train_loop()
 ```python
 infer_movie_id = 783
 infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
-user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
-age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place)
-job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place)
-movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place) # Hunchback of Notre Dame
-category_id = fluid.create_lod_tensor([np.array([10, 8, 9], dtype='int64')], [[3]], place) # Animation, Children's, Musical
-movie_title = fluid.create_lod_tensor([np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]],
+user_id = np.array([1]).astype("int64").reshape(-1)
+gender_id = np.array([1]).astype("int64").reshape(-1)
+age_id = np.array([0]).astype("int64").reshape(-1)
+job_id = np.array([10]).astype("int64").reshape(-1)
+movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
+category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
+movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
                                       place) # 'hunchback','of','notre','dame','the'
 ```
 
diff --git a/05.recommender_system/index.html b/05.recommender_system/index.html
index bb11402875ef3245944ffa58b557092fda097dbd..808d891cf58a077e35bd42a2739c708d540257a3 100644
--- a/05.recommender_system/index.html
+++ b/05.recommender_system/index.html
@@ -248,9 +248,8 @@ mov_id = train_sample[len(user_info[uid].value())]
 print("User %s rates Movie %s with Score %s"%(user_info[uid], movie_info[mov_id], train_sample[-1]))
 ```
 
-```python
-User <UserInfo id(1), gender(F), age(1), job(10)> rates Movie <MovieInfo id(1193), title(One Flew Over the Cuckoo's Nest ), categories(['Drama'])> with Score [5.0]
-```
+    User <UserInfo id(1), gender(F), age(1), job(10)> rates Movie <MovieInfo id(1193), title(One Flew Over the Cuckoo's Nest ), categories(['Drama'])> with Score [5.0]
+
 
 That is, the user 1 evaluates the movie 1193 as 5 points.
 
@@ -284,9 +283,9 @@ def get_usr_combined_features():
 
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
 
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
 
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -297,9 +296,9 @@ def get_usr_combined_features():
 
     USR_GENDER_DICT_SIZE = 2
 
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
 
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -308,9 +307,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
 
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
 
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -319,9 +318,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
 
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
 
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -350,9 +349,9 @@ def get_mov_combined_features():
 
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
 
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
 
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -363,10 +362,10 @@ def get_mov_combined_features():
 
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
 
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_categories_hidden = layers.sequence_pool(
@@ -374,10 +373,10 @@ def get_mov_combined_features():
 
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
 
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_title_conv = nets.sequence_conv_pool(
@@ -421,7 +420,7 @@ def train_program():
 
     scale_infer = inference_program()
 
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
 
@@ -447,12 +446,12 @@ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 The next step is to define a data provider for training and testing. The provider reads in a data of size `BATCH_SIZE`. `paddle.dataset.movielens.train` will provide a data of size `BATCH_SIZE` after each scribbling, and the size of the out-of-order is the cache size `buf_size`.
 
 ```python
-train_reader = paddle.batch(
-    paddle.reader.shuffle(
+train_reader = fluid.io.batch(
+    fluid.io.shuffle(
         paddle.dataset.movielens.train(), buf_size=8192),
     batch_size=BATCH_SIZE)
 
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 ```
 
@@ -564,13 +563,13 @@ In this prediction example, we try to predict the score given by user with ID1 f
 ```python
 infer_movie_id = 783
 infer_movie_name = paddle.dataset.movielens.movie_info()[infer_movie_id].title
-user_id = fluid.create_lod_tensor([[1]], [[1]], place)
-gender_id = fluid.create_lod_tensor([[1]], [[1]], place)
-age_id = fluid.create_lod_tensor([[0]], [[1]], place)
-job_id = fluid.create_lod_tensor([[10]], [[1]], place)
-movie_id = fluid.create_lod_tensor([[783]], [[1]], place) # Hunchback of Notre Dame
-category_id = fluid.create_lod_tensor([[10, 8, 9]], [[3]], place) # Animation, Children's, Musical
-movie_title = fluid.create_lod_tensor([[1069, 4140, 2923, 710, 988]], [[5]],
+user_id = np.array([1]).astype("int64").reshape(-1)
+gender_id = np.array([1]).astype("int64").reshape(-1)
+age_id = np.array([0]).astype("int64").reshape(-1)
+job_id = np.array([10]).astype("int64").reshape(-1)
+movie_id = np.array([783]).astype("int64").reshape(-1) # Hunchback of Notre Dame
+category_id = fluid.create_lod_tensor(np.array([10, 8, 9], dtype='int64'), [[3]], place) # Animation, Children's, Musical
+movie_title = fluid.create_lod_tensor(np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]],
                                       place) # 'hunchback','of','notre','dame','the'
 ```
 
diff --git a/05.recommender_system/train.py b/05.recommender_system/train.py
index 70e71608f76543eba2531d284fe25ecb22a427a5..5cf64acf0908c456420ef2d200af231a5de7f6ce 100644
--- a/05.recommender_system/train.py
+++ b/05.recommender_system/train.py
@@ -44,9 +44,9 @@ def get_usr_combined_features():
 
     USR_DICT_SIZE = paddle.dataset.movielens.max_user_id() + 1
 
-    uid = layers.data(name='user_id', shape=[1], dtype='int64')
+    uid = fluid.data(name='user_id', shape=[-1], dtype='int64')
 
-    usr_emb = layers.embedding(
+    usr_emb = fluid.embedding(
         input=uid,
         dtype='float32',
         size=[USR_DICT_SIZE, 32],
@@ -57,9 +57,9 @@ def get_usr_combined_features():
 
     USR_GENDER_DICT_SIZE = 2
 
-    usr_gender_id = layers.data(name='gender_id', shape=[1], dtype='int64')
+    usr_gender_id = fluid.data(name='gender_id', shape=[-1], dtype='int64')
 
-    usr_gender_emb = layers.embedding(
+    usr_gender_emb = fluid.embedding(
         input=usr_gender_id,
         size=[USR_GENDER_DICT_SIZE, 16],
         param_attr='gender_table',
@@ -68,9 +68,9 @@ def get_usr_combined_features():
     usr_gender_fc = layers.fc(input=usr_gender_emb, size=16)
 
     USR_AGE_DICT_SIZE = len(paddle.dataset.movielens.age_table)
-    usr_age_id = layers.data(name='age_id', shape=[1], dtype="int64")
+    usr_age_id = fluid.data(name='age_id', shape=[-1], dtype="int64")
 
-    usr_age_emb = layers.embedding(
+    usr_age_emb = fluid.embedding(
         input=usr_age_id,
         size=[USR_AGE_DICT_SIZE, 16],
         is_sparse=IS_SPARSE,
@@ -79,9 +79,9 @@ def get_usr_combined_features():
     usr_age_fc = layers.fc(input=usr_age_emb, size=16)
 
     USR_JOB_DICT_SIZE = paddle.dataset.movielens.max_job_id() + 1
-    usr_job_id = layers.data(name='job_id', shape=[1], dtype="int64")
+    usr_job_id = fluid.data(name='job_id', shape=[-1], dtype="int64")
 
-    usr_job_emb = layers.embedding(
+    usr_job_emb = fluid.embedding(
         input=usr_job_id,
         size=[USR_JOB_DICT_SIZE, 16],
         param_attr='job_table',
@@ -101,9 +101,9 @@ def get_mov_combined_features():
 
     MOV_DICT_SIZE = paddle.dataset.movielens.max_movie_id() + 1
 
-    mov_id = layers.data(name='movie_id', shape=[1], dtype='int64')
+    mov_id = fluid.data(name='movie_id', shape=[-1], dtype='int64')
 
-    mov_emb = layers.embedding(
+    mov_emb = fluid.embedding(
         input=mov_id,
         dtype='float32',
         size=[MOV_DICT_SIZE, 32],
@@ -114,10 +114,10 @@ def get_mov_combined_features():
 
     CATEGORY_DICT_SIZE = len(paddle.dataset.movielens.movie_categories())
 
-    category_id = layers.data(
-        name='category_id', shape=[1], dtype='int64', lod_level=1)
+    category_id = fluid.data(
+        name='category_id', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_categories_emb = layers.embedding(
+    mov_categories_emb = fluid.embedding(
         input=category_id, size=[CATEGORY_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_categories_hidden = layers.sequence_pool(
@@ -125,10 +125,10 @@ def get_mov_combined_features():
 
     MOV_TITLE_DICT_SIZE = len(paddle.dataset.movielens.get_movie_title_dict())
 
-    mov_title_id = layers.data(
-        name='movie_title', shape=[1], dtype='int64', lod_level=1)
+    mov_title_id = fluid.data(
+        name='movie_title', shape=[-1], dtype='int64', lod_level=1)
 
-    mov_title_emb = layers.embedding(
+    mov_title_emb = fluid.embedding(
         input=mov_title_id, size=[MOV_TITLE_DICT_SIZE, 32], is_sparse=IS_SPARSE)
 
     mov_title_conv = nets.sequence_conv_pool(
@@ -153,7 +153,7 @@ def inference_program():
     inference = layers.cos_sim(X=usr_combined_features, Y=mov_combined_features)
     scale_infer = layers.scale(x=inference, scale=5.0)
 
-    label = layers.data(name='score', shape=[1], dtype='float32')
+    label = fluid.data(name='score', shape=[-1, 1], dtype='float32')
     square_cost = layers.square_error_cost(input=scale_infer, label=label)
     avg_cost = layers.mean(square_cost)
 
@@ -168,16 +168,15 @@ def train(use_cuda, params_dirname):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 
     if args.enable_ce:
-        train_reader = paddle.batch(
+        train_reader = fluid.io.batch(
             paddle.dataset.movielens.train(), batch_size=BATCH_SIZE)
-        test_reader = paddle.batch(
+        test_reader = fluid.io.batch(
             paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
     else:
-        train_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.movielens.train(), buf_size=8192),
+        train_reader = fluid.io.batch(
+            fluid.io.shuffle(paddle.dataset.movielens.train(), buf_size=8192),
             batch_size=BATCH_SIZE)
-        test_reader = paddle.batch(
+        test_reader = fluid.io.batch(
             paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
 
     feed_order = [
@@ -293,28 +292,27 @@ def infer(use_cuda, params_dirname):
         # Correspondingly, recursive_sequence_lengths = [[3, 2]] contains one
         # level of detail info, indicating that `data` consists of two sequences
         # of length 3 and 2, respectively.
-        user_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
+        user_id = np.array([1]).astype("int64").reshape(-1)
 
         assert feed_target_names[1] == "gender_id"
-        gender_id = fluid.create_lod_tensor([[np.int64(1)]], [[1]], place)
+        gender_id = np.array([1]).astype("int64").reshape(-1)
 
         assert feed_target_names[2] == "age_id"
-        age_id = fluid.create_lod_tensor([[np.int64(0)]], [[1]], place)
+        age_id = np.array([0]).astype("int64").reshape(-1)
 
         assert feed_target_names[3] == "job_id"
-        job_id = fluid.create_lod_tensor([[np.int64(10)]], [[1]], place)
+        job_id = np.array([10]).astype("int64").reshape(-1)
 
         assert feed_target_names[4] == "movie_id"
-        movie_id = fluid.create_lod_tensor([[np.int64(783)]], [[1]], place)
+        movie_id = np.array([783]).astype("int64").reshape(-1)
 
         assert feed_target_names[5] == "category_id"
         category_id = fluid.create_lod_tensor(
-            [np.array([10, 8, 9], dtype='int64')], [[3]], place)
+            np.array([10, 8, 9], dtype='int64'), [[3]], place)
 
         assert feed_target_names[6] == "movie_title"
         movie_title = fluid.create_lod_tensor(
-            [np.array([1069, 4140, 2923, 710, 988], dtype='int64')], [[5]],
-            place)
+            np.array([1069, 4140, 2923, 710, 988], dtype='int64'), [[5]], place)
 
         # Construct feed as a dictionary of {feed_target_name: feed_target_data}
         # and results will contain a list of data corresponding to fetch_targets.
diff --git a/06.understand_sentiment/README.cn.md b/06.understand_sentiment/README.cn.md
index 58755fb86f444be5ae28f70461812d9aca42a9d2..84b44d53ceae93d054c0391fa5a48d7bb2ca049c 100755
--- a/06.understand_sentiment/README.cn.md
+++ b/06.understand_sentiment/README.cn.md
@@ -151,7 +151,7 @@ BATCH_SIZE = 128  #batch的大小
 ```python
 #文本卷积神经网络
 def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
     conv_3 = fluid.nets.sequence_conv_pool(
         input=emb,
@@ -183,7 +183,7 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
 def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
 
     #计算词向量
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
 
     #第一层栈
@@ -218,8 +218,8 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
 
 ```python
 def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
+    data = fluid.data(
+        name="words", shape=[-1], dtype="int64", lod_level=1)
 
     dict_dim = len(word_dict)
     net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
@@ -235,7 +235,7 @@ def inference_program(word_dict):
 
 ```python
 def train_program(prediction):
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    label = fluid.data(name="label", shape=[-1,1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=prediction, label=label)
@@ -269,12 +269,12 @@ print("Loading IMDB word dict....")
 word_dict = paddle.dataset.imdb.word_dict()
 
 print ("Reading training data....")
-train_reader = paddle.batch(
+train_reader = fluid.io.batch(
     paddle.reader.shuffle(
         paddle.dataset.imdb.train(word_dict), buf_size=25000),
     batch_size=BATCH_SIZE)
 print("Reading testing data....")
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 ```
 word_dict是一个字典序列，是词和label的对应关系，运行下一行可以看到具体内容：
@@ -401,11 +401,15 @@ reviews = [c.split() for c in reviews_str]
 
 UNK = word_dict['<unk>']
 lod = []
-for c in reviews:
-    lod.append([word_dict.get(words, UNK) for words in c])
+base_shape = []
 
-base_shape = [[len(c) for c in lod]]
+for c in reviews:
+    re = np.array([np.int64(word_dict.get(words, UNK)) for words in c])
+    lod = np.concatenate([lod,re],axis = 0)  # flatten all word ids into one 1-D array
+    base_shape.append(re.shape[0])  # append (not insert(-1)) keeps lengths in review order
 
+base_shape = [base_shape]
+lod = np.array(lod).astype('int64')
 tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
 ```
 
diff --git a/06.understand_sentiment/README.md b/06.understand_sentiment/README.md
index 3d8eae5e01ab562bf4606dd5192595f75564a3b5..c7db4f6e8a1fa030fc366c85b5913e0674e8dff3 100644
--- a/06.understand_sentiment/README.md
+++ b/06.understand_sentiment/README.md
@@ -140,7 +140,7 @@ Note that `fluid.nets.sequence_conv_pool` contains both convolution and pooling
 ```python
 #Textconvolution neural network
 def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
     conv_3 = fluid.nets.sequence_conv_pool(
         input=emb,
@@ -172,7 +172,7 @@ The code of the stack bidirectional LSTM `stacked_lstm_net` is as follows:
 def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
 
     # Calculate word vectorvector
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
 
     #First stack
@@ -191,7 +191,7 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
         inputs = [fc, lstm]
 
     #pooling layer
-    pc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
+    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
     lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
 
     #Fully connected layer, softmax prediction
@@ -207,8 +207,8 @@ Next we define the prediction program (`inference_program`). We use `convolution
 
 ```python
 def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
+    data = fluid.data(
+        name="words", shape=[-1], dtype="int64", lod_level=1)
 
     dict_dim = len(word_dict)
     net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
@@ -224,7 +224,7 @@ During the testing, the classifier calculates the probability of each output. Th
 
 ```python
 def train_program(prediction):
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=prediction, label=label)
@@ -258,12 +258,12 @@ print("Loading IMDB word dict....")
 word_dict = paddle.dataset.imdb.word_dict()
 
 print ("Reading training data....")
-train_reader = paddle.batch(
+train_reader = fluid.io.batch(
     paddle.reader.shuffle(
         paddle.dataset.imdb.train(word_dict), buf_size=25000),
     batch_size=BATCH_SIZE)
 print("Reading testing data....")
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 ```
 Word_dict is a dictionary sequence, which is the correspondence between words and labels. You can see it specifically by running the next code:
@@ -390,11 +390,15 @@ reviews = [c.split() for c in reviews_str]
 
 UNK = word_dict['<unk>']
 lod = []
-for c in reviews:
-    lod.append([word_dict.get(words, UNK) for words in c])
+base_shape = []
 
-base_shape = [[len(c) for c in lod]]
+for c in reviews:
+    re = np.array([np.int64(word_dict.get(words, UNK)) for words in c])
+    lod = np.concatenate([lod,re],axis = 0)  # flatten all word ids into one 1-D array
+    base_shape.append(re.shape[0])  # append (not insert(-1)) keeps lengths in review order
 
+base_shape = [base_shape]
+lod = np.array(lod).astype('int64')
 tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
 ```
 
diff --git a/06.understand_sentiment/image/formula_lstm_more.png b/06.understand_sentiment/image/formula_lstm_more.png
index a6401dd2464c234453160bee9e0d0f8ea2f2d702..a0a98fe53addd075bd7249c6da1da5d5b9e6889b 100644
Binary files a/06.understand_sentiment/image/formula_lstm_more.png and b/06.understand_sentiment/image/formula_lstm_more.png differ
diff --git a/06.understand_sentiment/index.cn.html b/06.understand_sentiment/index.cn.html
index d73442f3185569eb06c0178d33f1145048faab27..3c8645c916dd698d315d06df92a8e69534987c29 100644
--- a/06.understand_sentiment/index.cn.html
+++ b/06.understand_sentiment/index.cn.html
@@ -193,7 +193,7 @@ BATCH_SIZE = 128  #batch的大小
 ```python
 #文本卷积神经网络
 def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
     conv_3 = fluid.nets.sequence_conv_pool(
         input=emb,
@@ -225,7 +225,7 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
 def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
 
     #计算词向量
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
 
     #第一层栈
@@ -260,8 +260,8 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
 
 ```python
 def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
+    data = fluid.data(
+        name="words", shape=[-1], dtype="int64", lod_level=1)
 
     dict_dim = len(word_dict)
     net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
@@ -277,7 +277,7 @@ def inference_program(word_dict):
 
 ```python
 def train_program(prediction):
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    label = fluid.data(name="label", shape=[-1,1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=prediction, label=label)
@@ -311,12 +311,12 @@ print("Loading IMDB word dict....")
 word_dict = paddle.dataset.imdb.word_dict()
 
 print ("Reading training data....")
-train_reader = paddle.batch(
+train_reader = fluid.io.batch(
     paddle.reader.shuffle(
         paddle.dataset.imdb.train(word_dict), buf_size=25000),
     batch_size=BATCH_SIZE)
 print("Reading testing data....")
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 ```
 word_dict是一个字典序列，是词和label的对应关系，运行下一行可以看到具体内容：
@@ -443,11 +443,15 @@ reviews = [c.split() for c in reviews_str]
 
 UNK = word_dict['<unk>']
 lod = []
-for c in reviews:
-    lod.append([word_dict.get(words, UNK) for words in c])
+base_shape = []
 
-base_shape = [[len(c) for c in lod]]
+for c in reviews:
+    re = np.array([np.int64(word_dict.get(words, UNK)) for words in c])
+    lod = np.concatenate([lod,re],axis = 0)  # flatten all word ids into one 1-D array
+    base_shape.append(re.shape[0])  # append (not insert(-1)) keeps lengths in review order
 
+base_shape = [base_shape]
+lod = np.array(lod).astype('int64')
 tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
 ```
 
diff --git a/06.understand_sentiment/index.html b/06.understand_sentiment/index.html
index e8d31257ac9021a5f3c1ea2ac939467a4e13b3a4..eef42a88a32c8ea2e8bd1330550404b944f8210a 100644
--- a/06.understand_sentiment/index.html
+++ b/06.understand_sentiment/index.html
@@ -182,7 +182,7 @@ Note that `fluid.nets.sequence_conv_pool` contains both convolution and pooling
 ```python
 #Textconvolution neural network
 def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
     conv_3 = fluid.nets.sequence_conv_pool(
         input=emb,
@@ -214,7 +214,7 @@ The code of the stack bidirectional LSTM `stacked_lstm_net` is as follows:
 def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
 
     # Calculate word vectorvector
-    emb = fluid.layers.embedding(
+    emb = fluid.embedding(
         input=data, size=[input_dim, emb_dim], is_sparse=True)
 
     #First stack
@@ -233,7 +233,7 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
         inputs = [fc, lstm]
 
     #pooling layer
-    pc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
+    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
     lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
 
     #Fully connected layer, softmax prediction
@@ -249,8 +249,8 @@ Next we define the prediction program (`inference_program`). We use `convolution
 
 ```python
 def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
+    data = fluid.data(
+        name="words", shape=[-1], dtype="int64", lod_level=1)
 
     dict_dim = len(word_dict)
     net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
@@ -266,7 +266,7 @@ During the testing, the classifier calculates the probability of each output. Th
 
 ```python
 def train_program(prediction):
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=prediction, label=label)
@@ -300,12 +300,12 @@ print("Loading IMDB word dict....")
 word_dict = paddle.dataset.imdb.word_dict()
 
 print ("Reading training data....")
-train_reader = paddle.batch(
+train_reader = fluid.io.batch(
     paddle.reader.shuffle(
         paddle.dataset.imdb.train(word_dict), buf_size=25000),
     batch_size=BATCH_SIZE)
 print("Reading testing data....")
-test_reader = paddle.batch(
+test_reader = fluid.io.batch(
     paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 ```
 Word_dict is a dictionary sequence, which is the correspondence between words and labels. You can see it specifically by running the next code:
@@ -432,11 +432,15 @@ reviews = [c.split() for c in reviews_str]
 
 UNK = word_dict['<unk>']
 lod = []
-for c in reviews:
-    lod.append([word_dict.get(words, UNK) for words in c])
+base_shape = []
 
-base_shape = [[len(c) for c in lod]]
+for c in reviews:
+    re = np.array([np.int64(word_dict.get(words, UNK)) for words in c])
+    lod = np.concatenate([lod,re],axis = 0)  # flatten all word ids into one 1-D array
+    base_shape.append(re.shape[0])  # append (not insert(-1)) keeps lengths in review order
 
+base_shape = [base_shape]
+lod = np.array(lod).astype('int64')
 tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
 ```
 
diff --git a/06.understand_sentiment/train_conv.py b/06.understand_sentiment/train_conv.py
index bdc43a2d1d012aa436278fc59ddabe193ec1f13b..d378ef3da593521aa9e188947ee7322a3e3042c1 100644
--- a/06.understand_sentiment/train_conv.py
+++ b/06.understand_sentiment/train_conv.py
@@ -42,8 +42,7 @@ def parse_args():
 
 
 def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
-    emb = fluid.layers.embedding(
-        input=data, size=[input_dim, emb_dim], is_sparse=True)
+    emb = fluid.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True)
     conv_3 = fluid.nets.sequence_conv_pool(
         input=emb,
         num_filters=hid_dim,
@@ -62,16 +61,15 @@ def convolution_net(data, input_dim, class_dim, emb_dim, hid_dim):
 
 
 def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
-
     dict_dim = len(word_dict)
+    data = fluid.data(name="words", shape=[-1], dtype="int64", lod_level=1)
+
     net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
     return net
 
 
 def train_program(prediction):
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=prediction, label=label)
@@ -90,16 +88,16 @@ def train(use_cuda, params_dirname):
 
     print("Reading training data....")
     if args.enable_ce:
-        train_reader = paddle.batch(
+        train_reader = fluid.io.batch(
             paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
     else:
-        train_reader = paddle.batch(
-            paddle.reader.shuffle(
+        train_reader = fluid.io.batch(
+            fluid.io.shuffle(
                 paddle.dataset.imdb.train(word_dict), buf_size=25000),
             batch_size=BATCH_SIZE)
 
     print("Reading testing data....")
-    test_reader = paddle.batch(
+    test_reader = fluid.io.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 
     feed_order = ['words', 'label']
@@ -213,11 +211,15 @@ def infer(use_cuda, params_dirname=None):
 
         UNK = word_dict['<unk>']
         lod = []
-        for c in reviews:
-            lod.append([np.int64(word_dict.get(words, UNK)) for words in c])
+        base_shape = []
 
-        base_shape = [[len(c) for c in lod]]
+        for c in reviews:
+            re = np.array([np.int64(word_dict.get(words, UNK)) for words in c])
+            lod = np.concatenate([lod, re], axis=0)  # flatten all word ids
+            base_shape.append(re.shape[0])  # append keeps review order
 
+        base_shape = [base_shape]
+        lod = np.array(lod).astype('int64')
         tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
         assert feed_target_names[0] == "words"
         results = exe.run(
diff --git a/06.understand_sentiment/train_dyn_rnn.py b/06.understand_sentiment/train_dyn_rnn.py
index 54f1b53197b128539b59bd7636223d8294f581f3..0aec7bbba1a5ec0ad8d534295c58d966b2211c29 100644
--- a/06.understand_sentiment/train_dyn_rnn.py
+++ b/06.understand_sentiment/train_dyn_rnn.py
@@ -42,44 +42,18 @@ def parse_args():
 
 
 def dynamic_rnn_lstm(data, input_dim, class_dim, emb_dim, lstm_size):
-    emb = fluid.layers.embedding(
-        input=data, size=[input_dim, emb_dim], is_sparse=True)
-    sentence = fluid.layers.fc(input=emb, size=lstm_size, act='tanh')
-
-    rnn = fluid.layers.DynamicRNN()
-    with rnn.block():
-        word = rnn.step_input(sentence)
-        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
-        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])
-
-        def gate_common(ipt, hidden, size):
-            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
-            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
-            return gate0 + gate1
-
-        forget_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
-                                                         lstm_size))
-        input_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
-                                                        lstm_size))
-        output_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
-                                                         lstm_size))
-        cell_gate = fluid.layers.sigmoid(x=gate_common(word, prev_hidden,
-                                                       lstm_size))
-
-        cell = forget_gate * prev_cell + input_gate * cell_gate
-        hidden = output_gate * fluid.layers.tanh(x=cell)
-        rnn.update_memory(prev_cell, cell)
-        rnn.update_memory(prev_hidden, hidden)
-        rnn.output(hidden)
-
-    last = fluid.layers.sequence_last_step(rnn())
+    emb = fluid.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True)
+    sentence = fluid.layers.fc(input=emb, size=lstm_size * 4, act='tanh')
+
+    lstm, _ = fluid.layers.dynamic_lstm(sentence, size=lstm_size * 4)
+
+    last = fluid.layers.sequence_last_step(lstm)
     prediction = fluid.layers.fc(input=last, size=class_dim, act="softmax")
     return prediction
 
 
 def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
+    data = fluid.data(name="words", shape=[-1], dtype="int64", lod_level=1)
 
     dict_dim = len(word_dict)
     pred = dynamic_rnn_lstm(data, dict_dim, CLASS_DIM, EMB_DIM, LSTM_SIZE)
@@ -87,7 +61,7 @@ def inference_program(word_dict):
 
 
 def train_program(prediction):
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=prediction, label=label)
@@ -105,16 +79,16 @@ def train(use_cuda, params_dirname):
 
     print("Reading training data....")
     if args.enable_ce:
-        train_reader = paddle.batch(
+        train_reader = fluid.io.batch(
             paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
     else:
-        train_reader = paddle.batch(
+        train_reader = fluid.io.batch(
             paddle.reader.shuffle(
                 paddle.dataset.imdb.train(word_dict), buf_size=25000),
             batch_size=BATCH_SIZE)
 
     print("Reading testing data....")
-    test_reader = paddle.batch(
+    test_reader = fluid.io.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 
     feed_order = ['words', 'label']
@@ -226,11 +200,15 @@ def infer(use_cuda, params_dirname=None):
 
         UNK = word_dict['<unk>']
         lod = []
-        for c in reviews:
-            lod.append([np.int64(word_dict.get(words, UNK)) for words in c])
+        base_shape = []
 
-        base_shape = [[len(c) for c in lod]]
+        for c in reviews:
+            re = np.array([np.int64(word_dict.get(words, UNK)) for words in c])
+            lod = np.concatenate([lod, re], axis=0)  # flatten all word ids
+            base_shape.append(re.shape[0])  # append keeps review order
 
+        base_shape = [base_shape]
+        lod = np.array(lod).astype('int64')
         tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
         assert feed_target_names[0] == "words"
         results = exe.run(
diff --git a/06.understand_sentiment/train_stacked_lstm.py b/06.understand_sentiment/train_stacked_lstm.py
index acf38b209da82bdc8bac32ccd58f5155e0bdcfcc..1f8eb7c3730839706960b10516f87e06cf733f1f 100644
--- a/06.understand_sentiment/train_stacked_lstm.py
+++ b/06.understand_sentiment/train_stacked_lstm.py
@@ -46,8 +46,7 @@ def parse_args():
 def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
     assert stacked_num % 2 == 1
 
-    emb = fluid.layers.embedding(
-        input=data, size=[input_dim, emb_dim], is_sparse=True)
+    emb = fluid.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True)
 
     fc1 = fluid.layers.fc(input=emb, size=hid_dim)
     lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)
@@ -69,8 +68,7 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
 
 
 def inference_program(word_dict):
-    data = fluid.layers.data(
-        name="words", shape=[1], dtype="int64", lod_level=1)
+    data = fluid.data(name="words", shape=[-1], dtype="int64", lod_level=1)
 
     dict_dim = len(word_dict)
     net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM,
@@ -80,7 +78,7 @@ def inference_program(word_dict):
 
 def train_program(prediction):
     # prediction = inference_program(word_dict)
-    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    label = fluid.data(name="label", shape=[-1, 1], dtype="int64")
     cost = fluid.layers.cross_entropy(input=prediction, label=label)
     avg_cost = fluid.layers.mean(cost)
     accuracy = fluid.layers.accuracy(input=prediction, label=label)
@@ -100,16 +98,16 @@ def train(use_cuda, params_dirname):
     print("Reading training data....")
 
     if args.enable_ce:
-        train_reader = paddle.batch(
+        train_reader = fluid.io.batch(
             paddle.dataset.imdb.train(word_dict), batch_size=BATCH_SIZE)
     else:
-        train_reader = paddle.batch(
+        train_reader = fluid.io.batch(
             paddle.reader.shuffle(
                 paddle.dataset.imdb.train(word_dict), buf_size=25000),
             batch_size=BATCH_SIZE)
 
     print("Reading testing data....")
-    test_reader = paddle.batch(
+    test_reader = fluid.io.batch(
         paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
 
     feed_order = ['words', 'label']
@@ -223,11 +221,15 @@ def infer(use_cuda, params_dirname=None):
 
         UNK = word_dict['<unk>']
         lod = []
-        for c in reviews:
-            lod.append([np.int64(word_dict.get(words, UNK)) for words in c])
+        base_shape = []
 
-        base_shape = [[len(c) for c in lod]]
+        for c in reviews:
+            re = np.array([np.int64(word_dict.get(words, UNK)) for words in c])
+            lod = np.concatenate([lod, re], axis=0)  # flatten all word ids
+            base_shape.append(re.shape[0])  # append keeps review order
 
+        base_shape = [base_shape]
+        lod = np.array(lod).astype('int64')
         tensor_words = fluid.create_lod_tensor(lod, base_shape, place)
         assert feed_target_names[0] == "words"
         results = exe.run(
diff --git a/09.gan/README.cn.md b/09.gan/README.cn.md
index 10266d523f8fb96ed539f69a6a703a8320d7a97d..99cf01ed570e122248a38982eeaf5b284034226a 100644
--- a/09.gan/README.cn.md
+++ b/09.gan/README.cn.md
@@ -162,7 +162,7 @@ def bn(x, name=None, act='relu'):
 
 - 卷积层
 
-调用 `fluid.nets.simple_img_conv_pool` 实现卷积池化组，卷积核大小为3x3，池化窗口大小为2x2，窗口滑动步长为2，激活函数类型由具体网络结构指定。
+调用 `fluid.nets.simple_img_conv_pool` 实现卷积池化组，卷积核大小为5x5，池化窗口大小为2x2，窗口滑动步长为2，激活函数类型由具体网络结构指定。
 
 ```python
 def conv(x, num_filters, name=None, act=None):
diff --git a/09.gan/index.cn.html b/09.gan/index.cn.html
index ca7ee664d486e33ba84798f138df953cc2043400..8947f5a1725538fea260a17b89f57e6f028a2336 100644
--- a/09.gan/index.cn.html
+++ b/09.gan/index.cn.html
@@ -204,7 +204,7 @@ def bn(x, name=None, act='relu'):
 
 - 卷积层
 
-调用 `fluid.nets.simple_img_conv_pool` 实现卷积池化组，卷积核大小为3x3，池化窗口大小为2x2，窗口滑动步长为2，激活函数类型由具体网络结构指定。
+调用 `fluid.nets.simple_img_conv_pool` 实现卷积池化组，卷积核大小为5x5，池化窗口大小为2x2，窗口滑动步长为2，激活函数类型由具体网络结构指定。
 
 ```python
 def conv(x, num_filters, name=None, act=None):