Unverified commit 1aa218f3, authored by 123malin, committed by GitHub

Merge branch 'master' into fasttext

*.o
output
.idea/
build/
dist/
fleetrec.egg-info/
paddlerec.egg-info/
*~
*.pyc
repos:
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
sha: v1.0.1
hooks:
- id: remove-crlf
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- repo: https://github.com/PaddlePaddle/mirrors-yapf.git
sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37
hooks:
- id: yapf
files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0
hooks:
- id: check-added-large-files
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
files: (?!.*third_party)^.*$ | (?!.*book)^.*$
- id: end-of-file-fixer
- repo: local
hooks:
- id: copyright_checker
name: copyright_checker
entry: python ./tools/codestyle/copyright.hook
language: system
files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
language: generic
sudo: required
dist: trusty
services:
- docker
os:
- linux
env:
- JOB=check_style
before_install:
# For the pylint docstring checker
- sudo pip install pylint pytest astroid isort pre-commit
- |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
script:
- "travis_wait 30 sleep 1800 &"
- |
# 43min timeout
tools/build_script.sh ${JOB}
if [ $? -eq 0 ] || [ $? -eq 142 ]; then true; else exit 1; fi;
notifications:
email:
on_success: change
on_failure: always
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
<p align="center">
<img align="center" src="doc/imgs/logo.png">
</p>
<p align="center">
<br>
<img alt="Release" src="https://img.shields.io/badge/Release-0.1.0-yellowgreen">
<img alt="License" src="https://img.shields.io/github/license/PaddlePaddle/Serving">
<img alt="Slack" src="https://img.shields.io/badge/Join-Slack-green">
<br>
</p>
<h2 align="center">What is PaddleRec</h2>
<p align="center">
<img align="center" src="doc/imgs/structure.png">
</p>
- A **one-stop, out-of-the-box toolkit** for search and recommendation models, built on the PaddlePaddle ecosystem
- A full-pipeline solution for beginners, developers, and researchers, covering everything from research and training to inference and deployment
- An algorithm library for recommendation and search covering semantic understanding, recall, pre-ranking, ranking, multi-task learning, fusion, and more
- Configure a few **yaml** options to quickly get started with single-node training, large-scale distributed training, offline inference, and online deployment
<h2 align="center">PaddleRec Overview</h2>
<p align="center">
<img align="center" src="doc/imgs/overview.png">
</p>
<h2 align="center">Recommender System: Pipeline Overview</h2>
<p align="center">
<img align="center" src="doc/imgs/rec-overview.png">
</p>
<h2 align="center">Easy Installation</h2>
### Requirements
* Python 2.7 / 3.5 / 3.6 / 3.7
* PaddlePaddle >= 1.7.2
* OS: Windows / macOS / Linux
> On Windows only single-node training is currently supported; Linux is recommended.
### Installation
- Method 1: install directly from PyPI
```bash
python -m pip install paddle-rec
```
- Method 2: build and install from source
1. Install PaddlePaddle **Note: PaddlePaddle >= 1.7.2 is required**
```shell
python -m pip install paddlepaddle -i https://mirror.baidu.com/pypi/simple
```
2. Install PaddleRec from source
```shell
git clone https://github.com/PaddlePaddle/PaddleRec/
cd PaddleRec
python setup.py install
```
<h2 align="center">Quick Start</h2>
### Run a built-in model with the default config
The framework ships with a number of built-in models; a single command starts single-node training or locally simulated distributed training with any of them.
> Locally simulated distributed training (`local_cluster`) runs in parameter-server mode with `1 server + 1 trainer`.
We use the `dnn` ranking model to walk through basic PaddleRec usage. The training data comes from the [Criteo dataset](https://www.kaggle.com/c/criteo-display-ad-challenge/), from which we sampled 100 records so you can quickly try the complete PaddleRec workflow.
```bash
# single-node training on CPU
python -m paddlerec.run -m paddlerec.models.rank.dnn
```
### Run a built-in model with a custom config
If you reuse a built-in model and modify its **yaml** config, for example to change hyperparameters or point it at different data, you can run the modified yaml file directly with paddlerec.
Taking the dnn model as an example, switch to the paddlerec code directory:
```bash
cd paddlerec
```
Edit the dnn model's [hyperparameter config](./models/rank/dnn/config.yaml), for example changing the number of training epochs from 10 to 5:
```yaml
runner:
- name: runner1
class: single_train
epochs: 5 # 10->5
```
On Linux you can edit the yaml file with a text editor such as `vim`:
```bash
vim ./models/rank/dnn/config.yaml
# press i to enter insert mode
# edit the yaml configuration
# when you are done, press Esc to leave insert mode
# type :wq to save and quit
```
After you finish editing `models/rank/dnn/config.yaml`, run the `dnn` model:
```bash
# train with the customized config
python -m paddlerec.run -m ./models/rank/dnn/config.yaml
```
### Distributed training
Distributed training requires editing `config.yaml`: add or change the `engine` option to `cluster` or `local_cluster` for cluster training or locally simulated distributed training, respectively.
#### Locally simulated distributed training
Taking the dnn model as an example, edit the dnn model's `config.yaml` in the paddlerec code directory:
```yaml
runner:
- name: runner1
class: local_cluster_train # single_train -> local_cluster_train
```
Then start paddlerec training:
```bash
# locally simulated distributed training
python -m paddlerec.run -m ./models/rank/dnn/config.yaml
```
#### Cluster distributed training
Taking the dnn model as an example, first edit the dnn model's `config.yaml` in the paddlerec code directory:
```yaml
runner:
- name: runner1
class: cluster_train # single_train -> cluster_train
```
Then add the distributed launch config file `backend.yaml`; its configuration rules are described in the [distributed training](doc/distributed_train.md) tutorial. Finally, start paddlerec training:
```bash
# after the mpi/k8s/paddlecloud cluster environment has been set up
python -m paddlerec.run -m ./models/rank/dnn/config.yaml -b backend.yaml
```
<h2 align="center">Supported Models</h2>
| Category | Model | Single-node CPU | Single-node GPU | Distributed CPU |
| :------: | :-----------------------------------------------------------------------: | :---------: | :---------: | :-----------: |
| Content Understanding | [Text-Classification](models/contentunderstanding/classification/model.py) | ✓ | x | ✓ |
| Content Understanding | [TagSpace](models/contentunderstanding/tagspace/model.py) | ✓ | x | ✓ |
| Recall | [DSSM](models/match/dssm/model.py) | ✓ | x | ✓ |
| Recall | [MultiView-Simnet](models/match/multiview-simnet/model.py) | ✓ | x | ✓ |
| Recall | [TDM](models/treebased/tdm/model.py) | ✓ | x | ✓ |
| Recall | [Word2Vec](models/recall/word2vec/model.py) | ✓ | x | ✓ |
| Recall | [SSR](models/recall/ssr/model.py) | ✓ | ✓ | ✓ |
| Recall | [Gru4Rec](models/recall/gru4rec/model.py) | ✓ | ✓ | ✓ |
| Recall | [Youtube_dnn](models/recall/youtube_dnn/model.py) | ✓ | ✓ | ✓ |
| Recall | [NCF](models/recall/ncf/model.py) | ✓ | ✓ | ✓ |
| Ranking | [Dnn](models/rank/dnn/model.py) | ✓ | x | ✓ |
| Ranking | [DeepFM](models/rank/deepfm/model.py) | ✓ | x | ✓ |
| Ranking | [xDeepFM](models/rank/xdeepfm/model.py) | ✓ | x | ✓ |
| Ranking | [DIN](models/rank/din/model.py) | ✓ | x | ✓ |
| Ranking | [Wide&Deep](models/rank/wide_deep/model.py) | ✓ | x | ✓ |
| Multi-task | [ESMM](models/multitask/esmm/model.py) | ✓ | ✓ | ✓ |
| Multi-task | [MMOE](models/multitask/mmoe/model.py) | ✓ | ✓ | ✓ |
| Multi-task | [ShareBottom](models/multitask/share-bottom/model.py) | ✓ | ✓ | ✓ |
| Re-ranking | [Listwise](models/rerank/listwise/model.py) | ✓ | x | ✓ |
<h2 align="center">Documentation</h2>
### Background
* [Introduction to recommender systems](doc/rec_background.md)
* [Introduction to distributed deep learning](doc/ps_background.md)
### Getting started
* [Requirements](#requirements)
* [Installation](#installation)
* [Quick start](#run-a-built-in-model-with-the-default-config)
### Advanced tutorials
* [Custom datasets and readers](doc/custom_dataset_reader.md)
* [Distributed training](doc/distributed_train.md)
### Developer guide
* [PaddleRec design document](doc/design.md)
* [Secondary development](doc/development.md)
### PaddleRec performance
* [Benchmark](doc/benchmark.md)
### FAQ
* [Frequently asked questions](doc/faq.md)
<h2 align="center">Community</h2>
### Feedback
Comments, suggestions, and bug reports are welcome via [GitHub Issues](https://github.com/PaddlePaddle/PaddleRec/issues).
### Release history
- 2020.5.14 - PaddleRec v0.1
### License
This project is released under the [Apache 2.0 license](LICENSE).
#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################
# Usage: submit.sh
# Description: runs the MPI submit client implementation
###################################################
# ---------------------------------------------------------------------------- #
# variable define #
# ---------------------------------------------------------------------------- #
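# Note: this script expects the sourced ${engine_submit_scrpit} (and/or the caller's
# environment) to define engine_temp_path, engine_submit_ak, engine_submit_sk,
# engine_submit_cluster, engine_submit_version, engine_submit_priority,
# engine_submit_nodes, engine_submit_jobname, engine_submit_group,
# engine_submit_config, g_run_cmd and g_submitfiles, plus the package,
# before_submit_hook and after_submit_hook functions invoked below.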
#-----------------------------------------------------------------------------------------------------------------
#fun : package
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function package_hook() {
g_run_stage="package"
package
}
#-----------------------------------------------------------------------------------------------------------------
#fun : before hook submit to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function _before_submit() {
echo "before_submit"
before_submit_hook
}
#-----------------------------------------------------------------------------------------------------------------
#fun : after hook submit to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function _after_submit() {
echo "after_submit"
after_submit_hook
}
#-----------------------------------------------------------------------------------------------------------------
#fun : submit to cluster
#param : N/A
#return : 0 -- success; not 0 -- failure
#-----------------------------------------------------------------------------------------------------------------
function _submit() {
g_run_stage="submit"
cd ${engine_temp_path}
paddlecloud job --ak ${engine_submit_ak} --sk ${engine_submit_sk} train --cluster-name ${engine_submit_cluster} \
--job-version ${engine_submit_version} \
--mpi-priority ${engine_submit_priority} \
--mpi-wall-time 300:59:00 \
--mpi-nodes ${engine_submit_nodes} --is-standalone 0 \
--mpi-memory 110Gi \
--job-name ${engine_submit_jobname} \
--start-cmd "${g_run_cmd}" \
--group-name ${engine_submit_group} \
--job-conf ${engine_submit_config} \
--files ${g_submitfiles} \
--json
cd -
}
function submit_hook() {
_before_submit
_submit
_after_submit
}
function main() {
source ${engine_submit_scrpit}
package_hook
submit_hook
}
main
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from __future__ import unicode_literals
import copy
import os
import subprocess
from paddlerec.core.engine.engine import Engine
from paddlerec.core.factory import TrainerFactory
from paddlerec.core.utils import envs
class ClusterEngine(Engine):
def __init_impl__(self):
abs_dir = os.path.dirname(os.path.abspath(__file__))
backend = envs.get_runtime_environ("engine_backend")
if backend == "PaddleCloud":
self.submit_script = os.path.join(abs_dir, "cloud/cluster.sh")
else:
raise ValueError("{} is not supported now".format(backend))
def start_worker_procs(self):
trainer = TrainerFactory.create(self.trainer)
trainer.run()
def start_master_procs(self):
default_env = os.environ.copy()
current_env = copy.copy(default_env)
current_env.pop("http_proxy", None)
current_env.pop("https_proxy", None)
cmd = ("bash {}".format(self.submit_script)).split(" ")
proc = subprocess.Popen(cmd, env=current_env, cwd=os.getcwd())
proc.wait()
def run(self):
role = envs.get_runtime_environ("engine_role")
if role == "MASTER":
self.start_master_procs()
elif role == "WORKER":
self.start_worker_procs()
else:
raise ValueError("role {} error, must be MASTER or WORKER".format(
role))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
class Engine:
__metaclass__ = abc.ABCMeta
def __init__(self, envs, trainer):
self.envs = envs
self.trainer = trainer
self.__init_impl__()
def __init_impl__(self):
pass
@abc.abstractmethod
def run(self):
pass
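# Illustrative sketch (not part of the repository): the smallest possible Engine
# subclass, showing the contract defined above -- Engine.__init__ stores `envs`
# and `trainer`, calls `__init_impl__` once, and concrete engines implement `run`.
# The class name and the "prefix" key are made up for the example.
class EchoEngine(Engine):
    def __init_impl__(self):
        self.prefix = self.envs.get("prefix", "engine")

    def run(self):
        # a real engine would launch trainer processes here
        print("{}: would run trainer {}".format(self.prefix, self.trainer))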
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -14,13 +14,15 @@
from __future__ import print_function
from __future__ import unicode_literals
import subprocess
import sys
import os
import copy
import os
import sys
import subprocess
from paddlerec.core.engine.engine import Engine
from paddlerec.core.utils import envs
from fleetrec.core.engine.engine import Engine
from fleetrec.core.utils import envs
class LocalClusterEngine(Engine):
def start_procs(self):
......@@ -36,7 +38,7 @@ class LocalClusterEngine(Engine):
current_env.pop("https_proxy", None)
procs = []
log_fns = []
for i in range(server_num - 1):
while True:
new_port = envs.find_free_port()
......@@ -44,10 +46,15 @@ class LocalClusterEngine(Engine):
ports.append(new_port)
break
user_endpoints = ",".join(["127.0.0.1:" + str(x) for x in ports])
user_endpoints_ips = [x.split(":")[0] for x in user_endpoints.split(",")]
user_endpoints_port = [x.split(":")[1] for x in user_endpoints.split(",")]
factory = "fleetrec.core.factory"
user_endpoints_ips = [
x.split(":")[0] for x in user_endpoints.split(",")
]
user_endpoints_port = [
x.split(":")[1] for x in user_endpoints.split(",")
]
factory = "paddlerec.core.factory"
cmd = [sys.executable, "-u", "-m", factory, self.trainer]
for i in range(server_num):
......@@ -62,7 +69,8 @@ class LocalClusterEngine(Engine):
os.system("mkdir -p {}".format(logs_dir))
fn = open("%s/server.%d" % (logs_dir, i), "w")
log_fns.append(fn)
proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd())
proc = subprocess.Popen(
cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd())
procs.append(proc)
for i in range(worker_num):
......@@ -76,7 +84,8 @@ class LocalClusterEngine(Engine):
os.system("mkdir -p {}".format(logs_dir))
fn = open("%s/worker.%d" % (logs_dir, i), "w")
log_fns.append(fn)
proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd())
proc = subprocess.Popen(
cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd())
procs.append(proc)
# only wait worker to finish here
......@@ -91,8 +100,10 @@ class LocalClusterEngine(Engine):
if len(log_fns) > 0:
log_fns[i].close()
procs[i].terminate()
print("all workers already completed, you can view logs under the `{}` directory".format(logs_dir),
file=sys.stderr)
print(
"all workers already completed, you can view logs under the `{}` directory".
format(logs_dir),
file=sys.stderr)
def run(self):
self.start_procs()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -14,18 +14,18 @@
from __future__ import print_function
from __future__ import unicode_literals
import subprocess
import sys
import os
import copy
import os
import sys
import subprocess
from fleetrec.core.engine.engine import Engine
from paddlerec.core.engine.engine import Engine
class LocalMPIEngine(Engine):
def start_procs(self):
logs_dir = self.envs["log_dir"]
default_env = os.environ.copy()
current_env = copy.copy(default_env)
current_env.pop("http_proxy", None)
......@@ -33,7 +33,7 @@ class LocalMPIEngine(Engine):
procs = []
log_fns = []
factory = "fleetrec.core.factory"
factory = "paddlerec.core.factory"
cmd = "mpirun -npernode 2 -timestamp-output -tag-output".split(" ")
cmd.extend([sys.executable, "-u", "-m", factory, self.trainer])
......@@ -41,7 +41,8 @@ class LocalMPIEngine(Engine):
os.system("mkdir -p {}".format(logs_dir))
fn = open("%s/job.log" % logs_dir, "w")
log_fns.append(fn)
proc = subprocess.Popen(cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd())
proc = subprocess.Popen(
cmd, env=current_env, stdout=fn, stderr=fn, cwd=os.getcwd())
else:
proc = subprocess.Popen(cmd, env=current_env, cwd=os.getcwd())
procs.append(proc)
......@@ -50,7 +51,9 @@ class LocalMPIEngine(Engine):
if len(log_fns) > 0:
log_fns[i].close()
procs[i].wait()
print("all workers and parameter servers already completed", file=sys.stderr)
print(
"all workers and parameter servers already completed",
file=sys.stderr)
def run(self):
self.start_procs()
......@@ -17,26 +17,26 @@ import sys
import yaml
from fleetrec.core.utils import envs
from paddlerec.core.utils import envs
trainer_abs = os.path.join(os.path.dirname(
os.path.abspath(__file__)), "trainers")
trainer_abs = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "trainers")
trainers = {}
def trainer_registry():
trainers["SingleTrainer"] = os.path.join(
trainer_abs, "single_trainer.py")
trainers["ClusterTrainer"] = os.path.join(
trainer_abs, "cluster_trainer.py")
trainers["CtrCodingTrainer"] = os.path.join(
trainer_abs, "ctr_coding_trainer.py")
trainers["CtrModulTrainer"] = os.path.join(
trainer_abs, "ctr_modul_trainer.py")
trainers["TDMSingleTrainer"] = os.path.join(
trainer_abs, "tdm_single_trainer.py")
trainers["TDMClusterTrainer"] = os.path.join(
trainer_abs, "tdm_cluster_trainer.py")
trainers["SingleTrainer"] = os.path.join(trainer_abs, "single_trainer.py")
trainers["SingleInfer"] = os.path.join(trainer_abs, "single_infer.py")
trainers["ClusterTrainer"] = os.path.join(trainer_abs,
"cluster_trainer.py")
trainers["CtrCodingTrainer"] = os.path.join(trainer_abs,
"ctr_coding_trainer.py")
trainers["CtrModulTrainer"] = os.path.join(trainer_abs,
"ctr_modul_trainer.py")
trainers["TDMSingleTrainer"] = os.path.join(trainer_abs,
"tdm_single_trainer.py")
trainers["TDMClusterTrainer"] = os.path.join(trainer_abs,
"tdm_cluster_trainer.py")
trainer_registry()
......@@ -55,8 +55,8 @@ class TrainerFactory(object):
if trainer_abs is None:
if not os.path.isfile(train_mode):
raise IOError(
"trainer {} can not be recognized".format(train_mode))
raise IOError("trainer {} can not be recognized".format(
train_mode))
trainer_abs = train_mode
train_mode = "UserDefineTrainer"
......@@ -71,7 +71,7 @@ class TrainerFactory(object):
with open(config, 'r') as rb:
_config = yaml.load(rb.read(), Loader=yaml.FullLoader)
else:
raise ValueError("fleetrec's config only support yaml")
raise ValueError("paddlerec's config only support yaml")
envs.set_global_envs(_config)
envs.update_workspace()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
class Layer(object):
"""R
"""
__metaclass__ = abc.ABCMeta
def __init__(self, config):
"""R
"""
pass
@abc.abstractmethod
def generate(self, param):
"""R
"""
pass
......@@ -53,7 +53,7 @@ class Metric(object):
pass
@abc.abstractmethod
def get_result_to_string(self):
def __str__(self):
"""
Return:
result(string) : calculate result with string format, for output
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -13,14 +13,16 @@
# limitations under the License.
import math
import numpy as np
import paddle.fluid as fluid
from fleetrec.core.metric import Metric
from paddlerec.core.metric import Metric
class AUCMetric(Metric):
"""
Metric For Paddle Model
Metric For Fluid Model
"""
def __init__(self, config, fleet):
......@@ -81,7 +83,8 @@ class AUCMetric(Metric):
if scope.find_var(metric_item['var'].name) is None:
result[metric_name] = None
continue
result[metric_name] = self.get_metric(scope, metric_item['var'].name)
result[metric_name] = self.get_metric(scope,
metric_item['var'].name)
return result
def calculate_auc(self, global_pos, global_neg):
......@@ -176,14 +179,18 @@ class AUCMetric(Metric):
self._result['mean_q'] = 0
return self._result
if 'stat_pos' in result and 'stat_neg' in result:
result['auc'] = self.calculate_auc(result['stat_pos'], result['stat_neg'])
result['bucket_error'] = self.calculate_auc(result['stat_pos'], result['stat_neg'])
result['auc'] = self.calculate_auc(result['stat_pos'],
result['stat_neg'])
result['bucket_error'] = self.calculate_auc(result['stat_pos'],
result['stat_neg'])
if 'pos_ins_num' in result:
result['actual_ctr'] = result['pos_ins_num'] / result['total_ins_num']
result['actual_ctr'] = result['pos_ins_num'] / result[
'total_ins_num']
if 'abserr' in result:
result['mae'] = result['abserr'] / result['total_ins_num']
if 'sqrerr' in result:
result['rmse'] = math.sqrt(result['sqrerr'] / result['total_ins_num'])
result['rmse'] = math.sqrt(result['sqrerr'] /
result['total_ins_num'])
if 'prob' in result:
result['predict_ctr'] = result['prob'] / result['total_ins_num']
if abs(result['predict_ctr']) > 1e-6:
......@@ -198,7 +205,7 @@ class AUCMetric(Metric):
""" """
return self._result
def get_result_to_string(self):
def __str__(self):
""" """
result = self.get_result()
result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f " \
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import paddle.fluid as fluid
from paddlerec.core.utils import envs
class Model(object):
"""Base Model
"""
__metaclass__ = abc.ABCMeta
def __init__(self, config):
"""R
"""
self._cost = None
self._metrics = {}
self._data_var = []
self._infer_data_var = []
self._infer_results = {}
self._data_loader = None
self._infer_data_loader = None
self._fetch_interval = 20
self._namespace = "train.model"
self._platform = envs.get_platform()
self._init_hyper_parameters()
self._env = config
self._slot_inited = False
def _init_hyper_parameters(self):
pass
def _init_slots(self, **kargs):
if self._slot_inited:
return
self._slot_inited = True
dataset = {}
model_dict = {}
for i in self._env["executor"]:
if i["name"] == kargs["name"]:
model_dict = i
break
for i in self._env["dataset"]:
if i["name"] == model_dict["dataset_name"]:
dataset = i
break
name = "dataset." + dataset["name"] + "."
sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
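# Note: sparse_slots / dense_slots above are space-separated strings read from the
# yaml dataset config; a dense entry looks like "name:[dim1,dim2]" (parsed below),
# while a sparse entry is just the slot name.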
if sparse_slots != "" or dense_slots != "":
if sparse_slots == "":
sparse_slots = []
else:
sparse_slots = sparse_slots.strip().split(" ")
if dense_slots == "":
dense_slots = []
else:
dense_slots = dense_slots.strip().split(" ")
dense_slots_shape = [[
int(j) for j in i.split(":")[1].strip("[]").split(",")
] for i in dense_slots]
dense_slots = [i.split(":")[0] for i in dense_slots]
self._dense_data_var = []
for i in range(len(dense_slots)):
l = fluid.layers.data(
name=dense_slots[i],
shape=dense_slots_shape[i],
dtype="float32")
self._data_var.append(l)
self._dense_data_var.append(l)
self._sparse_data_var = []
for name in sparse_slots:
l = fluid.layers.data(
name=name, shape=[1], lod_level=1, dtype="int64")
self._data_var.append(l)
self._sparse_data_var.append(l)
dataset_class = dataset["type"]
if dataset_class == "DataLoader":
self._init_dataloader()
def _init_dataloader(self, is_infer=False):
if is_infer:
data = self._infer_data_var
else:
data = self._data_var
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=data,
capacity=64,
use_double_buffer=False,
iterable=False)
def get_inputs(self):
return self._data_var
def get_infer_inputs(self):
return self._infer_data_var
def get_infer_results(self):
return self._infer_results
def get_avg_cost(self):
"""R
"""
return self._cost
def get_metrics(self):
"""R
"""
return self._metrics
def get_fetch_period(self):
return self._fetch_interval
def _build_optimizer(self, name, lr, strategy=None):
name = name.upper()
optimizers = ["SGD", "ADAM", "ADAGRAD"]
if name not in optimizers:
raise ValueError(
"configured optimizer only supports SGD/Adam/Adagrad")
if name == "SGD":
reg = envs.get_global_env("hyper_parameters.reg", 0.0001,
self._namespace)
optimizer_i = fluid.optimizer.SGD(
lr, regularization=fluid.regularizer.L2DecayRegularizer(reg))
elif name == "ADAM":
optimizer_i = fluid.optimizer.Adam(lr, lazy_mode=True)
elif name == "ADAGRAD":
optimizer_i = fluid.optimizer.Adagrad(lr)
else:
raise ValueError(
"configured optimizer only supports SGD/Adam/Adagrad")
return optimizer_i
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
None, self._namespace)
optimizer = envs.get_global_env("hyper_parameters.optimizer", None,
self._namespace)
return self._build_optimizer(optimizer, learning_rate)
def input_data(self, is_infer=False, **kwargs):
name = "dataset." + kwargs.get("dataset_name") + "."
sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
self._sparse_data_var_map = {}
self._dense_data_var_map = {}
if sparse_slots != "" or dense_slots != "":
if sparse_slots == "":
sparse_slots = []
else:
sparse_slots = sparse_slots.strip().split(" ")
if dense_slots == "":
dense_slots = []
else:
dense_slots = dense_slots.strip().split(" ")
dense_slots_shape = [[
int(j) for j in i.split(":")[1].strip("[]").split(",")
] for i in dense_slots]
dense_slots = [i.split(":")[0] for i in dense_slots]
self._dense_data_var = []
data_var_ = []
for i in range(len(dense_slots)):
l = fluid.layers.data(
name=dense_slots[i],
shape=dense_slots_shape[i],
dtype="float32")
data_var_.append(l)
self._dense_data_var.append(l)
self._dense_data_var_map[dense_slots[i]] = l
self._sparse_data_var = []
for name in sparse_slots:
l = fluid.layers.data(
name=name, shape=[1], lod_level=1, dtype="int64")
data_var_.append(l)
self._sparse_data_var.append(l)
self._sparse_data_var_map[name] = l
return data_var_
else:
return None
def net(self, input, is_infer=False):
return None
def _construct_reader(self, is_infer=False):
if is_infer:
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
else:
dataset_class = envs.get_global_env("dataset_class", None,
"train.reader")
if dataset_class == "DataLoader":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
def train_net(self):
input_data = self.input_data(is_infer=False)
self._data_var = input_data
self._construct_reader(is_infer=False)
self.net(input_data, is_infer=False)
def infer_net(self):
input_data = self.input_data(is_infer=True)
self._infer_data_var = input_data
self._construct_reader(is_infer=True)
self.net(input_data, is_infer=True)
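# Illustrative sketch (not part of the repository): a minimal Model subclass built
# only on the hooks shown above. It assumes the dataset config declares at least
# one dense slot; the class name, hidden size and dummy cost are made up.
class TinyModel(Model):
    def _init_hyper_parameters(self):
        self.hidden_size = 16

    def net(self, input, is_infer=False):
        # `input` is the list returned by input_data(); dense variables are also
        # collected into self._dense_data_var by the base class.
        fc = fluid.layers.fc(input=self._dense_data_var[0],
                             size=self.hidden_size, act="relu")
        score = fluid.layers.fc(input=fc, size=1, act=None)
        # a placeholder cost so get_avg_cost() returns something trainable
        self._cost = fluid.layers.reduce_mean(fluid.layers.square(score))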
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import yaml
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
import yaml
from fleetrec.core.model import Model
from fleetrec.core.utils import table
from paddlerec.core.model import Model
from paddlerec.core.utils import table
def create(config):
......@@ -16,6 +31,7 @@ def create(config):
Model Instance
"""
model = None
if config['mode'] == 'fluid':
model = YamlModel(config)
model.train_net()
......@@ -35,7 +51,12 @@ class YamlModel(Model):
f = open(config['layer_file'], 'r')
self._build_nodes = yaml.safe_load(f.read())
self._build_phase = ['input', 'param', 'summary', 'layer']
self._build_param = {'layer': {}, 'inner_layer': {}, 'layer_extend': {}, 'model': {}}
self._build_param = {
'layer': {},
'inner_layer': {},
'layer_extend': {},
'model': {}
}
self._inference_meta = {'dependency': {}, 'params': {}}
def train_net(self):
......@@ -61,10 +82,12 @@ class YamlModel(Model):
if self._build_nodes[phase] is None:
continue
for node in self._build_nodes[phase]:
exec("""layer=layer.{}(node)""".format(node['class']))
layer_output, extend_output = layer.generate(self._config['mode'], self._build_param)
exec ("""layer=layer.{}(node)""".format(node['class']))
layer_output, extend_output = layer.generate(
self._config['mode'], self._build_param)
self._build_param['layer'][node['name']] = layer_output
self._build_param['layer_extend'][node['name']] = extend_output
self._build_param['layer_extend'][node[
'name']] = extend_output
if extend_output is None:
continue
if 'loss' in extend_output:
......@@ -74,17 +97,24 @@ class YamlModel(Model):
self._cost += extend_output['loss']
if 'data_var' in extend_output:
self._data_var += extend_output['data_var']
if 'metric_label' in extend_output and extend_output['metric_label'] is not None:
self._metrics[extend_output['metric_label']] = extend_output['metric_dict']
if 'metric_label' in extend_output and extend_output[
'metric_label'] is not None:
self._metrics[extend_output[
'metric_label']] = extend_output['metric_dict']
if 'inference_param' in extend_output:
inference_param = extend_output['inference_param']
param_name = inference_param['name']
if param_name not in self._build_param['table']:
self._build_param['table'][param_name] = {'params': []}
table_meta = table.TableMeta.alloc_new_table(inference_param['table_id'])
self._build_param['table'][param_name]['_meta'] = table_meta
self._build_param['table'][param_name]['params'] += inference_param['params']
self._build_param['table'][param_name] = {
'params': []
}
table_meta = table.TableMeta.alloc_new_table(
inference_param['table_id'])
self._build_param['table'][param_name][
'_meta'] = table_meta
self._build_param['table'][param_name][
'params'] += inference_param['params']
pass
@classmethod
......@@ -99,20 +129,25 @@ class YamlModel(Model):
metrics = params['metrics']
for name in metrics:
model_metrics = metrics[name]
stat_var_names += [model_metrics[metric]['var'].name for metric in model_metrics]
stat_var_names += [
model_metrics[metric]['var'].name
for metric in model_metrics
]
strategy['stat_var_names'] = list(set(stat_var_names))
optimizer_generator = 'optimizer = fluid.optimizer.' + optimizer_conf['class'] + \
'(learning_rate=' + str(optimizer_conf['learning_rate']) + ')'
exec(optimizer_generator)
exec (optimizer_generator)
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
return optimizer
def dump_model_program(self, path):
"""R
"""
with open(path + '/' + self._name + '_main_program.pbtxt', "w") as fout:
with open(path + '/' + self._name + '_main_program.pbtxt',
"w") as fout:
print >> fout, self._build_param['model']['train_program']
with open(path + '/' + self._name + '_startup_program.pbtxt', "w") as fout:
with open(path + '/' + self._name + '_startup_program.pbtxt',
"w") as fout:
print >> fout, self._build_param['model']['startup_program']
pass
......@@ -122,7 +157,8 @@ class YamlModel(Model):
scope = params['scope']
decay = params['decay']
for param_table in self._build_param['table']:
table_id = self._build_param['table'][param_table]['_meta']._table_id
table_id = self._build_param['table'][param_table][
'_meta']._table_id
fleet.shrink_dense_table(decay, scope=scope, table_id=table_id)
def dump_inference_program(self, inference_layer, path):
......@@ -137,17 +173,25 @@ class YamlModel(Model):
executor = params['executor']
program = self._build_param['model']['train_program']
for table_name, table in self._build_param['table'].items():
fleet._fleet_ptr.pull_dense(scope, table['_meta']._table_id, table['params'])
fleet._fleet_ptr.pull_dense(scope, table['_meta']._table_id,
table['params'])
for infernce_item in params['inference_list']:
params_name_list = self.inference_params(infernce_item['layer_name'])
params_var_list = [program.global_block().var(i) for i in params_name_list]
params_name_list = self.inference_params(infernce_item[
'layer_name'])
params_var_list = [
program.global_block().var(i) for i in params_name_list
]
params_file_name = infernce_item['save_file_name']
with fluid.scope_guard(scope):
if params['save_combine']:
fluid.io.save_vars(executor, "./", \
program, vars=params_var_list, filename=params_file_name)
else:
fluid.io.save_vars(executor, params_file_name, program, vars=params_var_list)
fluid.io.save_vars(
executor,
params_file_name,
program,
vars=params_var_list)
def inference_params(self, inference_layer):
"""
......@@ -162,11 +206,13 @@ class YamlModel(Model):
return self._inference_meta['params'][layer]
self._inference_meta['params'][layer] = []
self._inference_meta['dependency'][layer] = self.get_dependency(self._build_param['inner_layer'], layer)
self._inference_meta['dependency'][layer] = self.get_dependency(
self._build_param['inner_layer'], layer)
for node in self._build_nodes['layer']:
if node['name'] not in self._inference_meta['dependency'][layer]:
continue
if 'inference_param' in self._build_param['layer_extend'][node['name']]:
if 'inference_param' in self._build_param['layer_extend'][node[
'name']]:
self._inference_meta['params'][layer] += \
self._build_param['layer_extend'][node['name']]['inference_param']['params']
return self._inference_meta['params'][layer]
......@@ -184,5 +230,6 @@ class YamlModel(Model):
dependencys = copy.deepcopy(layer_graph[dest_layer]['input'])
dependency_list = copy.deepcopy(dependencys)
for dependency in dependencys:
dependency_list = dependency_list + self.get_dependency(layer_graph, dependency)
dependency_list = dependency_list + self.get_dependency(
layer_graph, dependency)
return list(set(dependency_list))
......@@ -13,11 +13,12 @@
# limitations under the License.
import paddle.fluid as fluid
from fleetrec.core.layer import Layer
from paddlerec.core.layer import Layer
class EmbeddingInputLayer(Layer):
"""R
class EmbeddingFuseLayer(Layer):
"""embedding + sequence + concat
"""
def __init__(self, config):
......@@ -31,7 +32,7 @@ class EmbeddingInputLayer(Layer):
self._emb_dim = self._mf_dim + 3 # append show ctr lr
self._emb_layers = []
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
show_clk = fluid.layers.concat(
......@@ -39,7 +40,8 @@ class EmbeddingInputLayer(Layer):
show_clk.stop_gradient = True
data_var = []
for slot in self._slots:
l = fluid.layers.data(name=slot, shape=[1], dtype="int64", lod_level=1)
l = fluid.layers.data(
name=slot, shape=[1], dtype="int64", lod_level=1)
data_var.append(l)
emb = fluid.layers.embedding(input=l, size=[10, self._emb_dim], \
is_sparse=True, is_distributed=True,
......@@ -47,7 +49,8 @@ class EmbeddingInputLayer(Layer):
emb = fluid.layers.sequence_pool(input=emb, pool_type='sum')
emb = fluid.layers.continuous_value_model(emb, show_clk, self._cvm)
self._emb_layers.append(emb)
output = fluid.layers.concat(input=self._emb_layers, axis=1, name=self._name)
output = fluid.layers.concat(
input=self._emb_layers, axis=1, name=self._name)
return output, {'data_var': data_var}
......@@ -63,7 +66,7 @@ class LabelInputLayer(Layer):
self._data_type = config.get('data_type', "int64")
self._label_idx = config['label_idx']
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
label = fluid.layers.data(name=self._name, shape=[-1, self._dim], \
......@@ -85,7 +88,7 @@ class TagInputLayer(Layer):
self._dim = config.get('dim', 1)
self._data_type = config['data_type']
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
output = fluid.layers.data(name=self._name, shape=[-1, self._dim], \
......@@ -107,10 +110,16 @@ class ParamLayer(Layer):
self._data_type = config.get('data_type', 'float32')
self._config = config
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
return self._config, {'inference_param': {'name': 'param', 'params': [], 'table_id': self._table_id}}
return self._config, {
'inference_param': {
'name': 'param',
'params': [],
'table_id': self._table_id
}
}
class SummaryLayer(Layer):
......@@ -125,13 +134,19 @@ class SummaryLayer(Layer):
self._data_type = config.get('data_type', 'float32')
self._config = config
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
return self._config, {'inference_param': {'name': 'summary', 'params': [], 'table_id': self._table_id}}
return self._config, {
'inference_param': {
'name': 'summary',
'params': [],
'table_id': self._table_id
}
}
class NormalizetionLayer(Layer):
class NormalizationLayer(Layer):
"""R
"""
......@@ -143,7 +158,7 @@ class NormalizetionLayer(Layer):
self._summary = config['summary']
self._table_id = config.get('table_id', -1)
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
input_layer = param['layer'][self._input[0]]
......@@ -151,14 +166,24 @@ class NormalizetionLayer(Layer):
if len(self._input) > 0:
input_list = [param['layer'][i] for i in self._input]
input_layer = fluid.layers.concat(input=input_list, axis=1)
bn = fluid.layers.data_norm(input=input_layer, name=self._name, epsilon=1e-4, param_attr={
"batch_size": 1e4, "batch_sum_default": 0.0, "batch_square": 1e4})
inference_param = [self._name + '.batch_size', self._name + '.batch_sum', self._name + '.batch_square_sum']
bn = fluid.layers.data_norm(
input=input_layer,
name=self._name,
epsilon=1e-4,
param_attr={
"batch_size": 1e4,
"batch_sum_default": 0.0,
"batch_square": 1e4
})
inference_param = [
self._name + '.batch_size', self._name + '.batch_sum',
self._name + '.batch_square_sum'
]
return bn, {'inference_param': {'name': 'summary', \
'params': inference_param, 'table_id': summary_layer.get('table_id', -1)}}
class NeuralLayer(Layer):
class FCLayer(Layer):
"""R
"""
......@@ -171,7 +196,7 @@ class NeuralLayer(Layer):
self._bias = config.get('bias', True)
self._act_func = config.get('act_func', None)
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
param_layer = param['layer'][self._param]
......@@ -180,11 +205,13 @@ class NeuralLayer(Layer):
input_list = [param['layer'][i] for i in self._input]
input_layer = fluid.layers.concat(input=input_list, axis=1)
input_coln = input_layer.shape[1]
scale = param_layer['init_range'] / (input_coln ** 0.5)
scale = param_layer['init_range'] / (input_coln**0.5)
bias = None
if self._bias:
bias = fluid.ParamAttr(learning_rate=1.0,
initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=scale))
bias = fluid.ParamAttr(
learning_rate=1.0,
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=scale))
fc = fluid.layers.fc(
name=self._name,
input=input_layer,
......@@ -199,7 +226,7 @@ class NeuralLayer(Layer):
'table_id': param_layer.get('table_id', -1)}}
class SigmoidLossLayer(Layer):
class LogLossLayer(Layer):
"""R
"""
......@@ -215,29 +242,60 @@ class SigmoidLossLayer(Layer):
self._extend_output = {
'metric_label': self._metric_label,
'metric_dict': {
'auc': {'var': None},
'batch_auc': {'var': None},
'stat_pos': {'var': None, 'data_type': 'int64'},
'stat_neg': {'var': None, 'data_type': 'int64'},
'batch_stat_pos': {'var': None, 'data_type': 'int64'},
'batch_stat_neg': {'var': None, 'data_type': 'int64'},
'pos_ins_num': {'var': None},
'abserr': {'var': None},
'sqrerr': {'var': None},
'prob': {'var': None},
'total_ins_num': {'var': None},
'q': {'var': None}
'auc': {
'var': None
},
'batch_auc': {
'var': None
},
'stat_pos': {
'var': None,
'data_type': 'int64'
},
'stat_neg': {
'var': None,
'data_type': 'int64'
},
'batch_stat_pos': {
'var': None,
'data_type': 'int64'
},
'batch_stat_neg': {
'var': None,
'data_type': 'int64'
},
'pos_ins_num': {
'var': None
},
'abserr': {
'var': None
},
'sqrerr': {
'var': None
},
'prob': {
'var': None
},
'total_ins_num': {
'var': None
},
'q': {
'var': None
}
}
}
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
input_layer = param['layer'][self._input[0]]
label_layer = param['layer'][self._label]
output = fluid.layers.clip(input_layer, self._bound[0], self._bound[1], name=self._name)
output = fluid.layers.clip(
input_layer, self._bound[0], self._bound[1], name=self._name)
norm = fluid.layers.sigmoid(output, name=self._name)
output = fluid.layers.log_loss(norm, fluid.layers.cast(x=label_layer, dtype='float32'))
output = fluid.layers.log_loss(
norm, fluid.layers.cast(
x=label_layer, dtype='float32'))
if self._weight:
weight_layer = param['layer'][self._weight]
output = fluid.layers.elementwise_mul(output, weight_layer)
......@@ -247,7 +305,11 @@ class SigmoidLossLayer(Layer):
# For AUC Metric
metric = self._extend_output['metric_dict']
binary_predict = fluid.layers.concat(
input=[fluid.layers.elementwise_sub(fluid.layers.ceil(norm), norm), norm], axis=1)
input=[
fluid.layers.elementwise_sub(fluid.layers.ceil(norm), norm),
norm
],
axis=1)
metric['auc']['var'], metric['batch_auc']['var'], [metric['batch_stat_pos']['var'], \
metric['batch_stat_neg']['var'], metric['stat_pos']['var'],
metric['stat_neg']['var']] = \
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import abc
import os
import paddle.fluid.incubate.data_generator as dg
import yaml
from paddlerec.core.utils import envs
class Reader(dg.MultiSlotDataGenerator):
__metaclass__ = abc.ABCMeta
def __init__(self, config):
dg.MultiSlotDataGenerator.__init__(self)
if os.path.isfile(config):
with open(config, 'r') as rb:
_config = yaml.load(rb.read(), Loader=yaml.FullLoader)
else:
raise ValueError("reader config only support yaml")
@abc.abstractmethod
def init(self):
"""init"""
pass
@abc.abstractmethod
def generate_sample(self, line):
pass
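# Illustrative sketch (not part of the repository): a minimal Reader subclass.
# generate_sample must return a callable that yields [(slot_name, value_list), ...]
# pairs, the same shape SlotReader below produces; the slot names and the
# "label feature" line format here are made up for the example.
class MyReader(Reader):
    def init(self):
        pass

    def generate_sample(self, line):
        def data_iter():
            label, feature = line.strip().split(" ", 1)
            yield [("label", [int(label)]), ("feature", [float(feature)])]
        return data_iter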
class SlotReader(dg.MultiSlotDataGenerator):
__metaclass__ = abc.ABCMeta
def __init__(self, config):
dg.MultiSlotDataGenerator.__init__(self)
if os.path.isfile(config):
with open(config, 'r') as rb:
_config = yaml.load(rb.read(), Loader=yaml.FullLoader)
else:
raise ValueError("reader config only support yaml")
def init(self, sparse_slots, dense_slots, padding=0):
from functools import reduce  # reduce is not a builtin on Python 3
from operator import mul
self.sparse_slots = []
if sparse_slots.strip() != "#" and sparse_slots.strip(
) != "?" and sparse_slots.strip() != "":
self.sparse_slots = sparse_slots.strip().split(" ")
self.dense_slots = []
if dense_slots.strip() != "#" and dense_slots.strip(
) != "?" and dense_slots.strip() != "":
self.dense_slots = dense_slots.strip().split(" ")
self.dense_slots_shape = [
reduce(mul,
[int(j) for j in i.split(":")[1].strip("[]").split(",")])
for i in self.dense_slots
]
self.dense_slots = [i.split(":")[0] for i in self.dense_slots]
self.slots = self.dense_slots + self.sparse_slots
self.slot2index = {}
self.visit = {}
for i in range(len(self.slots)):
self.slot2index[self.slots[i]] = i
self.visit[self.slots[i]] = False
self.padding = padding
def generate_sample(self, l):
def reader():
line = l.strip().split(" ")
output = [(i, []) for i in self.slots]
for i in line:
slot_feasign = i.split(":")
slot = slot_feasign[0]
if slot not in self.slots:
continue
if slot in self.sparse_slots:
feasign = int(slot_feasign[1])
else:
feasign = float(slot_feasign[1])
output[self.slot2index[slot]][1].append(feasign)
self.visit[slot] = True
for i in self.visit:
slot = i
if not self.visit[slot]:
if i in self.dense_slots:
output[self.slot2index[i]][1].extend(
[self.padding] *
self.dense_slots_shape[self.slot2index[i]])
else:
output[self.slot2index[i]][1].extend([self.padding])
else:
self.visit[slot] = False
yield output
return reader
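# Example with hypothetical data: given sparse_slots="click item_id",
# dense_slots="dense_feature:[3]" and padding=0, the line
#   "click:1 item_id:1001 dense_feature:0.1 dense_feature:0.2 dense_feature:0.3"
# is parsed into
#   [("dense_feature", [0.1, 0.2, 0.3]), ("click", [1]), ("item_id", [1001])]
# (dense slots first, as in self.slots); any slot missing from a line is
# filled with `padding`.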
......@@ -12,15 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import abc
import os
import time
import sys
import yaml
from paddle import fluid
from fleetrec.core.utils import envs
from paddlerec.core.utils import envs
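# NOTE: Trainer drives a small state machine. Subclasses call
# regist_context_processor(status, handler) to bind a handler to each
# status ('uninit' -> 'init_pass' -> 'startup_pass' -> 'train_pass' ->
# 'infer_pass' -> 'terminal_pass'); every handler advances
# context['status'] to schedule the next pass and sets
# context['is_exit'] = True to stop the run loop.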
class Trainer(object):
......@@ -30,8 +30,10 @@ class Trainer(object):
def __init__(self, config=None):
self._status_processor = {}
self._place = fluid.CPUPlace()
self._exe = fluid.Executor(self._place)
self._exector_context = {}
self._context = {'status': 'uninit', 'is_exit': False}
self._config_yaml = config
......@@ -95,6 +97,6 @@ def user_define_engine(engine_yaml):
train_dirname = os.path.dirname(train_location)
base_name = os.path.splitext(os.path.basename(train_location))[0]
sys.path.append(train_dirname)
trainer_class = envs.lazy_instance_by_fliename(
base_name, "UserDefineTraining")
trainer_class = envs.lazy_instance_by_fliename(base_name,
"UserDefineTraining")
return trainer_class
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
trainer implement.
↗ (single/cluster) CtrTrainer
Trainer
↗ (for single training) SingleTrainer/TDMSingleTrainer
↘ TranspilerTrainer → (for cluster training) ClusterTrainer/TDMClusterTrainer
↘ (for online learning training) OnlineLearningTrainer
"""
......@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Training with fluid on a single node only.
"""
......@@ -19,13 +18,15 @@ Training use fluid with one node only.
from __future__ import print_function
import os
import time
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
from fleetrec.core.utils import envs
from fleetrec.core.trainers.transpiler_trainer import TranspileTrainer
from paddlerec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
class ClusterTrainer(TranspileTrainer):
......@@ -41,11 +42,14 @@ class ClusterTrainer(TranspileTrainer):
self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('startup_pass', self.startup)
if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None, "train.reader") != "DataLoader":
if envs.get_platform() == "LINUX" and envs.get_global_env(
"dataset_class", None, "train.reader") != "DataLoader":
self.regist_context_processor('train_pass', self.dataset_train)
else:
self.regist_context_processor(
'train_pass', self.dataloader_train)
self.regist_context_processor('train_pass',
self.dataloader_train)
self.regist_context_processor('infer_pass', self.infer)
self.regist_context_processor('terminal_pass', self.terminal)
......@@ -73,14 +77,14 @@ class ClusterTrainer(TranspileTrainer):
def init(self, context):
self.model.train_net()
optimizer = self.model.optimizer()
optimizer_name = envs.get_global_env(
"hyper_parameters.optimizer", None, "train.model")
optimizer_name = envs.get_global_env("hyper_parameters.optimizer",
None, "train.model")
if optimizer_name not in ["", "sgd", "SGD", "Sgd"]:
os.environ["FLAGS_communicator_is_sgd_optimizer"] = '0'
strategy = self.build_strategy()
optimizer = fleet.distributed_optimizer(optimizer, strategy)
optimizer.minimize(self.model.get_cost_op())
optimizer.minimize(self.model.get_avg_cost())
if fleet.is_server():
context['status'] = 'server_pass'
......@@ -112,9 +116,9 @@ class ClusterTrainer(TranspileTrainer):
program = fluid.compiler.CompiledProgram(
fleet.main_program).with_data_parallel(
loss_name=self.model.get_cost_op().name,
build_strategy=self.strategy.get_build_strategy(),
exec_strategy=self.strategy.get_execute_strategy())
loss_name=self.model.get_avg_cost().name,
build_strategy=self.strategy.get_build_strategy(),
exec_strategy=self.strategy.get_execute_strategy())
metrics_varnames = []
metrics_format = []
......@@ -133,9 +137,8 @@ class ClusterTrainer(TranspileTrainer):
batch_id = 0
try:
while True:
metrics_rets = self._exe.run(
program=program,
fetch_list=metrics_varnames)
metrics_rets = self._exe.run(program=program,
fetch_list=metrics_varnames)
metrics = [epoch, batch_id]
metrics.extend(metrics_rets)
......@@ -154,14 +157,23 @@ class ClusterTrainer(TranspileTrainer):
fleet.init_worker()
dataset = self._get_dataset()
ins = self._get_dataset_ins()
epochs = envs.get_global_env("train.epochs")
for i in range(epochs):
self._exe.train_from_dataset(program=fluid.default_main_program(),
dataset=dataset,
fetch_list=self.fetch_vars,
fetch_info=self.fetch_alias,
print_period=self.fetch_period)
begin_time = time.time()
self._exe.train_from_dataset(
program=fluid.default_main_program(),
dataset=dataset,
fetch_list=self.fetch_vars,
fetch_info=self.fetch_alias,
print_period=self.fetch_period)
end_time = time.time()
times = end_time - begin_time
print("epoch {} using time {}, speed {:.2f} lines/s".format(
i, times, ins / times))
self.save(i, "train", is_fleet=True)
fleet.stop_worker()
context['status'] = 'infer_pass'
......
......@@ -11,32 +11,28 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
"""
Training with fluid on a single node only.
"""
from fleetrec.core.utils import envs
from fleetrec.core.trainer import Trainer
from __future__ import print_function
import datetime
import os
import time
class CtrPaddleTrainer(Trainer):
"""R
"""
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
def __init__(self, config):
"""R
"""
Trainer.__init__(self, config)
from paddlerec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
self.global_config = config
self._metrics = {}
self.processor_register()
class OnlineLearningTrainer(TranspileTrainer):
def processor_register(self):
role = MPISymetricRoleMaker()
role = PaddleCloudRoleMaker()
fleet.init(role)
if fleet.is_server():
......@@ -46,48 +42,45 @@ class CtrPaddleTrainer(Trainer):
else:
self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('train_pass', self.train)
self.regist_context_processor('startup_pass', self.startup)
if envs.get_platform() == "LINUX" and envs.get_global_env(
"dataset_class", None, "train.reader") != "DataLoader":
self.regist_context_processor('train_pass', self.dataset_train)
else:
self.regist_context_processor('train_pass',
self.dataloader_train)
self.regist_context_processor('infer_pass', self.infer)
self.regist_context_processor('terminal_pass', self.terminal)
def _get_dataset(self):
namespace = "train.reader"
def build_strategy(self):
mode = envs.get_runtime_environ("train.trainer.strategy")
assert mode in ["async", "geo", "sync", "half_async"]
inputs = self.model.get_inputs()
threads = envs.get_global_env("train.threads", None)
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
abs_dir = os.path.dirname(os.path.abspath(__file__))
reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py')
pipe_cmd = "python {} {} {} {}".format(reader, reader_class, "TRAIN", self._config_yaml)
train_data_path = envs.get_global_env("train_data_path", None, namespace)
strategy = None
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_use_var(inputs)
dataset.set_pipe_command(pipe_cmd)
dataset.set_batch_size(batch_size)
dataset.set_thread(threads)
file_list = [
os.path.join(train_data_path, x)
for x in os.listdir(train_data_path)
]
if mode == "async":
strategy = StrategyFactory.create_async_strategy()
elif mode == "geo":
push_num = envs.get_global_env("train.strategy.mode.push_num", 100)
strategy = StrategyFactory.create_geo_strategy(push_num)
elif mode == "sync":
strategy = StrategyFactory.create_sync_strategy()
elif mode == "half_async":
strategy = StrategyFactory.create_half_async_strategy()
dataset.set_filelist(file_list)
return dataset
assert strategy is not None
def instance(self, context):
models = envs.get_global_env("train.model.models")
model_class = envs.lazy_instance_by_fliename(models, "Model")
self.model = model_class(None)
context['status'] = 'init_pass'
self.strategy = strategy
return strategy
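# NOTE: build_strategy maps "train.trainer.strategy" onto a fleet
# DistributeTranspiler strategy: "async" for fully asynchronous updates,
# "geo" for Geo-SGD (local updates pushed every `push_num` steps),
# "sync" for synchronous training, and "half_async" for the
# semi-asynchronous mode.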
def init(self, context):
"""R
"""
self.model.train_net()
optimizer = self.model.optimizer()
optimizer = fleet.distributed_optimizer(optimizer, strategy={"use_cvm": False})
optimizer.minimize(self.model.get_cost_op())
strategy = self.build_strategy()
optimizer = fleet.distributed_optimizer(optimizer, strategy)
optimizer.minimize(self.model.get_avg_cost())
if fleet.is_server():
context['status'] = 'server_pass'
......@@ -100,37 +93,97 @@ class CtrPaddleTrainer(Trainer):
if metrics:
self.fetch_vars = metrics.values()
self.fetch_alias = metrics.keys()
context['status'] = 'train_pass'
context['status'] = 'startup_pass'
def server(self, context):
fleet.init_server()
fleet.run_server()
fleet.stop_worker()
context['is_exit'] = True
def train(self, context):
self._exe.run(fluid.default_startup_program())
fleet.init_worker()
def startup(self, context):
self._exe.run(fleet.startup_program)
context['status'] = 'train_pass'
dataset = self._get_dataset()
def dataloader_train(self, context):
print("online learning can only support LINUX only")
context['status'] = 'terminal_pass'
shuf = np.array([fleet.worker_index()])
gs = shuf * 0
fleet._role_maker._node_type_comm.Allreduce(shuf, gs)
def _get_dataset(self, state="TRAIN", hour=None):
if state == "TRAIN":
inputs = self.model.get_inputs()
namespace = "train.reader"
train_data_path = envs.get_global_env("train_data_path", None,
namespace)
else:
inputs = self.model.get_infer_inputs()
namespace = "evaluate.reader"
train_data_path = envs.get_global_env("test_data_path", None,
namespace)
print("trainer id: {}, trainers: {}, gs: {}".format(fleet.worker_index(), fleet.worker_num(), gs))
threads = int(envs.get_runtime_environ("train.trainer.threads"))
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
abs_dir = os.path.dirname(os.path.abspath(__file__))
reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py')
pipe_cmd = "python {} {} {} {}".format(reader, reader_class, state,
self._config_yaml)
epochs = envs.get_global_env("train.epochs")
if train_data_path.startswith("paddlerec::"):
package_base = envs.get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
train_data_path = os.path.join(package_base,
train_data_path.split("::")[1])
for i in range(epochs):
self._exe.train_from_dataset(program=fluid.default_main_program(),
dataset=dataset,
fetch_list=self.fetch_vars,
fetch_info=self.fetch_alias,
print_period=self.fetch_period)
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_use_var(inputs)
dataset.set_pipe_command(pipe_cmd)
dataset.set_batch_size(batch_size)
dataset.set_thread(threads)
if hour is not None:
train_data_path = os.path.join(train_data_path, hour)
file_list = [
os.path.join(train_data_path, x)
for x in os.listdir(train_data_path)
]
self.files = file_list
dataset.set_filelist(self.files)
return dataset
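# NOTE: online learning iterates over a date range instead of plain
# epochs. For each day/hour it resolves an hourly sub-directory of
# train_data_path (named "%Y%m%d/%H"), builds a Dataset for it, trains
# one pass, and saves an incremental model tagged "%Y%m%d_%H". The
# literal "begin_day_d" passed to strptime below appears to stand in for
# a configured start date.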
def dataset_train(self, context):
fleet.init_worker()
days = envs.get_global_env("train.days")
begin_day = datetime.datetime.strptime("begin_day_d", '%Y%m%d')
for day in range(days):
for hour in range(24):
day = begin_day + datetime.timedelta(days=day, hours=hour)
day_s = day.strftime('%Y%m%d/%H')
i = day.strftime('%Y%m%d_%H')
dataset = self._get_dataset(hour=day_s)
ins = self._get_dataset_ins()
begin_time = time.time()
self._exe.train_from_dataset(
program=fluid.default_main_program(),
dataset=dataset,
fetch_list=self.fetch_vars,
fetch_info=self.fetch_alias,
print_period=self.fetch_period)
end_time = time.time()
times = end_time - begin_time
print("epoch {} using time {}, speed {:.2f} lines/s".format(
i, times, ins / times))
self.save(i, "train", is_fleet=True)
context['status'] = 'terminal_pass'
fleet.stop_worker()
context['status'] = 'infer_pass'
def terminal(self, context):
print("terminal ended.")
for model in self.increment_models:
print("epoch :{}, dir: {}".format(model[0], model[1]))
context['is_exit'] = True
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Training with fluid on a single node only.
"""
from __future__ import print_function
import time
import logging
import os
import paddle.fluid as fluid
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
from paddlerec.core.utils import envs
from paddlerec.core.reader import SlotReader
from paddlerec.core.utils import dataloader_instance
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
class SingleInfer(TranspileTrainer):
def __init__(self, config=None):
super(TranspileTrainer, self).__init__(config)
self._env = self._config
device = envs.get_global_env("device")
if device == 'gpu':
self._place = fluid.CUDAPlace(0)
elif device == 'cpu':
self._place = fluid.CPUPlace()
self._exe = fluid.Executor(self._place)
self.processor_register()
self._model = {}
self._dataset = {}
envs.set_global_envs(self._config)
envs.update_workspace()
self._runner_name = envs.get_global_env("mode")
device = envs.get_global_env("runner." + self._runner_name + ".device")
if device == 'gpu':
self._place = fluid.CUDAPlace(0)
elif device == 'cpu':
self._place = fluid.CPUPlace()
self._exe = fluid.Executor(self._place)
def processor_register(self):
self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('startup_pass', self.startup)
self.regist_context_processor('train_pass', self.executor_train)
self.regist_context_processor('terminal_pass', self.terminal)
def instance(self, context):
context['status'] = 'init_pass'
def _get_dataset(self, dataset_name):
name = "dataset." + dataset_name + "."
thread_num = envs.get_global_env(name + "thread_num")
batch_size = envs.get_global_env(name + "batch_size")
reader_class = envs.get_global_env(name + "data_converter")
abs_dir = os.path.dirname(os.path.abspath(__file__))
reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py')
sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
if sparse_slots == "" and dense_slots == "":
pipe_cmd = "python {} {} {} {}".format(reader, reader_class,
"TRAIN", self._config_yaml)
else:
if sparse_slots == "":
sparse_slots = "?"
if dense_slots == "":
dense_slots = "?"
padding = envs.get_global_env(name + "padding", 0)
pipe_cmd = "python {} {} {} {} {} {} {} {}".format(
reader, "slot", "slot", self._config_yaml, "fake", \
sparse_slots.replace(" ", "?"), dense_slots.replace(" ", "?"), str(padding))
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_batch_size(envs.get_global_env(name + "batch_size"))
dataset.set_pipe_command(pipe_cmd)
train_data_path = envs.get_global_env(name + "data_path")
file_list = [
os.path.join(train_data_path, x)
for x in os.listdir(train_data_path)
]
dataset.set_filelist(file_list)
for model_dict in self._env["phase"]:
if model_dict["dataset_name"] == dataset_name:
model = self._model[model_dict["name"]][3]
inputs = model._infer_data_var
dataset.set_use_var(inputs)
break
return dataset
def _get_dataloader(self, dataset_name, dataloader):
name = "dataset." + dataset_name + "."
thread_num = envs.get_global_env(name + "thread_num")
batch_size = envs.get_global_env(name + "batch_size")
reader_class = envs.get_global_env(name + "data_converter")
abs_dir = os.path.dirname(os.path.abspath(__file__))
sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
if sparse_slots == "" and dense_slots == "":
reader = dataloader_instance.dataloader_by_name(
reader_class, dataset_name, self._config_yaml)
reader_class = envs.lazy_instance_by_fliename(reader_class,
"TrainReader")
reader_ins = reader_class(self._config_yaml)
else:
reader = dataloader_instance.slotdataloader_by_name(
"", dataset_name, self._config_yaml)
reader_ins = SlotReader(self._config_yaml)
if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
dataloader.set_sample_list_generator(reader)
else:
dataloader.set_sample_generator(reader, batch_size)
return dataloader
def _create_dataset(self, dataset_name):
name = "dataset." + dataset_name + "."
sparse_slots = envs.get_global_env(name + "sparse_slots")
dense_slots = envs.get_global_env(name + "dense_slots")
thread_num = envs.get_global_env(name + "thread_num")
batch_size = envs.get_global_env(name + "batch_size")
type_name = envs.get_global_env(name + "type")
if envs.get_platform() != "LINUX":
print("platform ", envs.get_platform(),
" change reader to DataLoader")
type_name = "DataLoader"
padding = 0
if type_name == "DataLoader":
return None
else:
return self._get_dataset(dataset_name)
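# NOTE: init() below builds one program/scope/model bundle per "phase"
# entry and stores it as a 5-element list:
# [0] main program, [1] startup program, [2] scope, [3] model instance,
# [4] a clone of the main program used around save().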
def init(self, context):
for model_dict in self._env["phase"]:
self._model[model_dict["name"]] = [None] * 5
train_program = fluid.Program()
startup_program = fluid.Program()
scope = fluid.Scope()
dataset_name = model_dict["dataset_name"]
opt_name = envs.get_global_env("hyper_parameters.optimizer.class")
opt_lr = envs.get_global_env(
"hyper_parameters.optimizer.learning_rate")
opt_strategy = envs.get_global_env(
"hyper_parameters.optimizer.strategy")
with fluid.program_guard(train_program, startup_program):
with fluid.unique_name.guard():
with fluid.scope_guard(scope):
model_path = model_dict["model"].replace(
"{workspace}",
envs.path_adapter(self._env["workspace"]))
model = envs.lazy_instance_by_fliename(
model_path, "Model")(self._env)
model._infer_data_var = model.input_data(
dataset_name=model_dict["dataset_name"])
if envs.get_global_env("dataset." + dataset_name +
".type") == "DataLoader":
model._init_dataloader(is_infer=True)
self._get_dataloader(dataset_name,
model._data_loader)
model.net(model._infer_data_var, True)
self._model[model_dict["name"]][0] = train_program
self._model[model_dict["name"]][1] = startup_program
self._model[model_dict["name"]][2] = scope
self._model[model_dict["name"]][3] = model
self._model[model_dict["name"]][4] = train_program.clone()
for dataset in self._env["dataset"]:
if dataset["type"] != "DataLoader":
self._dataset[dataset["name"]] = self._create_dataset(dataset[
"name"])
context['status'] = 'startup_pass'
def startup(self, context):
for model_dict in self._env["phase"]:
with fluid.scope_guard(self._model[model_dict["name"]][2]):
self._exe.run(self._model[model_dict["name"]][1])
context['status'] = 'train_pass'
def executor_train(self, context):
epochs = int(
envs.get_global_env("runner." + self._runner_name + ".epochs"))
for j in range(epochs):
for model_dict in self._env["phase"]:
if j == 0:
with fluid.scope_guard(self._model[model_dict["name"]][2]):
train_prog = self._model[model_dict["name"]][0]
startup_prog = self._model[model_dict["name"]][1]
with fluid.program_guard(train_prog, startup_prog):
self.load()
reader_name = model_dict["dataset_name"]
name = "dataset." + reader_name + "."
begin_time = time.time()
if envs.get_global_env(name + "type") == "DataLoader":
self._executor_dataloader_train(model_dict)
else:
self._executor_dataset_train(model_dict)
with fluid.scope_guard(self._model[model_dict["name"]][2]):
train_prog = self._model[model_dict["name"]][4]
startup_prog = self._model[model_dict["name"]][1]
with fluid.program_guard(train_prog, startup_prog):
self.save(j)
end_time = time.time()
seconds = end_time - begin_time
print("epoch {} done, time elasped: {}".format(j, seconds))
context['status'] = "terminal_pass"
def _executor_dataset_train(self, model_dict):
reader_name = model_dict["dataset_name"]
model_name = model_dict["name"]
model_class = self._model[model_name][3]
fetch_vars = []
fetch_alias = []
fetch_period = int(
envs.get_global_env("runner." + self._runner_name +
".print_interval", 20))
metrics = model_class.get_infer_results()
if metrics:
fetch_vars = metrics.values()
fetch_alias = metrics.keys()
scope = self._model[model_name][2]
program = self._model[model_name][0]
reader = self._dataset[reader_name]
with fluid.scope_guard(scope):
self._exe.infer_from_dataset(
program=program,
dataset=reader,
fetch_list=fetch_vars,
fetch_info=fetch_alias,
print_period=fetch_period)
def _executor_dataloader_train(self, model_dict):
reader_name = model_dict["dataset_name"]
model_name = model_dict["name"]
model_class = self._model[model_name][3]
program = self._model[model_name][0].clone()
fetch_vars = []
fetch_alias = []
metrics = model_class.get_infer_results()
if metrics:
fetch_vars = metrics.values()
fetch_alias = metrics.keys()
metrics_varnames = []
metrics_format = []
fetch_period = int(
envs.get_global_env("runner." + self._runner_name +
".print_interval", 20))
metrics_format.append("{}: {{}}".format("batch"))
for name, var in metrics.items():
metrics_varnames.append(var.name)
metrics_format.append("{}: {{}}".format(name))
metrics_format = ", ".join(metrics_format)
reader = self._model[model_name][3]._data_loader
reader.start()
batch_id = 0
scope = self._model[model_name][2]
with fluid.scope_guard(scope):
try:
while True:
metrics_rets = self._exe.run(program=program,
fetch_list=metrics_varnames)
metrics = [batch_id]
metrics.extend(metrics_rets)
if batch_id % fetch_period == 0 and batch_id != 0:
print(metrics_format.format(*metrics))
batch_id += 1
except fluid.core.EOFException:
reader.reset()
def terminal(self, context):
context['is_exit'] = True
def load(self, is_fleet=False):
name = "runner." + self._runner_name + "."
dirname = envs.get_global_env(name + "init_model_path", None)
if dirname is None or dirname == "":
return
print("single_infer going to load ", dirname)
if is_fleet:
fleet.load_persistables(self._exe, dirname)
else:
fluid.io.load_persistables(self._exe, dirname)
def save(self, epoch_id, is_fleet=False):
def need_save(epoch_id, epoch_interval, is_last=False):
if is_last:
return True
if epoch_id == -1:
return False
return epoch_id % epoch_interval == 0
def save_inference_model():
name = "runner." + self._runner_name + "."
save_interval = int(
envs.get_global_env(name + "save_inference_interval", -1))
if not need_save(epoch_id, save_interval, False):
return
feed_varnames = envs.get_global_env(
name + "save_inference_feed_varnames", None)
fetch_varnames = envs.get_global_env(
name + "save_inference_fetch_varnames", None)
if feed_varnames is None or fetch_varnames is None or feed_varnames == "":
return
fetch_vars = [
fluid.default_main_program().global_block().vars[varname]
for varname in fetch_varnames
]
dirname = envs.get_global_env(name + "save_inference_path", None)
assert dirname is not None
dirname = os.path.join(dirname, str(epoch_id))
if is_fleet:
fleet.save_inference_model(self._exe, dirname, feed_varnames,
fetch_vars)
else:
fluid.io.save_inference_model(dirname, feed_varnames,
fetch_vars, self._exe)
def save_persistables():
name = "runner." + self._runner_name + "."
save_interval = int(
envs.get_global_env(name + "save_checkpoint_interval", -1))
if not need_save(epoch_id, save_interval, False):
return
dirname = envs.get_global_env(name + "save_checkpoint_path", None)
if dirname is None or dirname == "":
return
dirname = os.path.join(dirname, str(epoch_id))
if is_fleet:
fleet.save_persistables(self._exe, dirname)
else:
fluid.io.save_persistables(self._exe, dirname)
save_persistables()
save_inference_model()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Training with fluid on a single node only.
"""
from __future__ import print_function
import time
import logging
import os
import paddle.fluid as fluid
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
from paddlerec.core.utils import envs
from paddlerec.core.reader import SlotReader
from paddlerec.core.utils import dataloader_instance
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
class SingleTrainer(TranspileTrainer):
def __init__(self, config=None):
super(TranspileTrainer, self).__init__(config)
self._env = self._config
self.processor_register()
self._model = {}
self._dataset = {}
envs.set_global_envs(self._config)
envs.update_workspace()
self._runner_name = envs.get_global_env("mode")
device = envs.get_global_env("runner." + self._runner_name + ".device")
if device == 'gpu':
self._place = fluid.CUDAPlace(0)
elif device == 'cpu':
self._place = fluid.CPUPlace()
self._exe = fluid.Executor(self._place)
def processor_register(self):
self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('startup_pass', self.startup)
self.regist_context_processor('train_pass', self.executor_train)
self.regist_context_processor('terminal_pass', self.terminal)
def instance(self, context):
context['status'] = 'init_pass'
def _get_dataset(self, dataset_name):
name = "dataset." + dataset_name + "."
thread_num = envs.get_global_env(name + "thread_num")
batch_size = envs.get_global_env(name + "batch_size")
reader_class = envs.get_global_env(name + "data_converter")
abs_dir = os.path.dirname(os.path.abspath(__file__))
reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py')
sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
if sparse_slots == "" and dense_slots == "":
pipe_cmd = "python {} {} {} {}".format(reader, reader_class,
"TRAIN", self._config_yaml)
else:
if sparse_slots == "":
sparse_slots = "?"
if dense_slots == "":
dense_slots = "?"
padding = envs.get_global_env(name + "padding", 0)
pipe_cmd = "python {} {} {} {} {} {} {} {}".format(
reader, "slot", "slot", self._config_yaml, "fake", \
sparse_slots.replace(" ", "?"), dense_slots.replace(" ", "?"), str(padding))
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_batch_size(envs.get_global_env(name + "batch_size"))
dataset.set_pipe_command(pipe_cmd)
train_data_path = envs.get_global_env(name + "data_path")
file_list = [
os.path.join(train_data_path, x)
for x in os.listdir(train_data_path)
]
dataset.set_filelist(file_list)
for model_dict in self._env["phase"]:
if model_dict["dataset_name"] == dataset_name:
model = self._model[model_dict["name"]][3]
inputs = model._data_var
dataset.set_use_var(inputs)
break
return dataset
def _get_dataloader(self, dataset_name, dataloader):
name = "dataset." + dataset_name + "."
sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
thread_num = envs.get_global_env(name + "thread_num")
batch_size = envs.get_global_env(name + "batch_size")
reader_class = envs.get_global_env(name + "data_converter")
abs_dir = os.path.dirname(os.path.abspath(__file__))
if sparse_slots == "" and dense_slots == "":
reader = dataloader_instance.dataloader_by_name(
reader_class, dataset_name, self._config_yaml)
reader_class = envs.lazy_instance_by_fliename(reader_class,
"TrainReader")
reader_ins = reader_class(self._config_yaml)
else:
reader = dataloader_instance.slotdataloader_by_name(
"", dataset_name, self._config_yaml)
reader_ins = SlotReader(self._config_yaml)
if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
dataloader.set_sample_list_generator(reader)
else:
dataloader.set_sample_generator(reader, batch_size)
return dataloader
def _create_dataset(self, dataset_name):
name = "dataset." + dataset_name + "."
sparse_slots = envs.get_global_env(name + "sparse_slots", "").strip()
dense_slots = envs.get_global_env(name + "dense_slots", "").strip()
thread_num = envs.get_global_env(name + "thread_num")
batch_size = envs.get_global_env(name + "batch_size")
type_name = envs.get_global_env(name + "type")
if envs.get_platform() != "LINUX":
print("platform ", envs.get_platform(),
" change reader to DataLoader")
type_name = "DataLoader"
padding = 0
if type_name == "DataLoader":
return None
else:
return self._get_dataset(dataset_name)
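# NOTE: unlike the infer-only trainer, init() below also reads
# hyper_parameters.optimizer.{class, learning_rate, strategy}, builds
# the optimizer via model._build_optimizer and minimizes model._cost
# inside each phase's program/scope guards.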
def init(self, context):
for model_dict in self._env["phase"]:
self._model[model_dict["name"]] = [None] * 5
train_program = fluid.Program()
startup_program = fluid.Program()
scope = fluid.Scope()
dataset_name = model_dict["dataset_name"]
opt_name = envs.get_global_env("hyper_parameters.optimizer.class")
opt_lr = envs.get_global_env(
"hyper_parameters.optimizer.learning_rate")
opt_strategy = envs.get_global_env(
"hyper_parameters.optimizer.strategy")
with fluid.program_guard(train_program, startup_program):
with fluid.unique_name.guard():
with fluid.scope_guard(scope):
model_path = model_dict["model"].replace(
"{workspace}",
envs.path_adapter(self._env["workspace"]))
model = envs.lazy_instance_by_fliename(
model_path, "Model")(self._env)
model._data_var = model.input_data(
dataset_name=model_dict["dataset_name"])
if envs.get_global_env("dataset." + dataset_name +
".type") == "DataLoader":
model._init_dataloader(is_infer=False)
self._get_dataloader(dataset_name,
model._data_loader)
model.net(model._data_var, False)
optimizer = model._build_optimizer(opt_name, opt_lr,
opt_strategy)
optimizer.minimize(model._cost)
self._model[model_dict["name"]][0] = train_program
self._model[model_dict["name"]][1] = startup_program
self._model[model_dict["name"]][2] = scope
self._model[model_dict["name"]][3] = model
self._model[model_dict["name"]][4] = train_program.clone()
for dataset in self._env["dataset"]:
if dataset["type"] != "DataLoader":
self._dataset[dataset["name"]] = self._create_dataset(dataset[
"name"])
context['status'] = 'startup_pass'
def startup(self, context):
for model_dict in self._env["phase"]:
with fluid.scope_guard(self._model[model_dict["name"]][2]):
self._exe.run(self._model[model_dict["name"]][1])
context['status'] = 'train_pass'
def executor_train(self, context):
epochs = int(
envs.get_global_env("runner." + self._runner_name + ".epochs"))
for j in range(epochs):
for model_dict in self._env["phase"]:
if j == 0:
with fluid.scope_guard(self._model[model_dict["name"]][2]):
train_prog = self._model[model_dict["name"]][0]
startup_prog = self._model[model_dict["name"]][1]
with fluid.program_guard(train_prog, startup_prog):
self.load()
reader_name = model_dict["dataset_name"]
name = "dataset." + reader_name + "."
begin_time = time.time()
if envs.get_global_env(name + "type") == "DataLoader":
self._executor_dataloader_train(model_dict)
else:
self._executor_dataset_train(model_dict)
with fluid.scope_guard(self._model[model_dict["name"]][2]):
train_prog = self._model[model_dict["name"]][4]
startup_prog = self._model[model_dict["name"]][1]
with fluid.program_guard(train_prog, startup_prog):
self.save(j)
end_time = time.time()
seconds = end_time - begin_time
print("epoch {} done, time elasped: {}".format(j, seconds))
context['status'] = "terminal_pass"
def _executor_dataset_train(self, model_dict):
reader_name = model_dict["dataset_name"]
model_name = model_dict["name"]
model_class = self._model[model_name][3]
fetch_vars = []
fetch_alias = []
fetch_period = int(
envs.get_global_env("runner." + self._runner_name +
".print_interval", 20))
metrics = model_class.get_metrics()
if metrics:
fetch_vars = metrics.values()
fetch_alias = metrics.keys()
scope = self._model[model_name][2]
program = self._model[model_name][0]
reader = self._dataset[reader_name]
with fluid.scope_guard(scope):
self._exe.train_from_dataset(
program=program,
dataset=reader,
fetch_list=fetch_vars,
fetch_info=fetch_alias,
print_period=fetch_period)
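# NOTE: the DataLoader path below compiles the program with data
# parallelism, starts the loader, and keeps calling Executor.run until
# the loader raises fluid.core.EOFException, which marks the end of one
# epoch; the loader is then reset for the next pass.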
def _executor_dataloader_train(self, model_dict):
reader_name = model_dict["dataset_name"]
model_name = model_dict["name"]
model_class = self._model[model_name][3]
program = self._model[model_name][0].clone()
program = fluid.compiler.CompiledProgram(program).with_data_parallel(
loss_name=model_class.get_avg_cost().name)
fetch_vars = []
fetch_alias = []
fetch_period = int(
envs.get_global_env("runner." + self._runner_name +
".print_interval", 20))
metrics = model_class.get_metrics()
if metrics:
fetch_vars = metrics.values()
fetch_alias = metrics.keys()
metrics_varnames = []
metrics_format = []
metrics_format.append("{}: {{}}".format("batch"))
for name, var in metrics.items():
metrics_varnames.append(var.name)
metrics_format.append("{}: {{}}".format(name))
metrics_format = ", ".join(metrics_format)
reader = self._model[model_name][3]._data_loader
reader.start()
batch_id = 0
scope = self._model[model_name][2]
with fluid.scope_guard(scope):
try:
while True:
metrics_rets = self._exe.run(program=program,
fetch_list=metrics_varnames)
metrics = [batch_id]
metrics.extend(metrics_rets)
if batch_id % fetch_period == 0 and batch_id != 0:
print(metrics_format.format(*metrics))
batch_id += 1
except fluid.core.EOFException:
reader.reset()
def terminal(self, context):
context['is_exit'] = True
def load(self, is_fleet=False):
dirname = envs.get_global_env(
"runner." + self._runner_name + ".init_model_path", None)
if dirname is None or dirname == "":
return
print("going to load ", dirname)
if is_fleet:
fleet.load_persistables(self._exe, dirname)
else:
fluid.io.load_persistables(self._exe, dirname)
def save(self, epoch_id, is_fleet=False):
def need_save(epoch_id, epoch_interval, is_last=False):
if is_last:
return True
if epoch_id == -1:
return False
return epoch_id % epoch_interval == 0
def save_inference_model():
name = "runner." + self._runner_name + "."
save_interval = int(
envs.get_global_env(name + "save_inference_interval", -1))
if not need_save(epoch_id, save_interval, False):
return
feed_varnames = envs.get_global_env(
name + "save_inference_feed_varnames", [])
fetch_varnames = envs.get_global_env(
name + "save_inference_fetch_varnames", [])
if feed_varnames is None or fetch_varnames is None or feed_varnames == "" or fetch_varnames == "" or \
len(feed_varnames) == 0 or len(fetch_varnames) == 0:
return
fetch_vars = [
fluid.default_main_program().global_block().vars[varname]
for varname in fetch_varnames
]
dirname = envs.get_global_env(name + "save_inference_path", None)
assert dirname is not None
dirname = os.path.join(dirname, str(epoch_id))
if is_fleet:
fleet.save_inference_model(self._exe, dirname, feed_varnames,
fetch_vars)
else:
fluid.io.save_inference_model(dirname, feed_varnames,
fetch_vars, self._exe)
def save_persistables():
name = "runner." + self._runner_name + "."
save_interval = int(
envs.get_global_env(name + "save_checkpoint_interval", -1))
if not need_save(epoch_id, save_interval, False):
return
dirname = envs.get_global_env(name + "save_checkpoint_path", None)
if dirname is None or dirname == "":
return
dirname = os.path.join(dirname, str(epoch_id))
if is_fleet:
fleet.save_persistables(self._exe, dirname)
else:
fluid.io.save_persistables(self._exe, dirname)
save_persistables()
save_inference_model()
# -*- coding=utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -12,22 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Training with fluid on a single node only.
"""
from __future__ import print_function
import logging
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
from fleetrec.core.utils import envs
from fleetrec.core.trainers.cluster_trainer import ClusterTrainer
from paddlerec.core.utils import envs
from paddlerec.core.trainers.cluster_trainer import ClusterTrainer
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
......@@ -38,8 +35,8 @@ special_param = ["TDM_Tree_Travel", "TDM_Tree_Layer", "TDM_Tree_Info"]
class TDMClusterTrainer(ClusterTrainer):
def server(self, context):
namespace = "train.startup"
init_model_path = envs.get_global_env(
"cluster.init_model_path", "", namespace)
init_model_path = envs.get_global_env("cluster.init_model_path", "",
namespace)
assert init_model_path != "", "Cluster train must has init_model for TDM"
fleet.init_server(init_model_path)
logger.info("TDM: load model from {}".format(init_model_path))
......@@ -50,26 +47,28 @@ class TDMClusterTrainer(ClusterTrainer):
self._exe.run(fleet.startup_program)
namespace = "train.startup"
load_tree = envs.get_global_env(
"tree.load_tree", True, namespace)
self.tree_layer_path = envs.get_global_env(
"tree.tree_layer_path", "", namespace)
self.tree_travel_path = envs.get_global_env(
"tree.tree_travel_path", "", namespace)
self.tree_info_path = envs.get_global_env(
"tree.tree_info_path", "", namespace)
save_init_model = envs.get_global_env(
"cluster.save_init_model", False, namespace)
init_model_path = envs.get_global_env(
"cluster.init_model_path", "", namespace)
load_tree = envs.get_global_env("tree.load_tree", True, namespace)
self.tree_layer_path = envs.get_global_env("tree.tree_layer_path", "",
namespace)
self.tree_travel_path = envs.get_global_env("tree.tree_travel_path",
"", namespace)
self.tree_info_path = envs.get_global_env("tree.tree_info_path", "",
namespace)
save_init_model = envs.get_global_env("cluster.save_init_model", False,
namespace)
init_model_path = envs.get_global_env("cluster.init_model_path", "",
namespace)
if load_tree:
# Set the plain-text tree structure and data into the corresponding Variables of the network.
# NumpyInitialize is not used here because the tree-related data can be very large, which is a performance risk.
# convert tree to tensor, set it into Fluid's variable.
for param_name in special_param:
param_t = fluid.global_scope().find_var(param_name).get_tensor()
param_array = self.tdm_prepare(param_name)
param_t = fluid.global_scope().find_var(param_name).get_tensor(
)
param_array = self._tdm_prepare(param_name)
param_t.set(param_array.astype('int32'), self._place)
if save_init_model:
......@@ -80,27 +79,27 @@ class TDMClusterTrainer(ClusterTrainer):
context['status'] = 'train_pass'
def tdm_prepare(self, param_name):
def _tdm_prepare(self, param_name):
if param_name == "TDM_Tree_Travel":
travel_array = self.tdm_travel_prepare()
travel_array = self._tdm_travel_prepare()
return travel_array
elif param_name == "TDM_Tree_Layer":
layer_array, _ = self.tdm_layer_prepare()
layer_array, _ = self._tdm_layer_prepare()
return layer_array
elif param_name == "TDM_Tree_Info":
info_array = self.tdm_info_prepare()
info_array = self._tdm_info_prepare()
return info_array
else:
raise " {} is not a special tdm param name".format(param_name)
def tdm_travel_prepare(self):
def _tdm_travel_prepare(self):
"""load tdm tree param from npy/list file"""
travel_array = np.load(self.tree_travel_path)
logger.info("TDM Tree leaf node nums: {}".format(
travel_array.shape[0]))
logger.info("TDM Tree leaf node nums: {}".format(travel_array.shape[
0]))
return travel_array
def tdm_layer_prepare(self):
def _tdm_layer_prepare(self):
"""load tdm tree param from npy/list file"""
layer_list = []
layer_list_flat = []
......@@ -120,7 +119,7 @@ class TDMClusterTrainer(ClusterTrainer):
[len(i) for i in layer_list]))
return layer_array, layer_list
def tdm_info_prepare(self):
def _tdm_info_prepare(self):
"""load tdm tree param from list file"""
info_array = np.load(self.tree_info_path)
return info_array
# -*- coding=utf-8 -*-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -12,50 +11,53 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Training with fluid on a single node only.
"""
from __future__ import print_function
import logging
import paddle.fluid as fluid
from fleetrec.core.trainers.transpiler_trainer import TranspileTrainer
from fleetrec.core.trainers.single_trainer import SingleTrainer
from fleetrec.core.utils import envs
import numpy as np
import paddle.fluid as fluid
from paddlerec.core.trainers.single_trainer import SingleTrainer
from paddlerec.core.utils import envs
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
special_param = ["TDM_Tree_Travel", "TDM_Tree_Layer",
"TDM_Tree_Info", "TDM_Tree_Emb"]
special_param = [
"TDM_Tree_Travel", "TDM_Tree_Layer", "TDM_Tree_Info", "TDM_Tree_Emb"
]
class TDMSingleTrainer(SingleTrainer):
def startup(self, context):
namespace = "train.startup"
load_persistables = envs.get_global_env(
"single.load_persistables", False, namespace)
load_persistables = envs.get_global_env("single.load_persistables",
False, namespace)
persistables_model_path = envs.get_global_env(
"single.persistables_model_path", "", namespace)
load_tree = envs.get_global_env(
"tree.load_tree", False, namespace)
self.tree_layer_path = envs.get_global_env(
"tree.tree_layer_path", "", namespace)
self.tree_travel_path = envs.get_global_env(
"tree.tree_travel_path", "", namespace)
self.tree_info_path = envs.get_global_env(
"tree.tree_info_path", "", namespace)
self.tree_emb_path = envs.get_global_env(
"tree.tree_emb_path", "", namespace)
save_init_model = envs.get_global_env(
"single.save_init_model", False, namespace)
init_model_path = envs.get_global_env(
"single.init_model_path", "", namespace)
load_tree = envs.get_global_env("tree.load_tree", False, namespace)
self.tree_layer_path = envs.get_global_env("tree.tree_layer_path", "",
namespace)
self.tree_travel_path = envs.get_global_env("tree.tree_travel_path",
"", namespace)
self.tree_info_path = envs.get_global_env("tree.tree_info_path", "",
namespace)
self.tree_emb_path = envs.get_global_env("tree.tree_emb_path", "",
namespace)
save_init_model = envs.get_global_env("single.save_init_model", False,
namespace)
init_model_path = envs.get_global_env("single.init_model_path", "",
namespace)
self._exe.run(fluid.default_startup_program())
if load_persistables:
......@@ -68,11 +70,11 @@ class TDMSingleTrainer(SingleTrainer):
persistables_model_path))
if load_tree:
# Set the plain-text tree structure and data into the corresponding Variables of the network.
# NumpyInitialize is not used here because the tree-related data can be very large, which is a performance risk.
# convert tree to tensor, set it into Fluid's variable.
for param_name in special_param:
param_t = fluid.global_scope().find_var(param_name).get_tensor()
param_array = self.tdm_prepare(param_name)
param_t = fluid.global_scope().find_var(param_name).get_tensor(
)
param_array = self._tdm_prepare(param_name)
if param_name == 'TDM_Tree_Emb':
param_t.set(param_array.astype('float32'), self._place)
else:
......@@ -86,37 +88,37 @@ class TDMSingleTrainer(SingleTrainer):
context['status'] = 'train_pass'
def tdm_prepare(self, param_name):
def _tdm_prepare(self, param_name):
if param_name == "TDM_Tree_Travel":
travel_array = self.tdm_travel_prepare()
travel_array = self._tdm_travel_prepare()
return travel_array
elif param_name == "TDM_Tree_Layer":
layer_array, _ = self.tdm_layer_prepare()
layer_array, _ = self._tdm_layer_prepare()
return layer_array
elif param_name == "TDM_Tree_Info":
info_array = self.tdm_info_prepare()
info_array = self._tdm_info_prepare()
return info_array
elif param_name == "TDM_Tree_Emb":
emb_array = self.tdm_emb_prepare()
emb_array = self._tdm_emb_prepare()
return emb_array
else:
raise " {} is not a special tdm param name".format(param_name)
def tdm_travel_prepare(self):
def _tdm_travel_prepare(self):
"""load tdm tree param from npy/list file"""
travel_array = np.load(self.tree_travel_path)
logger.info("TDM Tree leaf node nums: {}".format(
travel_array.shape[0]))
logger.info("TDM Tree leaf node nums: {}".format(travel_array.shape[
0]))
return travel_array
def tdm_emb_prepare(self):
def _tdm_emb_prepare(self):
"""load tdm tree param from npy/list file"""
emb_array = np.load(self.tree_emb_path)
logger.info("TDM Tree node nums from emb: {}".format(
emb_array.shape[0]))
logger.info("TDM Tree node nums from emb: {}".format(emb_array.shape[
0]))
return emb_array
def tdm_layer_prepare(self):
def _tdm_layer_prepare(self):
"""load tdm tree param from npy/list file"""
layer_list = []
layer_list_flat = []
......@@ -136,7 +138,7 @@ class TDMSingleTrainer(SingleTrainer):
[len(i) for i in layer_list]))
return layer_array, layer_list
def tdm_info_prepare(self):
def _tdm_info_prepare(self):
"""load tdm tree param from list file"""
info_array = np.load(self.tree_info_path)
return info_array
......@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Training with fluid using DistributeTranspiler.
"""
......@@ -20,23 +19,31 @@ import os
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from fleetrec.core.trainer import Trainer
from fleetrec.core.utils import envs
from fleetrec.core.utils import dataloader_instance
from paddlerec.core.trainer import Trainer
from paddlerec.core.utils import envs
from paddlerec.core.utils import dataloader_instance
from paddlerec.core.reader import SlotReader
class TranspileTrainer(Trainer):
def __init__(self, config=None):
Trainer.__init__(self, config)
device = envs.get_global_env("train.device", "cpu")
if device == 'gpu':
self._place = fluid.CUDAPlace(0)
self._exe = fluid.Executor(self._place)
self.processor_register()
self.model = None
self.inference_models = []
self.increment_models = []
def processor_register(self):
print("Need implement by trainer, `self.regist_context_processor('uninit', self.instance)` must be the first")
print(
"Need implement by trainer, `self.regist_context_processor('uninit', self.instance)` must be the first"
)
def _get_dataloader(self, state="TRAIN"):
if state == "TRAIN":
dataloader = self.model._data_loader
namespace = "train.reader"
......@@ -46,45 +53,86 @@ class TranspileTrainer(Trainer):
namespace = "evaluate.reader"
class_name = "EvaluateReader"
sparse_slots = envs.get_global_env("sparse_slots", None, namespace)
dense_slots = envs.get_global_env("dense_slots", None, namespace)
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
print("batch_size: {}".format(batch_size))
reader = dataloader_instance.dataloader(
reader_class, state, self._config_yaml)
reader_class = envs.lazy_instance_by_fliename(reader_class, class_name)
reader_ins = reader_class(self._config_yaml)
if sparse_slots is None and dense_slots is None:
reader_class = envs.get_global_env("class", None, namespace)
reader = dataloader_instance.dataloader(reader_class, state,
self._config_yaml)
reader_class = envs.lazy_instance_by_fliename(reader_class,
class_name)
reader_ins = reader_class(self._config_yaml)
else:
reader = dataloader_instance.slotdataloader("", state,
self._config_yaml)
reader_ins = SlotReader(self._config_yaml)
if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
dataloader.set_sample_list_generator(reader)
else:
dataloader.set_sample_generator(reader, batch_size)
debug_mode = envs.get_global_env("reader_debug_mode", False, namespace)
if debug_mode:
print("--- DataLoader Debug Mode Begin , show pre 10 data ---")
for idx, line in enumerate(reader()):
print(line)
if idx >= 9:
break
print("--- DataLoader Debug Mode End , show pre 10 data ---")
exit(0)
return dataloader
def _get_dataset_ins(self):
count = 0
for f in self.files:
for _, _ in enumerate(open(f, 'r')):
count += 1
return count
def _get_dataset(self, state="TRAIN"):
if state == "TRAIN":
inputs = self.model.get_inputs()
namespace = "train.reader"
train_data_path = envs.get_global_env(
"train_data_path", None, namespace)
train_data_path = envs.get_global_env("train_data_path", None,
namespace)
else:
inputs = self.model.get_infer_inputs()
namespace = "evaluate.reader"
train_data_path = envs.get_global_env(
"test_data_path", None, namespace)
train_data_path = envs.get_global_env("test_data_path", None,
namespace)
sparse_slots = envs.get_global_env("sparse_slots", None, namespace)
dense_slots = envs.get_global_env("dense_slots", None, namespace)
threads = int(envs.get_runtime_environ("train.trainer.threads"))
batch_size = envs.get_global_env("batch_size", None, namespace)
reader_class = envs.get_global_env("class", None, namespace)
abs_dir = os.path.dirname(os.path.abspath(__file__))
reader = os.path.join(abs_dir, '../utils', 'dataset_instance.py')
pipe_cmd = "python {} {} {} {}".format(
reader, reader_class, state, self._config_yaml)
if train_data_path.startswith("fleetrec::"):
if sparse_slots is None and dense_slots is None:
pipe_cmd = "python {} {} {} {}".format(reader, reader_class, state,
self._config_yaml)
else:
if sparse_slots is None:
sparse_slots = "#"
if dense_slots is None:
dense_slots = "#"
padding = envs.get_global_env("padding", 0, namespace)
pipe_cmd = "python {} {} {} {} {} {} {} {}".format(
reader, "slot", "slot", self._config_yaml, namespace, \
sparse_slots.replace(" ", "#"), dense_slots.replace(" ", "#"), str(padding))
if train_data_path.startswith("paddlerec::"):
package_base = envs.get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
train_data_path = os.path.join(
package_base, train_data_path.split("::")[1])
train_data_path = os.path.join(package_base,
train_data_path.split("::")[1])
dataset = fluid.DatasetFactory().create_dataset()
dataset.set_use_var(inputs)
......@@ -95,8 +143,18 @@ class TranspileTrainer(Trainer):
os.path.join(train_data_path, x)
for x in os.listdir(train_data_path)
]
self.files = file_list
dataset.set_filelist(self.files)
debug_mode = envs.get_global_env("reader_debug_mode", False, namespace)
if debug_mode:
print("--- Dataset Debug Mode Begin , show pre 10 data of {}---".
format(file_list[0]))
os.system("cat {} | {} | head -10".format(file_list[0], pipe_cmd))
print("--- Dataset Debug Mode End , show pre 10 data of {}---".
format(file_list[0]))
exit(0)
dataset.set_filelist(file_list)
return dataset
def save(self, epoch_id, namespace, is_fleet=False):
......@@ -115,26 +173,30 @@ class TranspileTrainer(Trainer):
if not need_save(epoch_id, save_interval, False):
return
# print("save inference model is not supported now.")
# return
feed_varnames = envs.get_global_env("save.inference.feed_varnames", None, namespace)
fetch_varnames = envs.get_global_env("save.inference.fetch_varnames", None, namespace)
feed_varnames = envs.get_global_env("save.inference.feed_varnames",
None, namespace)
fetch_varnames = envs.get_global_env(
"save.inference.fetch_varnames", None, namespace)
if feed_varnames is None or fetch_varnames is None:
return
fetch_vars = [fluid.default_main_program().global_block().vars[varname] for varname in fetch_varnames]
dirname = envs.get_global_env("save.inference.dirname", None, namespace)
fetch_vars = [
fluid.default_main_program().global_block().vars[varname]
for varname in fetch_varnames
]
dirname = envs.get_global_env("save.inference.dirname", None,
namespace)
assert dirname is not None
dirname = os.path.join(dirname, str(epoch_id))
if is_fleet:
fleet.save_inference_model(self._exe, dirname, feed_varnames, fetch_vars)
fleet.save_inference_model(self._exe, dirname, feed_varnames,
fetch_vars)
else:
fluid.io.save_inference_model(
dirname, feed_varnames, fetch_vars, self._exe)
fluid.io.save_inference_model(dirname, feed_varnames,
fetch_vars, self._exe)
self.inference_models.append((epoch_id, dirname))
def save_persistables():
......@@ -144,8 +206,8 @@ class TranspileTrainer(Trainer):
if not need_save(epoch_id, save_interval, False):
return
dirname = envs.get_global_env(
"save.increment.dirname", None, namespace)
dirname = envs.get_global_env("save.increment.dirname", None,
namespace)
assert dirname is not None
dirname = os.path.join(dirname, str(epoch_id))
......@@ -203,17 +265,29 @@ class TranspileTrainer(Trainer):
metrics_format = ", ".join(metrics_format)
self._exe.run(startup_program)
for (epoch, model_dir) in self.increment_models:
print("Begin to infer epoch {}, model_dir: {}".format(epoch, model_dir))
model_list = self.increment_models
evaluate_only = envs.get_global_env(
'evaluate_only', False, namespace='evaluate')
if evaluate_only:
model_list = [(0, envs.get_global_env(
'evaluate_model_path', "", namespace='evaluate'))]
is_return_numpy = envs.get_global_env(
'is_return_numpy', True, namespace='evaluate')
for (epoch, model_dir) in model_list:
print("Begin to infer No.{} model, model_dir: {}".format(
epoch, model_dir))
program = infer_program.clone()
fluid.io.load_persistables(self._exe, model_dir, program)
reader.start()
batch_id = 0
try:
while True:
metrics_rets = self._exe.run(
program=program,
fetch_list=metrics_varnames)
metrics_rets = self._exe.run(program=program,
fetch_list=metrics_varnames,
return_numpy=is_return_numpy)
metrics = [epoch, batch_id]
metrics.extend(metrics_rets)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -14,11 +14,91 @@
from __future__ import print_function
import os
import sys
from paddlerec.core.utils.envs import lazy_instance_by_fliename
from paddlerec.core.utils.envs import get_global_env
from paddlerec.core.utils.envs import get_runtime_environ
from paddlerec.core.reader import SlotReader
from fleetrec.core.utils.envs import lazy_instance_by_fliename
from fleetrec.core.utils.envs import get_global_env
from fleetrec.core.utils.envs import get_runtime_environ
def dataloader_by_name(readerclass, dataset_name, yaml_file):
reader_class = lazy_instance_by_fliename(readerclass, "TrainReader")
name = "dataset." + dataset_name + "."
data_path = get_global_env(name + "data_path")
if data_path.startswith("paddlerec::"):
package_base = get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
data_path = os.path.join(package_base, data_path.split("::")[1])
files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
reader = reader_class(yaml_file)
reader.init()
def gen_reader():
for file in files:
with open(file, 'r') as f:
for line in f:
line = line.rstrip('\n')
iter = reader.generate_sample(line)
for parsed_line in iter():
if parsed_line is None:
continue
else:
values = []
for pased in parsed_line:
values.append(pased[1])
yield values
def gen_batch_reader():
return reader.generate_batch_from_trainfiles(files)
if hasattr(reader, 'generate_batch_from_trainfiles'):
return gen_batch_reader()
return gen_reader
def slotdataloader_by_name(readerclass, dataset_name, yaml_file):
name = "dataset." + dataset_name + "."
reader_name = "SlotReader"
data_path = get_global_env(name + "data_path")
if data_path.startswith("paddlerec::"):
package_base = get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
data_path = os.path.join(package_base, data_path.split("::")[1])
files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
sparse = get_global_env(name + "sparse_slots", "#")
if sparse == "":
sparse = "#"
dense = get_global_env(name + "dense_slots", "#")
if dense == "":
dense = "#"
padding = get_global_env(name + "padding", 0)
reader = SlotReader(yaml_file)
reader.init(sparse, dense, int(padding))
def gen_reader():
for file in files:
with open(file, 'r') as f:
for line in f:
line = line.rstrip('\n')
iter = reader.generate_sample(line)
for parsed_line in iter():
if parsed_line is None:
continue
else:
values = []
for pased in parsed_line:
values.append(pased[1])
yield values
def gen_batch_reader():
return reader.generate_batch_from_trainfiles(files)
if hasattr(reader, 'generate_batch_from_trainfiles'):
return gen_batch_reader()
return gen_reader
def dataloader(readerclass, train, yaml_file):
......@@ -31,7 +111,7 @@ def dataloader(readerclass, train, yaml_file):
namespace = "evaluate.reader"
data_path = get_global_env("test_data_path", None, namespace)
if data_path.startswith("fleetrec::"):
if data_path.startswith("paddlerec::"):
package_base = get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
data_path = os.path.join(package_base, data_path.split("::")[1])
......@@ -63,3 +143,53 @@ def dataloader(readerclass, train, yaml_file):
if hasattr(reader, 'generate_batch_from_trainfiles'):
return gen_batch_reader()
return gen_reader
def slotdataloader(readerclass, train, yaml_file):
if train == "TRAIN":
reader_name = "SlotReader"
namespace = "train.reader"
data_path = get_global_env("train_data_path", None, namespace)
else:
reader_name = "SlotReader"
namespace = "evaluate.reader"
data_path = get_global_env("test_data_path", None, namespace)
if data_path.startswith("paddlerec::"):
package_base = get_runtime_environ("PACKAGE_BASE")
assert package_base is not None
data_path = os.path.join(package_base, data_path.split("::")[1])
files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
sparse = get_global_env("sparse_slots", "#", namespace)
if sparse == "":
sparse = "#"
dense = get_global_env("dense_slots", "#", namespace)
if dense == "":
dense = "#"
padding = get_global_env("padding", 0, namespace)
reader = SlotReader(yaml_file)
reader.init(sparse, dense, int(padding))
def gen_reader():
for file in files:
with open(file, 'r') as f:
for line in f:
line = line.rstrip('\n')
iter = reader.generate_sample(line)
for parsed_line in iter():
if parsed_line is None:
continue
else:
values = []
for pased in parsed_line:
values.append(pased[1])
yield values
def gen_batch_reader():
return reader.generate_batch_from_trainfiles(files)
if hasattr(reader, 'generate_batch_from_trainfiles'):
return gen_batch_reader()
return gen_reader
......@@ -13,18 +13,18 @@
# limitations under the License.
import abc
import time
import datetime
import time
import paddle.fluid as fluid
from fleetrec.core.utils import fs as fs
from fleetrec.core.utils import util as util
from paddlerec.core.utils import fs as fs
from paddlerec.core.utils import util as util
class Dataset(object):
class DatasetHolder(object):
"""
Dataset Base
Dataset Holder
"""
__metaclass__ = abc.ABCMeta
......@@ -62,7 +62,7 @@ class Dataset(object):
pass
class TimeSplitDataset(Dataset):
class TimeSplitDatasetHolder(DatasetHolder):
"""
Dataset with time split dir. root_path/$DAY/$HOUR
"""
......@@ -74,11 +74,17 @@ class TimeSplitDataset(Dataset):
Dataset.__init__(self, config)
if 'data_donefile' not in config or config['data_donefile'] is None:
config['data_donefile'] = config['data_path'] + "/to.hadoop.done"
self._path_generator = util.PathGenerator({'templates': [
{'name': 'data_path', 'template': config['data_path']},
{'name': 'donefile_path', 'template': config['data_donefile']}
]})
self._split_interval = config['split_interval'] # data split N mins per dir
self._path_generator = util.PathGenerator({
'templates': [{
'name': 'data_path',
'template': config['data_path']
}, {
'name': 'donefile_path',
'template': config['data_donefile']
}]
})
self._split_interval = config[
'split_interval'] # data split N mins per dir
self._data_file_handler = fs.FileHandler(config)
def _format_data_time(self, daytime_str, time_window_mins):
......@@ -91,7 +97,8 @@ class TimeSplitDataset(Dataset):
return None, 0
if mins_of_day % self._split_interval != 0:
skip_mins = self._split_interval - (mins_of_day % self._split_interval)
skip_mins = self._split_interval - (mins_of_day %
self._split_interval)
data_time = data_time + datetime.timedelta(minutes=skip_mins)
time_window_mins = time_window_mins - skip_mins
return data_time, time_window_mins
......@@ -106,17 +113,24 @@ class TimeSplitDataset(Dataset):
True/False
"""
is_ready = True
data_time, windows_mins = self._format_data_time(daytime_str, time_window_mins)
data_time, windows_mins = self._format_data_time(daytime_str,
time_window_mins)
while time_window_mins > 0:
file_path = self._path_generator.generate_path('donefile_path', {'time_format': data_time})
file_path = self._path_generator.generate_path(
'donefile_path', {'time_format': data_time})
if not self._data_file_handler.is_exist(file_path):
is_ready = False
break
time_window_mins = time_window_mins - self._split_interval
data_time = data_time + datetime.timedelta(minutes=self._split_interval)
data_time = data_time + datetime.timedelta(
minutes=self._split_interval)
return is_ready
def get_file_list(self, daytime_str, time_window_mins, node_num=1, node_idx=0):
def get_file_list(self,
daytime_str,
time_window_mins,
node_num=1,
node_idx=0):
"""
data in [daytime_str, daytime_str + time_window_mins], random shard to node_num, return shard[node_idx]
Args:
......@@ -128,36 +142,32 @@ class TimeSplitDataset(Dataset):
list, data_shard[node_idx]
"""
data_file_list = []
data_time, windows_mins = self._format_data_time(daytime_str, time_window_mins)
data_time, windows_mins = self._format_data_time(daytime_str,
time_window_mins)
while time_window_mins > 0:
file_path = self._path_generator.generate_path('data_path', {'time_format': data_time})
file_path = self._path_generator.generate_path(
'data_path', {'time_format': data_time})
sub_file_list = self._data_file_handler.ls(file_path)
for sub_file in sub_file_list:
sub_file_name = self._data_file_handler.get_file_name(sub_file)
if not sub_file_name.startswith(self._config['filename_prefix']):
if not sub_file_name.startswith(self._config[
'filename_prefix']):
continue
if hash(sub_file_name) % node_num == node_idx:
data_file_list.append(sub_file)
time_window_mins = time_window_mins - self._split_interval
data_time = data_time + datetime.timedelta(minutes=self._split_interval)
data_time = data_time + datetime.timedelta(
minutes=self._split_interval)
return data_file_list
class FluidTimeSplitDataset(TimeSplitDataset):
"""
A Dataset with time split for PaddleFluid
"""
def __init__(self, config):
""" """
TimeSplitDataset.__init__(self, config)
def _alloc_dataset(self, file_list):
""" """
dataset = fluid.DatasetFactory().create_dataset(self._config['dataset_type'])
dataset = fluid.DatasetFactory().create_dataset(self._config[
'dataset_type'])
dataset.set_batch_size(self._config['batch_size'])
dataset.set_thread(self._config['load_thread'])
dataset.set_hdfs_config(self._config['fs_name'], self._config['fs_ugi'])
dataset.set_hdfs_config(self._config['fs_name'],
self._config['fs_ugi'])
dataset.set_pipe_command(self._config['data_converter'])
dataset.set_filelist(file_list)
dataset.set_use_var(self._config['data_vars'])
......@@ -173,7 +183,9 @@ class FluidTimeSplitDataset(TimeSplitDataset):
while self.check_ready(begin_time, windown_min) == False:
print("dataset not ready, time:" + begin_time)
time.sleep(30)
file_list = self.get_file_list(begin_time, windown_min, params['node_num'], params['node_idx'])
file_list = self.get_file_list(begin_time, windown_min,
params['node_num'],
params['node_idx'])
self._datasets[begin_time] = self._alloc_dataset(file_list)
self._datasets[begin_time].load_into_memory()
else:
......@@ -186,9 +198,12 @@ class FluidTimeSplitDataset(TimeSplitDataset):
windown_min = params['time_window_min']
if begin_time not in self._datasets:
if self.check_ready(begin_time, windown_min):
file_list = self.get_file_list(begin_time, windown_min, params['node_num'], params['node_idx'])
file_list = self.get_file_list(begin_time, windown_min,
params['node_num'],
params['node_idx'])
self._datasets[begin_time] = self._alloc_dataset(file_list)
self._datasets[begin_time].preload_into_memory(self._config['preload_thread'])
self._datasets[begin_time].preload_into_memory(self._config[
'preload_thread'])
return True
return False
......
......@@ -11,23 +11,39 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
from fleetrec.core.utils.envs import lazy_instance_by_fliename
from paddlerec.core.utils.envs import lazy_instance_by_fliename
from paddlerec.core.reader import SlotReader
if len(sys.argv) != 4:
raise ValueError("reader only accept 3 argument: 1. reader_class 2.train/evaluate 3.yaml_abs_path")
if len(sys.argv) < 4:
raise ValueError(
"reader only accept 3 argument: 1. reader_class 2.train/evaluate/slotreader 3.yaml_abs_path"
)
reader_package = sys.argv[1]
if sys.argv[2] == "TRAIN":
if sys.argv[2].upper() == "TRAIN":
reader_name = "TrainReader"
else:
elif sys.argv[2].upper() == "EVALUATE":
reader_name = "EvaluateReader"
else:
reader_name = "SlotReader"
namespace = sys.argv[4]
sparse_slots = sys.argv[5].replace("?", " ")
dense_slots = sys.argv[6].replace("?", " ")
padding = int(sys.argv[7])
yaml_abs_path = sys.argv[3]
reader_class = lazy_instance_by_fliename(reader_package, reader_name)
reader = reader_class(yaml_abs_path)
reader.init()
reader.run_from_stdin()
if reader_name != "SlotReader":
reader_class = lazy_instance_by_fliename(reader_package, reader_name)
reader = reader_class(yaml_abs_path)
reader.init()
reader.run_from_stdin()
else:
reader = SlotReader(yaml_abs_path)
reader.init(sparse_slots, dense_slots, padding)
reader.run_from_stdin()
......@@ -12,21 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from contextlib import closing
import copy
import sys
import os
import socket
from contextlib import closing
import sys
global_envs = {}
def flatten_environs(envs):
def flatten_environs(envs, separator="."):
flatten_dict = {}
assert isinstance(envs, dict)
def fatten_env_namespace(namespace_nests, local_envs):
if not isinstance(local_envs, dict):
global_k = ".".join(namespace_nests)
global_k = separator.join(namespace_nests)
flatten_dict[global_k] = str(local_envs)
else:
for k, v in local_envs.items():
......@@ -35,7 +36,7 @@ def flatten_environs(envs):
nests.append(k)
fatten_env_namespace(nests, v)
else:
global_k = ".".join(namespace_nests + [k])
global_k = separator.join(namespace_nests + [k])
flatten_dict[global_k] = str(v)
for k, v in envs.items():
......@@ -67,19 +68,28 @@ def set_global_envs(envs):
nests = copy.deepcopy(namespace_nests)
nests.append(k)
fatten_env_namespace(nests, v)
elif (k == "dataset" or k == "phase" or
k == "runner") and isinstance(v, list):
for i in v:
if i.get("name") is None:
raise ValueError("name must be in dataset list ", v)
nests = copy.deepcopy(namespace_nests)
nests.append(k)
nests.append(i["name"])
fatten_env_namespace(nests, i)
else:
global_k = ".".join(namespace_nests + [k])
global_envs[global_k] = v
for k, v in envs.items():
fatten_env_namespace([k], v)
fatten_env_namespace([], envs)
def get_global_env(env_name, default_value=None, namespace=None):
"""
get os environment value
"""
_env_name = env_name if namespace is None else ".".join([namespace, env_name])
_env_name = env_name if namespace is None else ".".join(
[namespace, env_name])
return global_envs.get(_env_name, default_value)
......@@ -87,22 +97,32 @@ def get_global_envs():
return global_envs
def path_adapter(path):
if path.startswith("paddlerec."):
package = get_runtime_environ("PACKAGE_BASE")
l_p = path.split("paddlerec.")[1].replace(".", "/")
return os.path.join(package, l_p)
else:
return path
def windows_path_converter(path):
if get_platform() == "WINDOWS":
return path.replace("/", "\\")
else:
return path.replace("\\", "/")
def update_workspace():
workspace = global_envs.get("train.workspace", None)
workspace = global_envs.get("workspace")
if not workspace:
return
# is fleet inner models
if workspace.startswith("fleetrec."):
fleet_package = get_runtime_environ("PACKAGE_BASE")
workspace_dir = workspace.split("fleetrec.")[1].replace(".", "/")
path = os.path.join(fleet_package, workspace_dir)
else:
path = workspace
workspace = path_adapter(workspace)
for name, value in global_envs.items():
if isinstance(value, str):
value = value.replace("{workspace}", path)
value = value.replace("{workspace}", workspace)
value = windows_path_converter(value)
global_envs[name] = value
......@@ -127,7 +147,7 @@ def pretty_print_envs(envs, header=None):
if header:
draws += h_format.format(header[0], header[1])
else:
draws += h_format.format("fleetrec Global Envs", "Value")
draws += h_format.format("paddlerec Global Envs", "Value")
draws += line + "\n"
......@@ -147,7 +167,8 @@ def pretty_print_envs(envs, header=None):
def lazy_instance_by_package(package, class_name):
models = get_global_env("train.model.models")
model_package = __import__(package, globals(), locals(), package.split("."))
model_package = __import__(package,
globals(), locals(), package.split("."))
instance = getattr(model_package, class_name)
return instance
......@@ -157,7 +178,8 @@ def lazy_instance_by_fliename(abs, class_name):
sys.path.append(dirname)
package = os.path.splitext(os.path.basename(abs))[0]
model_package = __import__(package, globals(), locals(), package.split("."))
model_package = __import__(package,
globals(), locals(), package.split("."))
instance = getattr(model_package, class_name)
return instance
......@@ -172,11 +194,12 @@ def get_platform():
if 'Windows' in plats:
return "WINDOWS"
def find_free_port():
def __free_port():
with closing(socket.socket(socket.AF_INET,
socket.SOCK_STREAM)) as s:
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
s.bind(('', 0))
return s.getsockname()[1]
new_port = __free_port()
return new_port
......@@ -13,11 +13,12 @@
# limitations under the License.
import os
from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
def is_afs_path(path):
"""R
"""is_afs_path
"""
if path.startswith("afs") or path.startswith("hdfs"):
return True
......@@ -28,12 +29,12 @@ class LocalFSClient(object):
"""
Util for local disk file_system io
"""
def __init__(self):
"""R
"""
pass
def write(self, content, path, mode):
"""
write to file
......@@ -43,7 +44,7 @@ class LocalFSClient(object):
mode(string): w/a w:clear_write a:append_write
"""
temp_dir = os.path.dirname(path)
if not os.path.exists(temp_dir):
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
f = open(path, mode)
f.write(content)
......@@ -75,7 +76,7 @@ class LocalFSClient(object):
"""R
"""
os.system("rm -rf " + path)
def is_exist(self, path):
"""R
"""
......@@ -94,13 +95,14 @@ class FileHandler(object):
"""
A Smart file handler. auto judge local/afs by path
"""
def __init__(self, config):
"""R
"""
if 'fs_name' in config:
hadoop_home="$HADOOP_HOME"
hadoop_home = "$HADOOP_HOME"
hdfs_configs = {
"hadoop.job.ugi": config['fs_ugi'],
"hadoop.job.ugi": config['fs_ugi'],
"fs.default.name": config['fs_name']
}
self._hdfs_client = HDFSClient(hadoop_home, hdfs_configs)
......@@ -131,7 +133,9 @@ class FileHandler(object):
if mode.find('a') >= 0:
org_content = self._hdfs_client.cat(dest_path)
content = content + org_content
self._local_fs_client.write(content, temp_local_file, mode) #fleet hdfs_client only support upload, so write tmp file
self._local_fs_client.write(
content, temp_local_file, mode
) # fleet hdfs_client only support upload, so write tmp file
self._hdfs_client.delete(dest_path + ".tmp")
self._hdfs_client.upload(dest_path + ".tmp", temp_local_file)
self._hdfs_client.delete(dest_path + ".bak")
......@@ -139,7 +143,7 @@ class FileHandler(object):
self._hdfs_client.rename(dest_path + ".tmp", dest_path)
else:
self._local_fs_client.write(content, dest_path, mode)
def cat(self, path):
"""R
"""
......@@ -148,19 +152,20 @@ class FileHandler(object):
return hdfs_cat
else:
return self._local_fs_client.cat(path)
def ls(self, path):
"""R
"""
files = []
if is_afs_path(path):
files = self._hdfs_client.ls(path)
files = [path + '/' + self.get_file_name(fi) for fi in files] # absulte path
files = [path + '/' + self.get_file_name(fi)
for fi in files] # absulte path
else:
files = self._local_fs_client.ls(path)
files = [path + '/' + fi for fi in files] # absulte path
return files
def cp(self, org_path, dest_path):
"""R
"""
......@@ -170,6 +175,6 @@ class FileHandler(object):
return self._local_fs_client.cp(org_path, dest_path)
if not org_is_afs and dest_is_afs:
return self._hdfs_client.upload(dest_path, org_path)
if org_is_afs and not dest_is_afs:
if org_is_afs and not dest_is_afs:
return self._hdfs_client.download(org_path, dest_path)
print("Not Suppor hdfs cp currently")
......@@ -12,16 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import yaml
class TableMeta(object):
"""
Simple ParamTable Meta, Contain table_id
"""
TableId = 1
@staticmethod
def alloc_new_table(table_id):
"""
......
......@@ -12,15 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import os
import time
import datetime
from paddle import fluid
from fleetrec.core.utils import fs as fs
from paddlerec.core.utils import fs as fs
def save_program_proto(path, program=None):
if program is None:
_program = fluid.default_main_program()
else:
......@@ -174,7 +176,8 @@ class PathGenerator(object):
"""
if template_name in self._templates:
if 'time_format' in param:
str = param['time_format'].strftime(self._templates[template_name])
str = param['time_format'].strftime(self._templates[
template_name])
return str.format(**param)
return self._templates[template_name].format(**param)
else:
......@@ -197,31 +200,39 @@ class TimeTrainPass(object):
self._begin_day = make_datetime(day_fields[0].strip())
if len(day_fields) == 1 or len(day_fields[1]) == 0:
# 100 years, meaning to continuous running
self._end_day = self._begin_day + datetime.timedelta(days=36500)
self._end_day = self._begin_day + datetime.timedelta(
days=36500)
else:
# example: 2020212+10
run_day = int(day_fields[1].strip())
self._end_day = self._begin_day + datetime.timedelta(days=run_day)
self._end_day = self._begin_day + datetime.timedelta(
days=run_day)
else:
# example: {20191001..20191031}
days = os.popen("echo -n " + self._config['days']).read().split(" ")
days = os.popen("echo -n " + self._config['days']).read().split(
" ")
self._begin_day = make_datetime(days[0])
self._end_day = make_datetime(days[len(days) - 1])
self._checkpoint_interval = self._config['checkpoint_interval']
self._dump_inference_interval = self._config['dump_inference_interval']
self._interval_per_pass = self._config['train_time_interval'] # train N min data per pass
self._interval_per_pass = self._config[
'train_time_interval'] # train N min data per pass
self._pass_id = 0
self._inference_pass_id = 0
self._pass_donefile_handler = None
if 'pass_donefile_name' in self._config:
self._train_pass_donefile = global_config['output_path'] + '/' + self._config['pass_donefile_name']
self._train_pass_donefile = global_config[
'output_path'] + '/' + self._config['pass_donefile_name']
if fs.is_afs_path(self._train_pass_donefile):
self._pass_donefile_handler = fs.FileHandler(global_config['io']['afs'])
self._pass_donefile_handler = fs.FileHandler(global_config[
'io']['afs'])
else:
self._pass_donefile_handler = fs.FileHandler(global_config['io']['local_fs'])
self._pass_donefile_handler = fs.FileHandler(global_config[
'io']['local_fs'])
last_done = self._pass_donefile_handler.cat(self._train_pass_donefile).strip().split('\n')[-1]
last_done = self._pass_donefile_handler.cat(
self._train_pass_donefile).strip().split('\n')[-1]
done_fileds = last_done.split('\t')
if len(done_fileds) > 4:
self._base_key = done_fileds[1]
......@@ -235,15 +246,18 @@ class TimeTrainPass(object):
"""
return 24 * 60 / self._interval_per_pass
def save_train_progress(self, day, pass_id, base_key, model_path, is_checkpoint):
def save_train_progress(self, day, pass_id, base_key, model_path,
is_checkpoint):
"""R
"""
if is_checkpoint:
self._checkpoint_pass_id = pass_id
self._checkpoint_model_path = model_path
done_content = "%s\t%s\t%s\t%s\t%d\n" % (day, base_key,
self._checkpoint_model_path, self._checkpoint_pass_id, pass_id)
self._pass_donefile_handler.write(done_content, self._train_pass_donefile, 'a')
done_content = "%s\t%s\t%s\t%s\t%d\n" % (
day, base_key, self._checkpoint_model_path,
self._checkpoint_pass_id, pass_id)
self._pass_donefile_handler.write(done_content,
self._train_pass_donefile, 'a')
pass
def init_pass_by_id(self, date_str, pass_id):
......@@ -285,12 +299,14 @@ class TimeTrainPass(object):
if self._pass_id < 1:
self.init_pass_by_time(self._begin_day.strftime("%Y%m%d%H%M"))
else:
next_time = self._current_train_time + datetime.timedelta(minutes=self._interval_per_pass)
next_time = self._current_train_time + datetime.timedelta(
minutes=self._interval_per_pass)
if (next_time - self._end_day).total_seconds() > 0:
has_next = False
else:
self.init_pass_by_time(next_time.strftime("%Y%m%d%H%M"))
if has_next and (self._inference_pass_id < self._pass_id or self._pass_id < old_pass_id):
if has_next and (self._inference_pass_id < self._pass_id or
self._pass_id < old_pass_id):
self._inference_pass_id = self._pass_id - 1
return has_next
......@@ -318,9 +334,11 @@ class TimeTrainPass(object):
Return:
date(current_train_time + delta_day)
"""
return (self._current_train_time + datetime.timedelta(days=delta_day)).strftime("%Y%m%d")
return (self._current_train_time + datetime.timedelta(days=delta_day)
).strftime("%Y%m%d")
def timestamp(self, delta_day=0):
"""R
"""
return (self._current_train_time + datetime.timedelta(days=delta_day)).timestamp()
return (self._current_train_time + datetime.timedelta(days=delta_day)
).timestamp()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# PaddleRec Benchmark
The accuracy and performance numbers of the `PaddleRec` models under the various execution modes will be updated continuously as new versions are released. Feedback and oversight are welcome; if you run into any problem, please open a [Github Issue](https://github.com/PaddlePaddle/PaddleRec/issues).
## [Recall models and benchmarks](../models/recall/readme.md)
## [Ranking models and benchmarks](../models/rank/readme.md)
## [Content-understanding models and benchmarks](../models/contentunderstanding/readme.md)
## [Multi-task models and benchmarks](../models/multitask/readme.md)
## [Tree-based models and benchmarks](../models/treebased/README.md)
# Contributing to PaddleRec
> Placeholder
# PaddleRec recommendation dataset format
If your dataset is already in the `[slot:feasign]*` format, or can be preprocessed into it, you can use PaddleRec's built-in Reader directly.
The benefit is that you do not have to write your own Reader, and all models can share a single unified data format.
## Data format description
Suppose your raw data format is
```bash
<label> <integer feature 1> ... <integer feature 13> <categorical feature 1> ... <categorical feature 26>
```
Here ```<label>``` indicates whether the ad was clicked: 1 for clicked, 0 for not clicked. ```<integer feature>``` denotes the numerical (continuous) features; there are 13 of them, each carrying a single value.
```<categorical feature>``` denotes the categorical (sparse) features; there are 26 of them. Adjacent fields are separated by ```\t```.
Assume the 13 continuous features (dense slots) are named as follows:
```
D1 D2 D3 D4 D5 D6 D7 D8 D9 D10 D11 D12 D13
```
and the 26 sparse features (sparse slots) are named:
```
S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 S13 S14 S15 S16 S17 S18 S19 S20 S21 S22 S23 S24 S25 S26
```
Then the following sample (1 label + 13 dense values + 26 feasigns)
```
1 0.1 0.4 0.2 0.3 0.5 0.8 0.3 0.2 0.1 0.5 0.6 0.3 0.9 60 16 91 50 52 52 28 69 63 33 87 69 48 59 27 12 95 36 37 41 17 3 86 19 88 60
```
can be converted into:
```
label:1 D1:0.1 D2:0.4 D3:0.2 D4:0.3 D5:0.5 D6:0.8 D7:0.3 D8:0.2 D9:0.1 D10:0.5 D11:0.6 D12:0.3 D13:0.9 S1:60 S2:16 S3:91 S4:50 S5:52 S6:52 S7:28 S8:69 S9:63 S10:33 S11:87 S12:69 S13:48 S14:59 S15:27 S16:12 S17:95 S18:36 S19:37 S20:41 S21:17 S22:3 S23:86 S24:19 S25:88 S26:60
```
Note: the order of the slot:feasign fields does not matter; writing ```D2:0.4 D1:0.1``` instead of ```D1:0.1 D2:0.4``` is equally valid.
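For illustration only, here is a rough Python sketch of such a conversion. Nothing below is part of PaddleRec; the slot names and the assumption that the raw fields are tab-separated follow the description above, and empty (missing) fields are simply skipped:
```python
# Hypothetical conversion helper (not part of PaddleRec), matching the format above.
DENSE_SLOTS = ["D%d" % i for i in range(1, 14)]    # D1 ... D13
SPARSE_SLOTS = ["S%d" % i for i in range(1, 27)]   # S1 ... S26

def to_slot_format(raw_line):
    """Turn '<label>\t<13 dense>\t<26 categorical>' into 'label:x D1:x ... S26:x'."""
    fields = raw_line.rstrip("\n").split("\t")
    label, dense, sparse = fields[0], fields[1:14], fields[14:40]
    tokens = ["label:%s" % label]
    tokens += ["%s:%s" % (n, v) for n, v in zip(DENSE_SLOTS, dense) if v != ""]
    tokens += ["%s:%s" % (n, v) for n, v in zip(SPARSE_SLOTS, sparse) if v != ""]
    return " ".join(tokens)
```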
## Configuration
The reader section needs ```sparse_slots``` and ```dense_slots``` to be configured, for example:
```
workspace: xxxx
reader:
batch_size: 2
train_data_path: "{workspace}/data/train_data"
sparse_slots: "label S1 S2 S3 S4 S5 S6 S7 S8 S9 S10 S11 S12 S13 S14 S15 S16 S17 S18 S19 S20 S21 S22 S23 S24 S25 S26"
dense_slots: "D1:1 D2:1 D3:1 D4:1 D4:1 D6:1 D7:1 D8:1 D9:1 D10:1 D11:1 D12:1 D13:1"
model:
xxxxx
```
sparse_slots is the space-separated list of sparse feature slots.
dense_slots is the space-separated list of dense feature slots. Each field has the format ```[dense_slot_name]:[dim1,dim2,dim3...]```, where ```dim1,dim2,dim3...``` is the shape of that slot.
Once this is configured, the variables corresponding to these slots are available in the model through the following members:
```
self._sparse_data_var
self._dense_data_var
```
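Below is a rough sketch (not code from the repository) of how a model might consume these variables once the built-in SlotReader has filled them in. The embedding vocabulary size and dimensions are made-up numbers, and the slot order follows the yaml above, where the first sparse slot is `label`:
```python
import paddle.fluid as fluid

def net(self):
    dense = self._dense_data_var[0]           # D1~D13, shape [-1, 13]
    sparse_slots = self._sparse_data_var[1:]  # skip "label", keep S1 ~ S26

    pooled = []
    for slot in sparse_slots:
        emb = fluid.layers.embedding(
            input=slot, size=[1000001, 9],    # assumed vocabulary size / embedding dim
            is_sparse=True)
        pooled.append(fluid.layers.sequence_pool(input=emb, pool_type="sum"))

    feature = fluid.layers.concat(pooled + [dense], axis=1)
    hidden = fluid.layers.fc(input=feature, size=400, act="relu")
    predict = fluid.layers.fc(input=hidden, size=2, act="softmax")
    return predict  # loss / metrics would be built on top of this in a real model
```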
# PaddleRec custom datasets and Readers
To use a custom dataset and configure an asynchronous Reader, pay attention to the following steps:
* [Preparing the dataset](#preparing-the-dataset)
* [Adding input placeholders to the model network](#adding-input-placeholders-to-the-model-network)
* [Implementing the Reader](#implementing-the-reader)
* [Configuring the Reader in the yaml file](#configuring-the-reader-in-the-yaml-file)
Using the CTR-DNN model as an example, we walk through the full journey of data preparation, variable definition, Reader implementation, and debugging.
* [Data and Reader example: DNN](#data-and-reader-example-dnn)
## Preparing the dataset
PaddleRec supports model-specific custom datasets.
Some tips about the data:
1. Data volume:
PaddleRec is designed for large-scale data and can easily read hundreds of millions of samples; the industrial-grade data IO API `dataset` has been thoroughly polished in search, recommendation, and feed businesses.
2. File type:
Any directly readable text data is supported. `dataset` can also read `.gz`-compressed text without extra code. Samples should be organized one per line, delimited by `\n`.
3. File location:
Files are usually stored locally on the training nodes, but `dataset` also supports reading data remotely via `hadoop`, so the data does not need to be downloaded locally; simply configure the hadoop account and address for the dataset.
4. Data types:
The Reader processes line-oriented `string` data, while the data fed into the network must be numerical (`int`, `float`); feeding `string` into the network is not supported, and storing or processing training data in plain text is not recommended.
5. Tips:
In Dataset mode, the training threads and the data-reading threads are tightly coupled. To make full use of multi-threading, `it is strongly recommended to split the data into multiple reasonably sized small files`, especially in distributed training, so that the data volume is balanced across nodes and download speed is improved.
## Adding input placeholders to the model network
After the Reader has read the files, the produced data is fed into the network and must be received by placeholders. In Paddle, placeholders are defined with `fluid.data` or `fluid.layers.data`; see [fluid.data](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/fluid_cn/data_cn.html#data) and [fluid.layers.data](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/layers_cn/data_cn.html#data).
Suppose you want three inputs: data A with dimension 32, variable-length sparse data B, and a one-dimensional label C whose gradient should be allowed to propagate. The definitions are as follows.
Definition of data A:
```python
var_a = fluid.data(name='A', shape= [-1, 32], dtype='float32')
```
Definition of data B (for variable-length data see [LoDTensor](https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#cn-user-guide-lod-tensor)):
```python
var_b = fluid.data(name='B', shape=[-1, 1], lod_level=1, dtype='int64')
```
Definition of data C:
```python
var_c = fluid.data(name='C', shape=[-1, 1], dtype='int32')
var_c.stop_gradient = False
```
After defining these three inputs, the PaddleRec model definition must also add them to the Model base-class member `self._data_var`:
```python
self._data_var.append(var_a)
self._data_var.append(var_b)
self._data_var.append(var_c)
```
With that, defining the network's input data is complete; a consolidated sketch follows below.
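Putting the three snippets above together, the input-definition part of a model could look like this minimal sketch (the method name `define_inputs` is arbitrary; in a real PaddleRec model this code would live inside the network-building function):
```python
import paddle.fluid as fluid

def define_inputs(self):
    var_a = fluid.data(name='A', shape=[-1, 32], dtype='float32')
    var_b = fluid.data(name='B', shape=[-1, 1], lod_level=1, dtype='int64')
    var_c = fluid.data(name='C', shape=[-1, 1], dtype='int32')
    var_c.stop_gradient = False

    # register the inputs so the Reader feeds them in exactly this order
    for var in (var_a, var_b, var_c):
        self._data_var.append(var)
```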
## Implementing the Reader
### The Reader implementation pattern
The Reader logic lives in a separate python file. Let's write a `test_reader.py`; the concrete steps are:
1. First, import the Reader base class:
```python
from paddlerec.core.reader import Reader
```
2. Create a subclass that inherits from the Reader base class; the Reader used for training is named `TrainReader`:
```python
class TrainReader(Reader):
    def init(self):
        pass

    def generate_sample(self, line):
        pass
```
3. In `init(self)`, declare the variables that will be used while reading data. When needed, such variables can be configured in `config.yaml` and fetched with `envs.get_global_env()`.
For example, to read a preprocessing variable `avg=10` from the yaml file, in order to scale data A down by a factor of 10, do the following:
First, add the variable under the appropriate namespace in the yaml file:
```yaml
...
train:
reader:
avg: 10
...
```
Then update the Reader's init function:
```python
from paddlerec.core.utils import envs
class TrainReader(Reader):
    def init(self):
        self.avg = envs.get_global_env("avg", None, "train.reader")

    def generate_sample(self, line):
        pass
```
4. Override the base class's `generate_sample(self, line)` function, which processes the data line by line.
- The function should return an iterable reader method (a function containing yield is no longer an ordinary function but a generator, i.e. an iterable object, just like an array, linked list, file, or string).
- Inside that iterable function, `def reader()` in the example code, we define the actual reading logic: per-line slicing, conversion, and preprocessing.
- Finally, the data must be arranged into a specific format so that PaddleRec's Reader can consume it correctly and feed it into the training network. In short, the output order must correspond strictly one-to-one with the `inputs` created in the network, and each sample is emitted in a dict-like form.
Example: assume data A, B, and C are stored in a text file, one sample per line in the following form:
```shell
0.1,0.2,0.3...3.0,3.1,3.2 \t 99999,99998,99997 \t 1 \n
```
The example code is as follows:
```python
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs

class TrainReader(Reader):
    def init(self):
        self.avg = envs.get_global_env("avg", None, "train.reader")

    def generate_sample(self, line):
        def reader():
            # strip '\n', then split the line into a list on '\t'
            variables = (line.strip('\n')).split('\t')
            # A is the first field; its elements are separated by ','
            var_a = variables[0].split(',')  # list of str
            var_a = [float(i) / self.avg for i in var_a]  # convert str to float
            # B is the second field, also ','-separated
            var_b = variables[1].split(',')  # list of str
            var_b = [int(i) for i in var_b]  # convert str to int
            # C is the third field: a single element, no separator
            var_c = variables[2]
            var_c = int(var_c)  # convert str to int
            var_c = [var_c]     # wrap the single value in a list
            # pair the values with their names, organised like a dict:
            # output has the form {A: var_a, B: var_b, C: var_c}
            variable_name = ['A', 'B', 'C']
            output = zip(variable_name, [var_a] + [var_b] + [var_c])
            # yield turns reader() into an iterable generator
            yield output
        return reader
```
With that, the Reader implementation is complete.
### Configuring the Reader in the yaml file
In the model's yaml configuration file, the main changes are the following:
```yaml
reader:
batch_size: 2
class: "{workspace}/reader.py"
train_data_path: "{workspace}/data/train_data"
reader_debug_mode: False
```
batch_size: as the name suggests, the number of samples per mini-batch
class: the path of the reader required to run this model
train_data_path: the folder containing the training data
reader_debug_mode: switch for the debug mode that checks the reader's syntax and whether its output matches expectations
## Data and Reader example: DNN
The Reader code comes from [criteo_reader.py](../models/rank/criteo_reader.py) and the network code from [model.py](../models/rank/dnn/model.py).
### Criteo dataset format
The CTR-DNN training and test data use the Criteo dataset from the [Display Advertising Challenge](https://www.kaggle.com/c/criteo-display-ad-challenge/). The dataset has two parts: a training set and a test set. The training set contains a portion of Criteo's traffic over a period of time, while the test set corresponds to the ad click traffic of the day following the training data.
Each line of data has the following format:
```bash
<label> <integer feature 1> ... <integer feature 13> <categorical feature 1> ... <categorical feature 26>
```
Here ```<label>``` indicates whether the ad was clicked (1 for clicked, 0 for not). ```<integer feature>``` denotes the 13 numerical (continuous) features and ```<categorical feature>``` the 26 categorical (sparse) features. Adjacent fields are separated by ```\t```; missing features are represented by spaces. In the test set the ```<label>``` field has been removed.
### Preprocessing the Criteo dataset
Preprocessing consists of two steps:
- Split the original training set 9:1 into a training set and a validation set.
- Normalize the numerical (continuous) features. Note that for each feature ```<integer feature i>``` the maximum used during normalization is not the global maximum, but the value at the 95th percentile after sorting, while extreme values are preserved.
### Defining the CTR network inputs
As described above, the Criteo dataset contains continuous data and discrete (sparse) data, so the CTR-DNN model has three kinds of data inputs: `dense_input` receives the continuous data, its dimension is set by the hyper-parameter `dense_feature_dim`, and its type is normalized floating-point data; `sparse_input_ids` records the discrete data, and since the Criteo dataset has 26 slots we create 26 sparse inputs named `S1~S26` with `lod_level=1` (variable-length) and integer type; finally, each sample's `label` indicates whether it was clicked, with integer type, 0 for negative and 1 for positive samples.
In Paddle, data inputs are declared with `paddle.fluid.layers.data()`, which creates placeholders of the specified type; data IO feeds data according to these definitions.
Definition of the sparse inputs:
```python
def sparse_inputs():
ids = envs.get_global_env("hyper_parameters.sparse_inputs_slots", None, self._namespace)
sparse_input_ids = [
fluid.layers.data(name="S" + str(i),
shape=[1],
lod_level=1,
dtype="int64") for i in range(1, ids)
]
return sparse_input_ids
```
Definition of the dense input:
```python
def dense_input():
dim = envs.get_global_env("hyper_parameters.dense_input_dim", None, self._namespace)
dense_input_var = fluid.layers.data(name="D",
shape=[dim],
dtype="float32")
return dense_input_var
```
Definition of the label:
```python
def label_input():
label = fluid.layers.data(name="click", shape=[1], dtype="int64")
return label
```
Putting it together, declare them properly:
```python
self.sparse_inputs = sparse_inputs()
self.dense_input = dense_input()
self.label_input = label_input()
self._data_var.append(self.dense_input)
for input in self.sparse_inputs:
self._data_var.append(input)
self._data_var.append(self.label_input)
```
### Writing the Criteo Reader
```python
# import PaddleRec's Reader base class
from paddlerec.core.reader import Reader
# import PaddleRec's helper for reading the yaml configuration
from paddlerec.core.utils import envs
# define TrainReader, which must inherit from paddlerec.core.reader.Reader
class TrainReader(Reader):
# data preprocessing logic, overriding the base class
# if no preprocessing is needed, simply pass
def init(self):
self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
self.cont_max_ = [20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
self.cont_diff_ = [20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
self.hash_dim_ = envs.get_global_env("hyper_parameters.sparse_feature_number", None, "train.model")
self.continuous_range_ = range(1, 14)
self.categorical_range_ = range(14, 40)
# data reading method, overriding the base class
# implements an iterable reader function that processes the data line by line
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def reader():
"""
This function needs to be implemented by the user, based on data format
"""
features = line.rstrip('\n').split('\t')
dense_feature = []
sparse_feature = []
for idx in self.continuous_range_:
if features[idx] == "":
dense_feature.append(0.0)
else:
dense_feature.append(
(float(features[idx]) - self.cont_min_[idx - 1]) /
self.cont_diff_[idx - 1])
for idx in self.categorical_range_:
sparse_feature.append(
[hash(str(idx) + features[idx]) % self.hash_dim_])
label = [int(features[0])]
feature_name = ["D"]
for idx in self.categorical_range_:
feature_name.append("S" + str(idx - 13))
feature_name.append("label")
yield zip(feature_name, [dense_feature] + sparse_feature + [label])
return reader
```
### Debugging the Reader
On Linux the `Dataset` mode is started by default; on Windows/Mac the `Dataloader` mode is the default.
Turning on debug mode by adding or setting `reader_debug_mode=True` in `config.yaml` runs only the reader part together with the network definition, reads 10 samples, and prints them, which makes it easy to check whether the format matches expectations or whether there are hidden bugs.
```yaml
reader:
batch_size: 2
class: "{workspace}/../criteo_reader.py"
train_data_path: "{workspace}/data/train"
reader_debug_mode: True
```
After the change, run the modified yaml file with paddlerec.run and inspect the output:
```bash
python -m paddlerec.run -m ./models/rank/dnn/config.yaml -e single
```
### Debugging with Dataset
The data format printed by dataset is:
` dense_input:size ; dense_input:value ; sparse_input:size ; sparse_input:value ; ... ; sparse_input:size ; sparse_input:value ; label:size ; label:value `
The basic pattern is that for each variable its size is printed first, followed by its values.
Debugging `criteo_reader` directly, the ideal output looks like this (excerpt):
```bash
...
13 0.0 0.00497512437811 0.05 0.08 0.207421875 0.028 0.35 0.08 0.082 0.0 0.4 0.0 0.08 1 737395 1 210498 1 903564 1 286224 1 286835 1 906818 1 906116 1 67180 1 27346 1 51086 1 142177 1 95024 1 157883 1 873363 1 600281 1 812592 1 228085 1 35900 1 880474 1 984402 1 100885 1 26235 1 410878 1 798162 1 499868 1 306163 1 0
...
```
As expected, the 13-dimensional dense feature is printed first, followed by the individual sparse features, and finally the 1-dimensional label with value 0.
> Notes on using Dataset
> - How Dataset works: the data is printed to a buffer and then read back by C++ code. Therefore, no print statements unrelated to data reading may be added to the dataset reading code, or the C++ side will receive corrupted data.
> - Dataset is currently only supported in standard Linux environments such as `Ubuntu` and `CentOS`; running it on `Windows` or `Mac` will produce unexpected errors.
### Debugging with DataLoader
The dataloader output format is `list: [ list[var_1], list[var_2], ... , list[var_3]]`; each sample is placed in a **list[list]**, where list[0] is the first variable.
Debugging `criteo_reader` directly, the ideal output looks like this (excerpt):
```bash
...
[[0.0, 0.004975124378109453, 0.05, 0.08, 0.207421875, 0.028, 0.35, 0.08, 0.082, 0.0, 0.4, 0.0, 0.08], [560746], [902436], [262029], [182633], [368411], [735166], [321120], [39572], [185732], [140298], [926671], [81559], [461249], [728372], [915018], [907965], [818961], [850958], [311492], [980340], [254960], [175041], [524857], [764893], [526288], [220126], [0]]
...
```
As expected, the first element is the list holding the 13-dimensional dense feature, followed by the individual sparse features, each in its own list, and the last element is the 1-dimensional label list with value 0.
# PaddleRec design
## PaddleRec overall design
PaddleRec abstracts the training and inference workflow of recommendation models into five major modules:
* [Engine: workflow execution engine](#engine)
* [Trainer: concrete workflow definition](#trainer)
* [Model: network definition](#model)
* [Reader: data reading definition](#reader)
* [Metric: metric calculation and printing](#metric)
The layered structure and the call relations during one-command training are shown in the figure below:
<p align="center">
<img align="center" src="imgs/design.png">
<p>
The file structure of core is as follows; each module is introduced below.
```
.core
├── engine/ execution engine implementations
├── metrics/ global metric implementations
├── modules/ custom op implementations
├── trainers/ workflow implementations
├── utils/ helper utilities
├── factory.py registration of workflows
├── layer.py custom op base class
├── metric.py Metric base class
├── model.py Model base class
├── reader.py Reader base class
└── trainer.py Trainer base class
```
## Engine
The Engine is the execution engine of the whole training job; it is independent of the network definition and the data, and depends only on the current run mode, runtime environment, and device.
The run modes are:
- single-machine
- distributed
- locally simulated distributed
The runtime environments are:
- Linux
- Windows
- Mac
The devices are:
- CPU
- GPU
- AI accelerators
When the user runs `python -m paddlerec.run`, the appropriate execution engine is first chosen according to the configuration in the `yaml` file. The following code is in [run.py](../run.py):
```python
engine_registry()
which_engine = get_engine(args)
engine = which_engine(args)
engine.run()
```
Taking the `single engine` as an example, the engine's behavior looks like this:
```python
def single_engine(args):
trainer = get_trainer_prefix(args) + "SingleTrainer"
single_envs = {}
single_envs["train.trainer.trainer"] = trainer
single_envs["train.trainer.threads"] = "2"
single_envs["train.trainer.engine"] = "single"
single_envs["train.trainer.device"] = args.device
single_envs["train.trainer.platform"] = envs.get_platform()
print("use {} engine to run model: {}".format(trainer, args.model))
set_runtime_envs(single_envs, args.model)
trainer = TrainerFactory.create(args.model)
return trainer
```
After single_engine is called, it mainly does two things:
1. Based on the `yaml` configuration file, it sets the **environment variables of the current process**; all later stages depend on these environment variables.
2. Based on the model and the environment, it selects and initializes the `Trainer` used for the workflow.
Elaborating on the first step for the other engines:
- The local-simulation-of-distributed engine additionally sets the environment variables needed to simulate a distributed job on top of the single-machine ones, e.g. assigning a different communication port and ID to each process. It then starts multiple `Trainer`s to carry out the locally simulated distributed run.
- The distributed engine, on top of the single-machine environment variables, packages, uploads, and submits the distributed job according to the script or configuration file given via the `-b --backend` run argument. The script format depends on the cluster the job runs on, e.g. MPI/K8S/PaddleCloud, and users can customize the distributed launch logic.
For a custom Engine implementation, see [local_cluster.py](../core/engine/local_cluster.py).
## Trainer
The `Trainer` is the concrete implementation of the training and inference workflow; it runs the stages defined for the model and is closely tied to the model, reader, and metric. PaddleRec defines the training stages with finite-state-machine logic, and different Trainer subclasses implement the special requirements of each stage. The state machine's stages are registered in `def processor_register()`.
Taking SingleTrainer as an example, the Trainer behaves as follows:
```python
class SingleTrainer(TranspileTrainer):
def processor_register(self):
self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('startup_pass', self.startup)
if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None, "train.reader") != "DataLoader":
self.regist_context_processor('train_pass', self.dataset_train)
else:
self.regist_context_processor('train_pass', self.dataloader_train)
self.regist_context_processor('infer_pass', self.infer)
self.regist_context_processor('terminal_pass', self.terminal)
```
SingleTrainer first registers the steps required to complete the job. Each step is added, in registration order, to the dict named `status_processor` in the `Trainer` base class; the execution order can be controlled by changing the value of `context['status']` inside each step, which specifies which step runs next.
SingleTrainer defines the following 6 steps:
1. uninit: always runs first; decides the model object via the environment variables
2. init_pass: calls the model's interfaces, builds the model network, and initializes the fetch and metric variables
3. startup_pass: initializes the parameters of the model network, i.e. run(fluid.default_startup_program)
4. train_pass: trains with either `dataset` or `dataloader` depending on the environment
5. infer_pass: after training finishes, evaluates the saved models on the test set
6. terminal_pass: prints global variables, inference results, and other custom information
For a custom Trainer implementation, see [single_trainer.py](../core/trainers/single_trainer.py).
## Model
Model defines the paradigm every model implementation follows: as long as a model inherits and implements the base-class functions and assigns the relevant members, the Trainer can call it correctly.
Let's first look at some important definitions in the Model base class to get an initial idea of the implementation flow.
```python
class Model(object):
__metaclass__ = abc.ABCMeta
def __init__(self, config):
self._cost = None
self._metrics = {}
self._data_var = []
self._infer_data_var = []
self._infer_results = {}
self._data_loader = None
self._infer_data_loader = None
self._fetch_interval = 20
self._namespace = "train.model"
self._platform = envs.get_platform()
def get_inputs(self):
return self._data_var
@abc.abstractmethod
def train_net(self):
pass
@abc.abstractmethod
def infer_net(self):
pass
def get_avg_cost(self):
return self._cost
```
Every model must override `def train_net` and `def infer_net`, assign the `self._data_var` and `self._cost` members, specify the model entry points, and implement the overall network logic. For more complex needs, the following interfaces can be overridden individually to implement the required functionality:
```python
def get_infer_inputs(self):
return self._infer_data_var
def get_infer_results(self):
return self._infer_results
def get_metrics(self):
return self._metrics
def get_fetch_period(self):
return self._fetch_interval
```
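For orientation only, a minimal hypothetical subclass that satisfies these requirements might look as follows; the class name, input shapes, and layer sizes are placeholders, not the DNN example referenced below:
```python
import paddle.fluid as fluid
from paddlerec.core.model import Model  # the base class shown above

class MyModel(Model):
    def __init__(self, config):
        Model.__init__(self, config)

    def train_net(self):
        # 1. declare the inputs and register them in self._data_var
        x = fluid.data(name="x", shape=[-1, 13], dtype="float32")
        y = fluid.data(name="y", shape=[-1, 1], dtype="float32")
        self._data_var.extend([x, y])

        # 2. build the network and assign the loss to self._cost
        hidden = fluid.layers.fc(input=x, size=64, act="relu")
        pred = fluid.layers.fc(input=hidden, size=1)
        loss = fluid.layers.square_error_cost(input=pred, label=y)
        self._cost = fluid.layers.reduce_mean(loss)
        self._metrics["loss"] = self._cost

    def infer_net(self):
        # reuse the training network for brevity
        self.train_net()
```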
For a concrete model implementation, see the DNN example [model.py](../../models/rank/dnn/../../../paddlerec/core/model.py).
## Reader
PaddleRec chooses the data IO method according to the runtime environment: on Linux, `Dataset` is preferred; on Windows and Mac, `Dataloader` is preferred.
For an introduction to Dataset, see [DatasetFactory](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/dataset_cn/DatasetFactory_cn.html).
For an introduction to Dataloader, see [asynchronous data reading](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/data_preparing/use_py_reader.html).
Since both of these efficient data IO methods still have a steep learning curve, PaddleRec wraps them in a higher-level abstraction: the user only needs to implement the per-line processing logic, and the rest is handled by PaddleRec's Reader base class.
First, browse the definition of the Reader base class to get an initial impression:
```python
class Reader(dg.MultiSlotDataGenerator):
__metaclass__ = abc.ABCMeta
def __init__(self, config):
dg.MultiSlotDataGenerator.__init__(self)
if os.path.isfile(config):
with open(config, 'r') as rb:
_config = yaml.load(rb.read(), Loader=yaml.FullLoader)
else:
raise ValueError("reader config only support yaml")
envs.set_global_envs(_config)
envs.update_workspace()
@abc.abstractmethod
def init(self):
pass
@abc.abstractmethod
def generate_sample(self, line):
pass
```
What the user needs to care about and implement are `def init(self)` and `def generate_sample(self, line)`, which respectively initialize the variables needed for preprocessing and implement the per-line string splitting and processing logic.
Once these two functions are defined and the custom Reader is complete, PaddleRec uses
- [dataset_instance.py](../core/utils/dataset_instance.py)
- [dataloader_instance.py](../core/utils/dataloader_instance.py)
to finish constructing the reader.
For the Reader's data-processing logic, see [criteo_reader.py](../../models/rank/../../paddlerec/models/rank/criteo_reader.py).
## Metric
Training is always accompanied by printing training metrics. On a single machine this is simple, but in distributed training the per-node metrics can differ significantly from the global ones, e.g. `auc` and the positive/negative order ratio `pn`. Targeting large-scale distributed training, PaddleRec abstracts the metric-printing logic into a separate implementation to solve global metric reporting in distributed training.
The Metric base class defines the basic interface as follows:
```python
class Metric(object):
__metaclass__ = abc.ABCMeta
def __init__(self, config):
""" init """
pass
@abc.abstractmethod
def clear(self, scope, params):
"""
clear current value
Args:
scope: value container
params: extend variables for clear
"""
pass
@abc.abstractmethod
def calculate(self, scope, params):
"""
calculate result
Args:
scope: value container
params: extend variables for calculate
"""
pass
@abc.abstractmethod
def get_result(self):
"""
Return:
result(dict) : calculate result
"""
pass
@abc.abstractmethod
def get_result_to_string(self):
"""
Return:
result(string) : calculate result with string format, for output
"""
pass
```
To compute and report a global metric, each of the four member functions above must be overridden and implemented. For a concrete example, see [auc_metric.py](../core/metrics/auc_metrics.py); a rough sketch follows below.
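As a sketch only — the class name, the `params["var_name"]` convention, and the way the value is read from the scope are assumptions, not the auc_metric.py implementation — a global metric could be structured like this:
```python
import numpy as np
from paddlerec.core.metric import Metric  # the base class shown above

class GlobalMeanMetric(Metric):
    """Hypothetical metric: averages one scalar variable over the whole pass."""

    def __init__(self, config):
        Metric.__init__(self, config)
        self._result = {"sum": 0.0, "count": 0}

    def clear(self, scope, params):
        self._result = {"sum": 0.0, "count": 0}

    def calculate(self, scope, params):
        # params["var_name"] is assumed to name a scalar variable in the scope;
        # a real distributed metric would also aggregate across trainers here
        tensor = scope.find_var(params["var_name"]).get_tensor()
        self._result["sum"] += float(np.array(tensor)[0])
        self._result["count"] += 1

    def get_result(self):
        count = max(self._result["count"], 1)
        return {"mean": self._result["sum"] / count}

    def get_result_to_string(self):
        return "global mean: %.6f" % self.get_result()["mean"]
```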
# PaddleRec distributed training
## Running PaddleRec distributed
> Placeholder
### Locally simulated distributed training
> Placeholder
### Distributed training on a K8S cluster
> Placeholder
# FAQ
> Placeholder