From d3688c2d9f77d9eab42ab0f0d3372208dc1d384d Mon Sep 17 00:00:00 2001 From: frankwhzhang Date: Tue, 27 Nov 2018 17:12:33 +0800 Subject: [PATCH] fix cluster_train.py v2 --- fluid/PaddleRec/gru4rec/README.md | 89 ++++-- fluid/PaddleRec/gru4rec/__init__.py | 0 fluid/PaddleRec/gru4rec/cluster_train.py | 142 +++++++++ fluid/PaddleRec/gru4rec/cluster_train.sh | 62 ++++ fluid/PaddleRec/gru4rec/infer.py | 56 ++-- fluid/PaddleRec/gru4rec/net.py | 50 ++++ fluid/PaddleRec/gru4rec/small_test.txt | 100 ------- fluid/PaddleRec/gru4rec/small_train.txt | 100 ------- .../gru4rec/test_data/small_test.txt | 100 +++++++ fluid/PaddleRec/gru4rec/text2paddle.py | 82 ++++++ fluid/PaddleRec/gru4rec/train.py | 277 +++++++----------- .../gru4rec/train_data/small_train.txt | 100 +++++++ fluid/PaddleRec/gru4rec/utils.py | 108 +++---- fluid/PaddleRec/gru4rec/vocab.txt | 1 + 14 files changed, 771 insertions(+), 496 deletions(-) delete mode 100644 fluid/PaddleRec/gru4rec/__init__.py create mode 100644 fluid/PaddleRec/gru4rec/cluster_train.py create mode 100644 fluid/PaddleRec/gru4rec/cluster_train.sh create mode 100644 fluid/PaddleRec/gru4rec/net.py delete mode 100644 fluid/PaddleRec/gru4rec/small_test.txt delete mode 100644 fluid/PaddleRec/gru4rec/small_train.txt create mode 100644 fluid/PaddleRec/gru4rec/test_data/small_test.txt create mode 100644 fluid/PaddleRec/gru4rec/text2paddle.py create mode 100644 fluid/PaddleRec/gru4rec/train_data/small_train.txt create mode 100644 fluid/PaddleRec/gru4rec/vocab.txt diff --git a/fluid/PaddleRec/gru4rec/README.md b/fluid/PaddleRec/gru4rec/README.md index 6b3c9c66..e88e6172 100644 --- a/fluid/PaddleRec/gru4rec/README.md +++ b/fluid/PaddleRec/gru4rec/README.md @@ -7,10 +7,15 @@ ├── README.md # 文档 ├── train.py # 训练脚本 ├── infer.py # 预测脚本 +├── net.py # 网络结构 +├── text2paddle.py # 文本数据转paddle数据 +├── cluster_train.py # 多机训练 +├── cluster_train.sh # 多机训练脚本 ├── utils # 通用函数 ├── convert_format.py # 转换数据格式 -├── small_train.txt # 小样本训练集 -└── small_test.txt # 小样本测试集 
+├── vocab.txt # 小样本字典 +├── train_data # 小样本训练目录 +└── test_data # 小样本测试目录 ``` @@ -25,7 +30,9 @@ GRU4REC模型的介绍可以参阅论文[Session-based Recommendations with Recu session-based推荐应用场景非常广泛,比如用户的商品浏览、新闻点击、地点签到等序列数据。 +运行样例程序可跳过'RSC15 数据下载及预处理'部分 ## RSC15 数据下载及预处理 + 运行命令 下载RSC15官网数据集 ``` curl -Lo yoochoose-data.7z https://s3-eu-west-1.amazonaws.com/yc-rdata/yoochoose-data.7z @@ -79,45 +86,49 @@ python convert_format.py 214717867 214717867 ``` +根据训练和测试文件生成字典和对应的paddle输入文件 + +注意需要将训练文件放到一个目录下面,测试文件放到一个目录下面,同时支持多训练文件 +``` +python text2paddle.py raw_train_data/ raw_test_data/ train_data test_data vocab.txt +``` + +转化后生成的格式如下,可参考train_data/small_train.txt +``` +197 196 198 236 +93 93 384 362 363 43 +336 364 407 +421 322 +314 388 +128 58 +138 138 +46 46 46 +34 34 57 57 57 342 228 321 346 357 59 376 +110 110 +``` + ## 训练 '--use_cuda 1' 表示使用gpu, 缺省表示使用cpu '--parallel 1' 表示使用多卡,缺省表示使用单卡 +具体的参数配置可运行 +``` +python train.py -h +``` + GPU 环境 -运行命令 `CUDA_VISIBLE_DEVICES=0 python train.py train_file test_file --use_cuda 1` 开始训练模型。 +运行命令开始训练模型。 ``` -CUDA_VISIBLE_DEVICES=0 python train.py small_train.txt small_test.txt --use_cuda 1 +CUDA_VISIBLE_DEVICES=0 python train.py --train_dir train_data/ --use_cuda 1 ``` CPU 环境 -运行命令 `python train.py train_file test_file` 开始训练模型。 -``` -python train.py small_train.txt small_test.txt +运行命令开始训练模型。 ``` - -当前支持的参数可参见[train.py](./train.py) `train_net` 函数 -```python - batch_size = 50 # batch大小 推荐500() - args = parse_args() - vocab, train_reader, test_reader = utils.prepare_data( - train_file, test_file,batch_size=batch_size * get_cards(args),\ - buffer_size=1000, word_freq_threshold=0) # buffer_size 局部序列长度排序 - train( - train_reader=train_reader, - vocab=vocab, - network=network, - hid_size=100, # embedding and hidden size - base_lr=0.01, # base learning rate - batch_size=batch_size, - pass_num=10, # the number of passed for training - use_cuda=use_cuda, # whether to use GPU card - parallel=parallel, # whether to be parallel - model_dir="model_recall20", # 
directory to save model - init_low_bound=-0.1, # uniform parameter initialization lower bound - init_high_bound=0.1) # uniform parameter initialization upper bound +python train.py --train_dir train_data/ ``` ## 自定义网络结构 -可在[train.py](./train.py) `network` 函数中调整网络结构,当前的网络结构如下: +可在[net.py](./net.py) `network` 函数中调整网络结构,当前的网络结构如下: ```python emb = fluid.layers.embedding( input=src, @@ -206,9 +217,10 @@ model saved in model_recall20/epoch_1 ``` ## 预测 -运行命令 `CUDA_VISIBLE_DEVICES=0 python infer.py model_dir start_epoch last_epoch(inclusive) train_file test_file` 开始预测.其中,start_epoch指定开始预测的轮次,last_epoch指定结束的轮次,例如 -```python -CUDA_VISIBLE_DEVICES=0 python infer.py model 1 10 small_train.txt small_test.txt +运行命令 开始预测. + +``` +CUDA_VISIBLE_DEVICES=0 python infer.py --test_dir test_data/ --model_dir model_recall20/ --start_index 1 --last_index 10 --use_cuda 1 ``` ## 预测结果示例 @@ -224,3 +236,16 @@ model:model_r@20/epoch_8 recall@20:0.679 time_cost(s):12.37 model:model_r@20/epoch_9 recall@20:0.680 time_cost(s):12.22 model:model_r@20/epoch_10 recall@20:0.681 time_cost(s):12.2 ``` + + +## 多机训练 +厂内用户可以参考[wiki](http://wiki.baidu.com/pages/viewpage.action?pageId=628300529)利用paddlecloud 配置多机环境 + +可参考cluster_train.py 配置其他多机环境 + +运行命令本地模拟多机场景 +``` +sh cluster_train.sh +``` + +注意本地模拟需要关闭代理 diff --git a/fluid/PaddleRec/gru4rec/__init__.py b/fluid/PaddleRec/gru4rec/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/fluid/PaddleRec/gru4rec/cluster_train.py b/fluid/PaddleRec/gru4rec/cluster_train.py new file mode 100644 index 00000000..18b6e0df --- /dev/null +++ b/fluid/PaddleRec/gru4rec/cluster_train.py @@ -0,0 +1,142 @@ +import os +import sys +import time +import six +import numpy as np +import math +import argparse +import paddle.fluid as fluid +import paddle +import time +import utils +import net + +SEED = 102 + + +def parse_args(): + parser = argparse.ArgumentParser("gru4rec benchmark.") + parser.add_argument( + '--train_dir', type=str, default='train_data', 
help='train file address') + parser.add_argument( + '--vocab_path', type=str, default='vocab.txt', help='vocab file address') + parser.add_argument( + '--is_local', type=int, default=1, help='whether local') + parser.add_argument( + '--hid_size', type=int, default=100, help='hid size') + parser.add_argument( + '--model_dir', type=str, default='model_recall20', help='model dir') + parser.add_argument( + '--batch_size', type=int, default=5, help='num of batch size') + parser.add_argument( + '--pass_num', type=int, default=10, help='num of epoch') + parser.add_argument( + '--print_batch', type=int, default=10, help='num of print batch') + parser.add_argument( + '--use_cuda', type=int, default=0, help='whether use gpu') + parser.add_argument( + '--base_lr', type=float, default=0.01, help='learning rate') + parser.add_argument( + '--num_devices', type=int, default=1, help='Number of GPU devices') + parser.add_argument( + '--role', type=str, default='pserver', help='trainer or pserver') + parser.add_argument( + '--endpoints', type=str, default='127.0.0.1:6000', help='The pserver endpoints, like: 127.0.0.1:6000, 127.0.0.1:6001') + parser.add_argument( + '--current_endpoint', type=str, default='127.0.0.1:6000', help='The current_endpoint') + parser.add_argument( + '--trainer_id', type=int, default=0, help='trainer id, only trainer_id=0 save model') + parser.add_argument( + '--trainers', type=int, default=1, help='The num of trainers, (default: 1)') + args = parser.parse_args() + return args + +def get_cards(args): + return args.num_devices + +def train(): + """ do training """ + args = parse_args() + hid_size = args.hid_size + train_dir = args.train_dir + vocab_path = args.vocab_path + use_cuda = True if args.use_cuda else False + print("use_cuda:", use_cuda) + batch_size = args.batch_size + vocab_size, train_reader = utils.prepare_data( + train_dir, vocab_path, batch_size=batch_size * get_cards(args),\ + buffer_size=1000, word_freq_threshold=0, is_train=True) + + # Train 
program + src_wordseq, dst_wordseq, avg_cost, acc = net.network(vocab_size=vocab_size, hid_size=hid_size) + + # Optimization to minimize lost + sgd_optimizer = fluid.optimizer.SGD(learning_rate=args.base_lr) + sgd_optimizer.minimize(avg_cost) + + + def train_loop(main_program): + """ train network """ + pass_num = args.pass_num + model_dir = args.model_dir + fetch_list = [avg_cost.name] + + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + total_time = 0.0 + for pass_idx in six.moves.xrange(pass_num): + epoch_idx = pass_idx + 1 + print("epoch_%d start" % epoch_idx) + + t0 = time.time() + i = 0 + newest_ppl = 0 + for data in train_reader(): + i += 1 + lod_src_wordseq = utils.to_lodtensor([dat[0] for dat in data], + place) + lod_dst_wordseq = utils.to_lodtensor([dat[1] for dat in data], + place) + ret_avg_cost = exe.run(main_program, + feed={ "src_wordseq": lod_src_wordseq, + "dst_wordseq": lod_dst_wordseq}, + fetch_list=fetch_list) + avg_ppl = np.exp(ret_avg_cost[0]) + newest_ppl = np.mean(avg_ppl) + if i % args.print_batch == 0: + print("step:%d ppl:%.3f" % (i, newest_ppl)) + + t1 = time.time() + total_time += t1 - t0 + print("epoch:%d num_steps:%d time_cost(s):%f" % + (epoch_idx, i, total_time / epoch_idx)) + save_dir = "%s/epoch_%d" % (model_dir, epoch_idx) + feed_var_names = ["src_wordseq", "dst_wordseq"] + fetch_vars = [avg_cost, acc] + if args.trainer_id == 0: + fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe) + print("model saved in %s" % save_dir) + print("finish training") + + if args.is_local: + print("run local training") + train_loop(fluid.default_main_program()) + + else: + print("run distribute training") + t = fluid.DistributeTranspiler() + t.transpile(args.trainer_id, pservers=args.endpoints, trainers=args.trainers) + if args.role == "pserver": + print("run psever") + pserver_prog = t.get_pserver_program(args.current_endpoint) + 
pserver_startup = t.get_startup_program(args.current_endpoint, + pserver_prog) + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(pserver_startup) + exe.run(pserver_prog) + elif args.role == "trainer": + print("run trainer") + train_loop(t.get_trainer_program()) +if __name__ == "__main__": + train() diff --git a/fluid/PaddleRec/gru4rec/cluster_train.sh b/fluid/PaddleRec/gru4rec/cluster_train.sh new file mode 100644 index 00000000..2711ffad --- /dev/null +++ b/fluid/PaddleRec/gru4rec/cluster_train.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +#export GLOG_v=30 +#export GLOG_logtostderr=1 + +# start pserver0 +python cluster_train.py \ + --train_dir train_data \ + --model_dir cluster_model \ + --vocab_path vocab.txt \ + --batch_size 5 \ + --is_local 0 \ + --role pserver \ + --endpoints 127.0.0.1:6000,127.0.0.1:6001 \ + --current_endpoint 127.0.0.1:6000 \ + --trainers 2 \ + > pserver0.log 2>&1 & + +# start pserver1 +python cluster_train.py \ + --train_dir train_data \ + --model_dir cluster_model \ + --vocab_path vocab.txt \ + --batch_size 5 \ + --is_local 0 \ + --role pserver \ + --endpoints 127.0.0.1:6000,127.0.0.1:6001 \ + --current_endpoint 127.0.0.1:6001 \ + --trainers 2 \ + > pserver1.log 2>&1 & + +# start trainer0 +#CUDA_VISIBLE_DEVICES=1 python cluster_train.py \ +python cluster_train.py \ + --train_dir train_data \ + --model_dir cluster_model \ + --vocab_path vocab.txt \ + --batch_size 5 \ + --print_batch 10 \ + --use_cuda 0 \ + --is_local 0 \ + --role trainer \ + --endpoints 127.0.0.1:6000,127.0.0.1:6001 \ + --trainers 2 \ + --trainer_id 0 \ + > trainer0.log 2>&1 & + +# start trainer1 +#CUDA_VISIBLE_DEVICES=2 python cluster_train.py \ +python cluster_train.py \ + --train_dir train_data \ + --model_dir cluster_model \ + --vocab_path vocab.txt \ + --batch_size 5 \ + --print_batch 10 \ + --use_cuda 0 \ + --is_local 0 \ + --role trainer \ + --endpoints 127.0.0.1:6000,127.0.0.1:6001 \ + --trainers 2 \ + --trainer_id 1 \ + > trainer1.log 2>&1 & diff --git 
a/fluid/PaddleRec/gru4rec/infer.py b/fluid/PaddleRec/gru4rec/infer.py index ac1a63b4..ccb2ce09 100644 --- a/fluid/PaddleRec/gru4rec/infer.py +++ b/fluid/PaddleRec/gru4rec/infer.py @@ -1,3 +1,4 @@ +import argparse import sys import time import math @@ -10,6 +11,22 @@ import paddle import utils +def parse_args(): + parser = argparse.ArgumentParser("gru4rec benchmark.") + parser.add_argument( + '--test_dir', type=str, default='test_data', help='test file address') + parser.add_argument( + '--start_index', type=int, default='1', help='start index') + parser.add_argument( + '--last_index', type=int, default='10', help='end index') + parser.add_argument( + '--model_dir', type=str, default='model_recall20', help='model dir') + parser.add_argument( + '--use_cuda', type=int, default='1', help='whether use cuda') + parser.add_argument( + '--batch_size', type=int, default='5', help='batch_size') + args = parser.parse_args() + return args def infer(test_reader, use_cuda, model_path): """ inference function """ @@ -41,7 +58,7 @@ def infer(test_reader, use_cuda, model_path): label_data, axis=0).astype("int64")) accum_num_sum += (data_length) accum_num_recall += (data_length * acc_) - if step_id % 100 == 0: + if step_id % 1 == 0: print("step:%d " % (step_id), accum_num_recall / accum_num_sum) t1 = time.time() print("model:%s recall@20:%.3f time_cost(s):%.2f" % @@ -49,31 +66,18 @@ def infer(test_reader, use_cuda, model_path): if __name__ == "__main__": - if len(sys.argv) != 6: - print( - "Usage: %s model_dir start_epoch last_epoch(inclusive) train_file test_file" - ) - exit(0) - train_file = "" - test_file = "" - model_dir = sys.argv[1] - try: - start_index = int(sys.argv[2]) - last_index = int(sys.argv[3]) - train_file = sys.argv[4] - test_file = sys.argv[5] - except: - iprint( - "Usage: %s model_dir start_ipoch last_epoch(inclusive) train_file test_file" - ) - exit(-1) - vocab, train_reader, test_reader = utils.prepare_data( - train_file, - test_file, - batch_size=5, - 
buffer_size=1000, - word_freq_threshold=0) + args = parse_args() + start_index = args.start_index + last_index = args.last_index + test_dir = args.test_dir + model_dir = args.model_dir + batch_size = args.batch_size + use_cuda = True if args.use_cuda else False + print("start index: ", start_index, " last_index:" ,last_index) + vocab_size, test_reader = utils.prepare_data( + test_dir, "", batch_size=batch_size, + buffer_size=1000, word_freq_threshold=0, is_train=False) for epoch in xrange(start_index, last_index + 1): epoch_path = model_dir + "/epoch_" + str(epoch) - infer(test_reader=test_reader, use_cuda=True, model_path=epoch_path) + infer(test_reader=test_reader, use_cuda=use_cuda, model_path=epoch_path) diff --git a/fluid/PaddleRec/gru4rec/net.py b/fluid/PaddleRec/gru4rec/net.py new file mode 100644 index 00000000..fea2b3e9 --- /dev/null +++ b/fluid/PaddleRec/gru4rec/net.py @@ -0,0 +1,50 @@ +import paddle.fluid as fluid + +def network(vocab_size, + hid_size=100, + init_low_bound=-0.04, + init_high_bound=0.04): + """ network definition """ + emb_lr_x = 10.0 + gru_lr_x = 1.0 + fc_lr_x = 1.0 + # Input data + src_wordseq = fluid.layers.data( + name="src_wordseq", shape=[1], dtype="int64", lod_level=1) + dst_wordseq = fluid.layers.data( + name="dst_wordseq", shape=[1], dtype="int64", lod_level=1) + + emb = fluid.layers.embedding( + input=src_wordseq, + size=[vocab_size, hid_size], + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=init_low_bound, high=init_high_bound), + learning_rate=emb_lr_x), + is_sparse=True) + fc0 = fluid.layers.fc(input=emb, + size=hid_size * 3, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=init_low_bound, high=init_high_bound), + learning_rate=gru_lr_x)) + gru_h0 = fluid.layers.dynamic_gru( + input=fc0, + size=hid_size, + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=init_low_bound, high=init_high_bound), + learning_rate=gru_lr_x)) + + fc = 
fluid.layers.fc(input=gru_h0, + size=vocab_size, + act='softmax', + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Uniform( + low=init_low_bound, high=init_high_bound), + learning_rate=fc_lr_x)) + + cost = fluid.layers.cross_entropy(input=fc, label=dst_wordseq) + acc = fluid.layers.accuracy(input=fc, label=dst_wordseq, k=20) + avg_cost = fluid.layers.mean(x=cost) + return src_wordseq, dst_wordseq, avg_cost, acc diff --git a/fluid/PaddleRec/gru4rec/small_test.txt b/fluid/PaddleRec/gru4rec/small_test.txt deleted file mode 100644 index 0cdaa7cc..00000000 --- a/fluid/PaddleRec/gru4rec/small_test.txt +++ /dev/null @@ -1,100 +0,0 @@ -214586805 214509260 -214857547 214857268 214857260 -214859848 214857787 -214687963 214531502 214687963 -214696532 214859034 214858850 -214857570 214857810 214857568 214857787 214857182 -214857562 214857570 214857562 214857568 -214859132 214545928 214859132 214551913 -214858843 214859859 214858912 214858691 214859900 -214561888 214561888 -214688430 214688435 214688430 -214536302 214531376 214531659 214531440 214531466 214513382 214550996 -214854930 214854930 -214858856 214690775 214859306 -214859872 214858912 214858689 -214859310 214859338 214859338 214859942 214859293 214859889 214859338 214859889 214859075 214859338 214859338 214859889 -214574906 214574906 -214859342 214859342 214858777 214851155 214851152 214572433 -214537127 214857257 -214857570 214857570 214857568 214857562 214857015 -214854352 214854352 214854354 -214738466 214855010 214857605 214856552 214574906 214857765 214849299 -214858365 214859900 214859126 214858689 214859126 214859126 214857759 214858850 214859895 214859300 -214857260 214561481 214848995 214849052 214865212 -214857596 214819412 214819412 -214849342 214849342 -214859902 214854845 214854845 214854825 -214859306 214859126 214859126 -214644962 214644960 214644958 -214696432 214696434 -214708372 214508287 214684093 -214857015 214857015 214858847 214690130 -214858787 214859855 -214858847 214696532 
214859304 214854845 -214586805 214586805 -214857568 214857570 -214696532 214858850 214859034 214569238 214568120 214854165 214684785 214854262 214567327 -214602729 214857568 214857596 -214859122 214858687 214859122 214859872 -214555607 214836225 214836225 214836223 -214849299 214829724 214855010 214829801 214574906 214586722 214684307 214857570 -214859872 214695525 -214845947 214586722 214829801 -214829312 214546123 -214849055 214849052 -214509260 214587932 214596435 214644960 214696432 214696434 214545928 214857030 214636329 214832604 214574906 -214586805 214586805 -214587932 214587932 -214857568 214857549 214854894 -214836819 214836819 214595855 214595855 -214858787 214858787 -214854860 214857701 -214848750 214643908 -214858847 214859872 214859038 214859855 214690130 -214847780 214696817 214717305 -214509260 214509260 -214853122 214853122 214853122 214853323 -214858847 214858631 214858691 -214859859 214819807 214853072 214853072 214819730 -214820450 214705115 214586805 -214858787 214859036 -214829842 214864967 -214846033 214850949 -214587932 214586805 214509260 214696432 214855110 214545928 -214858856 214859081 214859306 214858854 -214690839 214690839 214711277 214839607 214582942 214582942 -214857030 214832604 -214857570 214855046 214859870 214577475 214858687 214656380 -214854845 214854845 214854684 214859893 214854845 214854778 -214850630 214848159 214848159 214848159 214848159 214848159 214848159 214848159 -214856248 214856248 -214858365 214858905 214858905 -214712274 214855046 -214845947 214845947 214831946 214717511 214846014 214854729 -214561462 214561462 214561481 214561481 -214836819 214853250 -214858854 214859915 214859306 214854300 -214857660 214857787 214539307 214855010 214855046 214849299 214856981 214849055 -214855046 214854877 214568102 214539523 214579762 214539347 214641127 214600995 214833733 214600995 214684633 214645121 214658040 214712276 214857660 214687895 214854313 214857517 -214845962 214853165 214846119 -214854146 214859034 -214819412 
214819412 214819412 214819412 -214849747 214578350 214561991 -214854341 214854341 -214644855 214644857 214531153 -214644960 214862167 -214640490 214600918 214600922 -214854710 214857759 214859306 -214858843 214859297 214858631 214859117 214858689 214858912 214859902 214690127 -214586805 214586805 -214859306 214859306 214859126 -214859034 214696532 214858850 214859126 214859859 214859034 214859859 214858850 -214857782 214849048 214857787 -214854148 214857787 214854877 -214858631 214858631 214690127 214859034 214858850 214859117 214858631 214859300 214858843 214859859 214859859 -214646036 214646036 -214858847 214858631 214690127 214859297 -214861603 214700002 214700000 214835117 214700000 214857830 214700000 214712235 214700000 214700002 214510700 214835713 214712235 214853321 -214854855 214854815 214854815 -214857185 214854637 214829765 214848384 214829765 214856546 214848596 214835167 214563335 214553837 214536185 214855982 214845515 214550844 214712006 diff --git a/fluid/PaddleRec/gru4rec/small_train.txt b/fluid/PaddleRec/gru4rec/small_train.txt deleted file mode 100644 index a570fd29..00000000 --- a/fluid/PaddleRec/gru4rec/small_train.txt +++ /dev/null @@ -1,100 +0,0 @@ -214536502 214536500 214536506 214577561 -214662742 214662742 214825110 214757390 214757407 214551617 -214716935 214774687 214832672 -214836765 214706482 -214701242 214826623 -214826835 214826715 -214838855 214838855 -214576500 214576500 214576500 -214821275 214821275 214821371 214821371 214821371 214717089 214563337 214706462 214717436 214743335 214826837 214819762 -214717867 214717867 -214836761 214684513 214836761 -214577732 214587013 214577732 -214826897 214820441 -214684093 214684093 214684093 -214561790 214561790 214611457 214611457 -214577732 214577732 -214838503 214838503 214838503 214838503 214838503 214548744 -214718203 214718203 214718203 214718203 -214837485 214837485 214837485 214837487 214837487 214821315 214586711 214821305 214821307 214844357 214821341 214821309 214551617 214551617 
214612920 214837487 -214613743 214613743 214539110 214539110 -214827028 214827017 214537796 214840762 214707930 214707930 214585652 214536197 214536195 214646169 -214579288 214714790 214676070 214601407 -214532036 214700432 -214836789 214836789 214710804 -214537967 214537967 -214718246 214826835 -214835257 214835265 -214834865 214571188 214571188 214571188 214820225 214820225 214820225 214820225 214820225 214820225 214820225 214820225 214706441 214706441 214706441 214706441 -214652878 214716737 214652878 -214684721 214680356 -214551594 214586970 -214826769 214537967 -214819745 214819745 -214691587 214587915 -214821277 214821277 214821277 214821277 214821277 -214716932 214716932 214716932 214716932 214716932 214716932 -214712235 214581489 214602605 -214820441 214826897 214826702 214684513 214838100 214544357 214551626 214691484 -214545935 214819438 214839907 214835917 214836210 -214698491 214523692 -214695307 214695305 214538317 214677448 -214819468 214716977 214716977 214716977 214716977 214716939 -214544355 214601212 214601212 214601212 -214716982 214716984 -214844248 214844248 -214515834 214515830 -214717318 214717318 -214832557 214559660 214559660 214819520 214586540 -214587797 214835775 214844109 -214714794 214601407 214826619 214746427 214821300 214717562 214826927 214748334 214826908 214800262 -214709645 214709645 214709645 214709645 214709645 -214532072 214532070 -214827022 214840419 -214716984 214832657 -214662975 214537779 214840762 -214821277 214821277 214821277 -214748300 214748293 -214826955 214826606 214687642 -214832559 214832559 214832559 214821017 214821017 214572234 214826715 214826715 -214509135 214536853 214509133 214509135 214509135 214509135 214717877 214826615 214716982 -214819472 214687685 -214821285 214821285 214826801 214826801 -214826705 214826705 -214668590 214826872 -214652220 214840483 214840483 214717286 214558807 214821300 214826908 214826908 214826908 214554637 214819430 214819430 214826837 214826837 214820392 214820392 214586694 
214819376 214553844 214601229 214555500 214695127 214819760 214717850 214718385 214743369 214743369 -214648475 214648340 214648438 214648455 214712936 214712887 214696149 214717097 214534352 214534352 214717097 -214560099 214560099 214560099 214832750 214560099 -214685621 214684093 214546097 214685623 -214819685 214839907 214839905 214811752 -214717007 214717003 214716928 -214820842 214819490 -214555869 214537185 -214840599 214835735 -214838100 214706216 -214829737 214821315 -214748293 214748293 -214712272 214820450 -214821380 214821380 -214826799 214827005 214718390 214718396 214826627 -214841060 214841060 -214687768 214706445 -214811752 214811754 -214594678 214594680 214594680 -214821369 214821369 214697771 214697512 214697413 214697409 214652409 214537127 214537127 214820237 214820237 214709645 214699213 214820237 214820237 214820237 214709645 214537127 -214554358 214716950 -214821275 214829741 -214829741 214820842 214821279 214703790 -214716954 214838366 -214821022 214820814 -214684721 214821369 214826833 214819472 -214821315 214821305 -214826702 214821275 -214717847 214819719 214748336 -214536440 214536437 -214512416 214512416 -214839313 214839313 214839313 -214826705 214826705 -214510044 214510044 214510044 214582387 214537535 214584812 214537535 214584810 -214600989 214704180 -214705693 214696824 214705682 214696817 214705691 214705693 214711710 214705691 214705691 214687539 214705687 214744796 214681648 214717307 214577750 214650382 214744796 214696817 214705682 214711710 diff --git a/fluid/PaddleRec/gru4rec/test_data/small_test.txt b/fluid/PaddleRec/gru4rec/test_data/small_test.txt new file mode 100644 index 00000000..b4bf7189 --- /dev/null +++ b/fluid/PaddleRec/gru4rec/test_data/small_test.txt @@ -0,0 +1,100 @@ +0 16 +475 473 155 +491 21 +96 185 96 +29 14 13 +5 481 11 21 470 +70 5 70 11 +167 42 167 217 +72 15 73 161 172 +82 82 +97 297 97 +193 182 186 183 184 177 214 +152 152 +163 298 7 +39 73 71 +490 23 23 496 488 74 23 74 486 23 23 74 +17 17 +170 170 483 
444 443 234 +25 472 +5 5 11 70 69 +149 149 455 +356 68 477 468 17 479 66 +159 172 6 71 6 6 158 13 494 169 +155 44 438 144 500 +156 9 9 +146 146 +173 10 10 461 +7 6 6 +269 48 268 +50 100 +323 174 18 +69 69 22 98 +38 171 +22 29 489 10 +0 0 +11 5 +29 13 14 232 231 451 289 452 229 +260 11 156 +166 160 166 39 +223 134 134 420 +66 401 68 132 17 84 287 5 +39 304 +65 84 132 +400 211 +145 144 +16 28 254 48 50 100 42 154 262 133 17 +0 0 +28 28 +11 476 464 +61 61 86 86 +38 38 +463 478 +437 265 +22 39 485 171 98 +434 51 344 +16 16 +67 67 67 448 +22 12 161 +15 377 147 147 374 +119 317 0 +38 484 +403 499 +432 442 +28 0 16 50 465 42 +163 487 7 162 +99 99 325 423 83 83 +154 133 +5 37 492 235 160 279 +10 10 457 493 10 460 +441 4 4 4 4 4 4 4 +153 153 +159 164 164 +328 37 +65 65 404 347 431 459 +80 80 44 44 +61 446 +162 495 7 453 +157 21 204 68 37 66 469 145 +37 151 230 206 240 205 264 87 409 87 288 270 280 329 157 296 454 474 +430 445 433 +449 14 +9 9 9 9 +440 238 226 +148 148 +266 267 181 +48 498 +263 255 256 +458 158 7 +72 168 12 165 71 73 173 49 +0 0 +7 7 6 +14 29 13 6 15 14 15 13 +480 439 21 +450 21 151 +12 12 49 14 13 165 12 169 72 15 15 +91 91 +22 12 49 168 +497 101 30 411 30 482 30 53 30 101 176 415 53 447 +462 150 150 +471 456 131 435 131 467 436 412 227 218 190 466 429 213 326 diff --git a/fluid/PaddleRec/gru4rec/text2paddle.py b/fluid/PaddleRec/gru4rec/text2paddle.py new file mode 100644 index 00000000..563a8cad --- /dev/null +++ b/fluid/PaddleRec/gru4rec/text2paddle.py @@ -0,0 +1,82 @@ +import sys +import six +import collections +import os + +def word_count(input_file, word_freq=None): + """ + compute word count from corpus + """ + if word_freq is None: + word_freq = collections.defaultdict(int) + + for l in input_file: + for w in l.strip().split(): + word_freq[w] += 1 + + return word_freq + + +def build_dict(min_word_freq=0, train_dir="", test_dir=""): + """ + Build a word dictionary from the corpus, Keys of the dictionary are words, + and values are zero-based IDs of 
these words. + """ + word_freq = collections.defaultdict(int) + files = os.listdir(train_dir) + for fi in files: + with open(train_dir + '/' + fi, "r") as f: + word_freq = word_count(f, word_freq) + files = os.listdir(test_dir) + for fi in files: + with open(test_dir + '/' + fi, "r") as f: + word_freq = word_count(f, word_freq) + + word_freq = [x for x in six.iteritems(word_freq) if x[1] > min_word_freq] + word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) + words, _ = list(zip(*word_freq_sorted)) + word_idx = dict(list(zip(words, six.moves.range(len(words))))) + return word_idx + + +def write_paddle(word_idx, train_dir, test_dir, output_train_dir, output_test_dir): + files = os.listdir(train_dir) + if not os.path.exists(output_train_dir): + os.mkdir(output_train_dir) + for fi in files: + with open(train_dir + '/' + fi, "r") as f: + with open(output_train_dir + '/' + fi, "w") as wf: + for l in f: + l = l.strip().split() + l = [word_idx.get(w) for w in l] + for w in l: + wf.write(str(w) + " ") + wf.write("\n") + + files = os.listdir(test_dir) + if not os.path.exists(output_test_dir): + os.mkdir(output_test_dir) + for fi in files: + with open(test_dir + '/' + fi, "r") as f: + with open(output_test_dir + '/' + fi, "w") as wf: + for l in f: + l = l.strip().split() + l = [word_idx.get(w) for w in l] + for w in l: + wf.write(str(w) + " ") + wf.write("\n") + +def text2paddle(train_dir, test_dir, output_train_dir, output_test_dir, output_vocab): + vocab = build_dict(0, train_dir, test_dir) + with open(output_vocab, "w") as wf: + wf.write(str(len(vocab)) + "\n") + #wf.write(str(vocab)) + write_paddle(vocab, train_dir, test_dir, output_train_dir, output_test_dir) + + +train_dir = sys.argv[1] +test_dir = sys.argv[2] +output_train_dir = sys.argv[3] +output_test_dir = sys.argv[4] +output_vocab = sys.argv[5] +text2paddle(train_dir, test_dir, output_train_dir, output_test_dir, output_vocab) diff --git a/fluid/PaddleRec/gru4rec/train.py 
b/fluid/PaddleRec/gru4rec/train.py index 12bc1bc2..bf35c4e7 100644 --- a/fluid/PaddleRec/gru4rec/train.py +++ b/fluid/PaddleRec/gru4rec/train.py @@ -9,201 +9,142 @@ import paddle.fluid as fluid import paddle import time import utils +import net SEED = 102 def parse_args(): parser = argparse.ArgumentParser("gru4rec benchmark.") - parser.add_argument('train_file') - parser.add_argument('test_file') - parser.add_argument('--use_cuda', help='whether use gpu') - parser.add_argument('--parallel', help='whether parallel') parser.add_argument( - '--enable_ce', - action='store_true', - help='If set, run \ - the task with continuous evaluation logs.') + '--train_dir', type=str, default='train_data', help='train file address') + parser.add_argument( + '--vocab_path', type=str, default='vocab.txt', help='vocab file address') + parser.add_argument( + '--is_local', type=int, default=1, help='whether local') + parser.add_argument( + '--hid_size', type=int, default=100, help='hid size') + parser.add_argument( + '--model_dir', type=str, default='model_recall20', help='model dir') + parser.add_argument( + '--batch_size', type=int, default=5, help='num of batch size') + parser.add_argument( + '--print_batch', type=int, default=10, help='num of print batch') + parser.add_argument( + '--pass_num', type=int, default=10, help='num of epoch') + parser.add_argument( + '--use_cuda', type=int, default=0, help='whether use gpu') + parser.add_argument( + '--parallel', type=int, default=0, help='whether parallel') + parser.add_argument( + '--base_lr', type=float, default=0.01, help='learning rate') parser.add_argument( '--num_devices', type=int, default=1, help='Number of GPU devices') args = parser.parse_args() return args +def get_cards(args): + return args.num_devices -def network(src, dst, vocab_size, hid_size, init_low_bound, init_high_bound): - """ network definition """ - emb_lr_x = 10.0 - gru_lr_x = 1.0 - fc_lr_x = 1.0 - emb = fluid.layers.embedding( - input=src, - size=[vocab_size, 
hid_size], - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=init_low_bound, high=init_high_bound), - learning_rate=emb_lr_x), - is_sparse=True) - - fc0 = fluid.layers.fc(input=emb, - size=hid_size * 3, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=init_low_bound, high=init_high_bound), - learning_rate=gru_lr_x)) - gru_h0 = fluid.layers.dynamic_gru( - input=fc0, - size=hid_size, - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=init_low_bound, high=init_high_bound), - learning_rate=gru_lr_x)) - - fc = fluid.layers.fc(input=gru_h0, - size=vocab_size, - act='softmax', - param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( - low=init_low_bound, high=init_high_bound), - learning_rate=fc_lr_x)) - - cost = fluid.layers.cross_entropy(input=fc, label=dst) - acc = fluid.layers.accuracy(input=fc, label=dst, k=20) - return cost, acc - - -def train(train_reader, - vocab, - network, - hid_size, - base_lr, - batch_size, - pass_num, - use_cuda, - parallel, - model_dir, - init_low_bound=-0.04, - init_high_bound=0.04): - """ train network """ - +def train(): + """ do training """ args = parse_args() - if args.enable_ce: - # random seed must set before configuring the network. 
- fluid.default_startup_program().random_seed = SEED - - vocab_size = len(vocab) - - # Input data - src_wordseq = fluid.layers.data( - name="src_wordseq", shape=[1], dtype="int64", lod_level=1) - dst_wordseq = fluid.layers.data( - name="dst_wordseq", shape=[1], dtype="int64", lod_level=1) + hid_size = args.hid_size + train_dir = args.train_dir + vocab_path = args.vocab_path + use_cuda = True if args.use_cuda else False + parallel = True if args.parallel else False + print("use_cuda:", use_cuda, "parallel:", parallel) + batch_size = args.batch_size + vocab_size, train_reader = utils.prepare_data( + train_dir, vocab_path, batch_size=batch_size * get_cards(args),\ + buffer_size=1000, word_freq_threshold=0, is_train=True) # Train program - avg_cost = None - cost, acc = network(src_wordseq, dst_wordseq, vocab_size, hid_size, - init_low_bound, init_high_bound) - avg_cost = fluid.layers.mean(x=cost) + src_wordseq, dst_wordseq, avg_cost, acc = net.network(vocab_size=vocab_size, hid_size=hid_size) # Optimization to minimize lost - sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=base_lr) + sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr) sgd_optimizer.minimize(avg_cost) - + # Initialize executor place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + training_role = os.getenv("TRAINING_ROLE", "TRAINER") + if training_role == "PSERVER": + place = fluid.CPUPlace() exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) if parallel: train_exe = fluid.ParallelExecutor( use_cuda=use_cuda, loss_name=avg_cost.name) else: train_exe = exe - total_time = 0.0 - fetch_list = [avg_cost.name] - for pass_idx in six.moves.xrange(pass_num): - epoch_idx = pass_idx + 1 - print("epoch_%d start" % epoch_idx) - - t0 = time.time() - i = 0 - newest_ppl = 0 - for data in train_reader(): - i += 1 - lod_src_wordseq = utils.to_lodtensor([dat[0] for dat in data], - place) - lod_dst_wordseq = utils.to_lodtensor([dat[1] for dat in data], - place) - 
ret_avg_cost = train_exe.run(feed={ - "src_wordseq": lod_src_wordseq, - "dst_wordseq": lod_dst_wordseq - }, - fetch_list=fetch_list) - avg_ppl = np.exp(ret_avg_cost[0]) - newest_ppl = np.mean(avg_ppl) - if i % 10 == 0: - print("step:%d ppl:%.3f" % (i, newest_ppl)) + + def train_loop(main_program): + """ train network """ + pass_num = args.pass_num + model_dir = args.model_dir + fetch_list = [avg_cost.name] + + exe.run(fluid.default_startup_program()) + total_time = 0.0 + for pass_idx in six.moves.xrange(pass_num): + epoch_idx = pass_idx + 1 + print("epoch_%d start" % epoch_idx) + + t0 = time.time() + i = 0 + newest_ppl = 0 + for data in train_reader(): + i += 1 + lod_src_wordseq = utils.to_lodtensor([dat[0] for dat in data], + place) + lod_dst_wordseq = utils.to_lodtensor([dat[1] for dat in data], + place) + ret_avg_cost = train_exe.run(main_program, + feed={ "src_wordseq": lod_src_wordseq, + "dst_wordseq": lod_dst_wordseq}, + fetch_list=fetch_list) + avg_ppl = np.exp(ret_avg_cost[0]) + newest_ppl = np.mean(avg_ppl) + if i % args.print_batch == 0: + print("step:%d ppl:%.3f" % (i, newest_ppl)) + + t1 = time.time() + total_time += t1 - t0 + print("epoch:%d num_steps:%d time_cost(s):%f" % + (epoch_idx, i, total_time / epoch_idx)) + save_dir = "%s/epoch_%d" % (model_dir, epoch_idx) + feed_var_names = ["src_wordseq", "dst_wordseq"] + fetch_vars = [avg_cost, acc] + fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe) + print("model saved in %s" % save_dir) + #exe.close() + print("finish training") + + if args.is_local: + print("run local training") + train_loop(fluid.default_main_program()) - t1 = time.time() - total_time += t1 - t0 - print("epoch:%d num_steps:%d time_cost(s):%f" % - (epoch_idx, i, total_time / epoch_idx)) - - if pass_idx == pass_num - 1 and args.enable_ce: - #Note: The following logs are special for CE monitoring. - #Other situations do not need to care about these logs. 
- gpu_num = get_cards(args.enable_ce) - if gpu_num == 1: - print("kpis rsc15_pass_duration %s" % - (total_time / epoch_idx)) - print("kpis rsc15_avg_ppl %s" % newest_ppl) - else: - print("kpis rsc15_pass_duration_card%s %s" % \ - (gpu_num, total_time / epoch_idx)) - print("kpis rsc15_avg_ppl_card%s %s" % - (gpu_num, newest_ppl)) - save_dir = "%s/epoch_%d" % (model_dir, epoch_idx) - feed_var_names = ["src_wordseq", "dst_wordseq"] - fetch_vars = [avg_cost, acc] - fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe) - print("model saved in %s" % save_dir) - - print("finish training") - - -def get_cards(args): - if args.enable_ce: - cards = os.environ.get('CUDA_VISIBLE_DEVICES') - num = len(cards.split(",")) - return num else: - return args.num_devices - - -def train_net(): - """ do training """ - args = parse_args() - train_file = args.train_file - test_file = args.test_file - use_cuda = True if args.use_cuda else False - parallel = True if args.parallel else False - print("use_cuda:", use_cuda, "parallel:", parallel) - batch_size = 50 - vocab, train_reader, test_reader = utils.prepare_data( - train_file, test_file,batch_size=batch_size * get_cards(args),\ - buffer_size=1000, word_freq_threshold=0) - train( - train_reader=train_reader, - vocab=vocab, - network=network, - hid_size=100, - base_lr=0.01, - batch_size=batch_size, - pass_num=10, - use_cuda=use_cuda, - parallel=parallel, - model_dir="model_recall20", - init_low_bound=-0.1, - init_high_bound=0.1) - - + print("run distribute training") + port = os.getenv("PADDLE_PORT", "6174") + pserver_ips = os.getenv("PADDLE_PSERVERS") # ip,ip... + eplist = [] + for ip in pserver_ips.split(","): + eplist.append(':'.join([ip, port])) + pserver_endpoints = ",".join(eplist) # ip:port,ip:port... 
+ trainers = int(os.getenv("PADDLE_TRAINERS_NUM", "0")) + current_endpoint = os.getenv("POD_IP") + ":" + port + trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) + t = fluid.DistributeTranspiler() + t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers) + if training_role == "PSERVER": + pserver_prog = t.get_pserver_program(current_endpoint) + pserver_startup = t.get_startup_program(current_endpoint, + pserver_prog) + exe.run(pserver_startup) + exe.run(pserver_prog) + elif training_role == "TRAINER": + train_loop(t.get_trainer_program()) if __name__ == "__main__": - train_net() + train() diff --git a/fluid/PaddleRec/gru4rec/train_data/small_train.txt b/fluid/PaddleRec/gru4rec/train_data/small_train.txt new file mode 100644 index 00000000..6252a52c --- /dev/null +++ b/fluid/PaddleRec/gru4rec/train_data/small_train.txt @@ -0,0 +1,100 @@ +197 196 198 236 +93 93 384 362 363 43 +336 364 407 +421 322 +314 388 +128 58 +138 138 +46 46 46 +34 34 57 57 57 342 228 321 346 357 59 376 +110 110 +135 94 135 +27 250 27 +129 118 +18 18 18 +81 81 89 89 +27 27 +20 20 20 20 20 212 +33 33 33 33 +62 62 62 63 63 55 248 124 381 428 383 382 43 43 261 63 +90 90 78 78 +399 397 202 141 104 104 245 192 191 271 +239 332 283 88 +187 313 +136 136 324 +41 41 +352 128 +413 414 +410 45 45 45 1 1 1 1 1 1 1 1 31 31 31 31 +92 334 92 +95 285 +215 249 +390 41 +116 116 +300 252 +2 2 2 2 2 +8 8 8 8 8 8 +53 241 259 +118 129 126 94 137 208 216 299 +209 368 139 418 419 +311 180 +303 302 203 284 +369 32 32 32 32 337 +207 47 47 47 +106 107 +143 143 +179 178 +109 109 +405 79 79 371 246 +251 417 427 +333 88 387 358 123 348 394 360 36 365 +3 3 3 3 3 +189 188 +398 425 +107 406 +281 201 141 +2 2 2 +359 54 +395 385 293 +60 60 60 121 121 233 58 58 +24 199 175 24 24 24 351 386 106 +115 294 +122 122 127 127 +35 35 +282 393 +277 140 140 343 225 123 36 36 36 221 114 114 59 59 117 117 247 367 219 258 222 301 375 350 353 111 111 +275 272 273 274 331 330 305 108 76 76 108 +26 26 26 408 26 +290 18 210 291 
+372 139 424 113 +341 340 335 +120 370 +224 200 +426 416 +137 319 +402 55 +54 54 +327 119 +125 125 +391 396 354 355 389 +142 142 +295 320 +113 366 +253 85 85 +56 56 310 309 308 307 278 25 25 19 19 3 312 19 19 19 3 25 +220 338 +34 130 +130 120 380 315 +339 422 +379 378 +95 56 392 115 +55 124 +126 34 +349 373 361 +195 194 +75 75 +64 64 64 +35 35 +40 40 40 242 77 244 77 243 +257 316 +103 306 102 51 52 103 105 52 52 292 318 112 286 345 237 276 112 51 102 105 diff --git a/fluid/PaddleRec/gru4rec/utils.py b/fluid/PaddleRec/gru4rec/utils.py index f6c28c9d..5dec9b75 100644 --- a/fluid/PaddleRec/gru4rec/utils.py +++ b/fluid/PaddleRec/gru4rec/utils.py @@ -5,7 +5,7 @@ import time import numpy as np import paddle.fluid as fluid import paddle - +import os def to_lodtensor(data, place): """ convert to LODtensor """ @@ -22,36 +22,36 @@ def to_lodtensor(data, place): res.set_lod([lod]) return res +def get_vocab_size(vocab_path): + with open(vocab_path, "r") as rf: + line = rf.readline() + return int(line.strip()) -def prepare_data(train_filename, - test_filename, +def prepare_data(file_dir, + vocab_path, batch_size, buffer_size=1000, word_freq_threshold=0, - enable_ce=False): + is_train=True): """ prepare the English Pann Treebank (PTB) data """ print("start constuct word dict") - vocab = build_dict(word_freq_threshold, train_filename, test_filename) - print("construct word dict done\n") - if enable_ce: - train_reader = paddle.batch( - train( - train_filename, vocab, buffer_size, data_type=DataType.SEQ), - batch_size) - else: - train_reader = sort_batch( + if is_train: + vocab_size = get_vocab_size(vocab_path) + reader = sort_batch( paddle.reader.shuffle( train( - train_filename, vocab, buffer_size, data_type=DataType.SEQ), + file_dir, buffer_size, data_type=DataType.SEQ), buf_size=buffer_size), batch_size, batch_size * 20) - test_reader = sort_batch( - test( - test_filename, vocab, buffer_size, data_type=DataType.SEQ), - batch_size, - batch_size * 20) - return vocab, 
train_reader, test_reader + else: + reader = sort_batch( + test( + file_dir, buffer_size, data_type=DataType.SEQ), + batch_size, + batch_size * 20) + vocab_size = 0 + return vocab_size, reader def sort_batch(reader, batch_size, sort_group_size, drop_last=False): @@ -103,57 +103,25 @@ def sort_batch(reader, batch_size, sort_group_size, drop_last=False): class DataType(object): SEQ = 2 - -def word_count(input_file, word_freq=None): - """ - compute word count from corpus - """ - if word_freq is None: - word_freq = collections.defaultdict(int) - - for l in input_file: - for w in l.strip().split(): - word_freq[w] += 1 - - return word_freq - - -def build_dict(min_word_freq=50, train_filename="", test_filename=""): - """ - Build a word dictionary from the corpus, Keys of the dictionary are words, - and values are zero-based IDs of these words. - """ - with open(train_filename) as trainf: - with open(test_filename) as testf: - word_freq = word_count(testf, word_count(trainf)) - - word_freq = [x for x in six.iteritems(word_freq) if x[1] > min_word_freq] - word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) - words, _ = list(zip(*word_freq_sorted)) - word_idx = dict(list(zip(words, six.moves.range(len(words))))) - return word_idx - - -def reader_creator(filename, word_idx, n, data_type): +def reader_creator(file_dir, n, data_type): def reader(): - with open(filename) as f: - for l in f: - if DataType.SEQ == data_type: - l = l.strip().split() - l = [word_idx.get(w) for w in l] - src_seq = l[:len(l) - 1] - trg_seq = l[1:] - if n > 0 and len(src_seq) > n: continue - yield src_seq, trg_seq - else: - assert False, 'error data type' - + files = os.listdir(file_dir) + for fi in files: + with open(file_dir + '/' + fi, "r") as f: + for l in f: + if DataType.SEQ == data_type: + l = l.strip().split() + l = [w for w in l] + src_seq = l[:len(l) - 1] + trg_seq = l[1:] + if n > 0 and len(src_seq) > n: continue + yield src_seq, trg_seq + else: + assert False, 'error data type' 
return reader +def train(train_dir, n, data_type=DataType.SEQ): + return reader_creator(train_dir, n, data_type) -def train(filename, word_idx, n, data_type=DataType.SEQ): - return reader_creator(filename, word_idx, n, data_type) - - -def test(filename, word_idx, n, data_type=DataType.SEQ): - return reader_creator(filename, word_idx, n, data_type) +def test(test_dir, n, data_type=DataType.SEQ): + return reader_creator(test_dir, n, data_type) diff --git a/fluid/PaddleRec/gru4rec/vocab.txt b/fluid/PaddleRec/gru4rec/vocab.txt new file mode 100644 index 00000000..c15fb720 --- /dev/null +++ b/fluid/PaddleRec/gru4rec/vocab.txt @@ -0,0 +1 @@ +501 -- GitLab