diff --git a/paddle/gserver/tests/CMakeLists.txt b/paddle/gserver/tests/CMakeLists.txt
index 45edef017e0c9599bf374fb1d997ea5a46e9d486..09e1b949c25f2a0d7909defae5f751541a1335ca 100644
--- a/paddle/gserver/tests/CMakeLists.txt
+++ b/paddle/gserver/tests/CMakeLists.txt
@@ -111,3 +111,12 @@ if(NOT ON_TRAVIS)
     ${CMAKE_CURRENT_BINARY_DIR}/test_CompareSparse
     WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 endif()
+
+################ test_CompareTwoNets ######################
+add_unittest_without_exec(test_CompareTwoNets
+    test_CompareTwoNets.cpp)
+add_test(NAME test_CompareTwoNets
+  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d
+      ${PADDLE_SOURCE_DIR}/python:${PADDLE_SOURCE_DIR}/paddle/gserver/tests
+      ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
+    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
diff --git a/paddle/gserver/tests/sequence_recurrent.py b/paddle/gserver/tests/sequence_recurrent.py
new file mode 100644
index 0000000000000000000000000000000000000000..4895df186bfecc5cb5263676a9cd5bac5039d565
--- /dev/null
+++ b/paddle/gserver/tests/sequence_recurrent.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
+dict_file = dict()
+for line_count, line in enumerate(open(dict_path, "r")):
+    dict_file[line.strip()] = line_count
+
+define_py_data_sources2(
+    train_list='gserver/tests/Sequence/train.list',
+    test_list=None,
+    module='sequenceGen',
+    obj='process',
+    args={"dict_file": dict_file})
+
+settings(batch_size=5)
+######################## network configure ################################
+dict_dim = len(open(dict_path, 'r').readlines())
+word_dim = 128
+hidden_dim = 128
+label_dim = 3
+
+# This config is designed to be equivalent to sequence_recurrent_group.py
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(
+    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
+
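+# NOTE: sequence_recurrent_group.py reuses the recurrent weight created by
+# the recurrent_layer below via the (assumed auto-generated) parameter name
+# "___recurrent_layer_0__", so both configs declare identical parameters.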
+recurrent = recurrent_layer(input=emb, bias_attr=False, act=SoftmaxActivation())
+
+recurrent_last = last_seq(input=recurrent)
+
+with mixed_layer(
+        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
+    output += full_matrix_projection(input=recurrent_last)
+
+outputs(
+    classification_cost(
+        input=output, label=data_layer(
+            name="label", size=1)))
diff --git a/paddle/gserver/tests/sequence_recurrent_group.py b/paddle/gserver/tests/sequence_recurrent_group.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1d54542e3bc4e89f70d31d5e89c0f44953c9f90
--- /dev/null
+++ b/paddle/gserver/tests/sequence_recurrent_group.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from paddle.trainer_config_helpers import *
+
+######################## data source ################################
+dict_path = 'gserver/tests/Sequence/tour_dict_phrase.dict'
+dict_file = dict()
+for line_count, line in enumerate(open(dict_path, "r")):
+    dict_file[line.strip()] = line_count
+
+define_py_data_sources2(
+    train_list='gserver/tests/Sequence/train.list',
+    test_list=None,
+    module='sequenceGen',
+    obj='process',
+    args={"dict_file": dict_file})
+
+settings(batch_size=5)
+######################## network configure ################################
+dict_dim = len(open(dict_path, 'r').readlines())
+word_dim = 128
+hidden_dim = 128
+label_dim = 3
+
+# This config is designed to be equivalent to sequence_recurrent.py
+
+data = data_layer(name="word", size=dict_dim)
+
+emb = embedding_layer(
+    input=data, size=word_dim, param_attr=ParamAttr(name="emb"))
+
+
+def step(y):
+    mem = memory(name="rnn_state", size=hidden_dim)
+    with mixed_layer(
+            name="rnn_state",
+            size=hidden_dim,
+            bias_attr=False,
+            act=SoftmaxActivation()) as out:
+        out += identity_projection(input=y)
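+        # "___recurrent_layer_0__" is assumed to match the parameter name
+        # generated for the recurrent weight in sequence_recurrent.py;
+        # reusing it keeps the two equivalent networks comparable.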
+        out += full_matrix_projection(
+            input=mem, param_attr=ParamAttr(name="___recurrent_layer_0__"))
+    return out
+
+
+recurrent = recurrent_group(name="rnn", step=step, input=emb)
+
+recurrent_last = last_seq(input=recurrent)
+
+with mixed_layer(
+        size=label_dim, act=SoftmaxActivation(), bias_attr=True) as output:
+    output += full_matrix_projection(input=recurrent_last)
+
+outputs(
+    classification_cost(
+        input=output, label=data_layer(
+            name="label", size=1)))
diff --git a/paddle/trainer/tests/test_CompareTwoNets.cpp b/paddle/gserver/tests/test_CompareTwoNets.cpp
similarity index 95%
rename from paddle/trainer/tests/test_CompareTwoNets.cpp
rename to paddle/gserver/tests/test_CompareTwoNets.cpp
index 94f65e545d116c802fb4877dc14f07aaaf83a4fb..801d9607565910b1f7f68a9c4532de5877e44f30 100644
--- a/paddle/trainer/tests/test_CompareTwoNets.cpp
+++ b/paddle/gserver/tests/test_CompareTwoNets.cpp
@@ -30,8 +30,6 @@ DECLARE_bool(use_gpu);
 DECLARE_string(config);
 DECLARE_string(nics);
 
-DEFINE_string(config_file_a, "", "config of one network to compare");
-DEFINE_string(config_file_b, "", "config of another network to compare");
 DEFINE_bool(need_high_accuracy,
             false,
             "whether need to run in double accuracy");
@@ -42,6 +40,10 @@ DEFINE_double(
 DECLARE_bool(thread_local_rand_use_global_seed);
 DECLARE_int32(seed);
 
+static const string& config_file_a = "gserver/tests/sequence_recurrent.py";
+static const string& config_file_b =
+    "gserver/tests/sequence_recurrent_group.py";
+
 struct ComData {
   vector<Argument> outArgs;
   vector<ParameterPtr> parameters;
@@ -66,6 +68,7 @@ void calcGradient(ComData& data, const string configFile) {
   DataBatch dataBatch;
   int32_t batchSize = trainer.getConfig().opt_config().batch_size();
 
+  trainer.getDataProvider()->reset();
   trainer.getDataProvider()->setSkipShuffle();
   trainer.getDataProvider()->getNextBatch(batchSize, &dataBatch);
 
@@ -167,11 +170,11 @@ void compareGradient(ComData& comDataA, ComData& comDataB) {
 
 TEST(Trainer, create) {
   ComData dataA;
-  calcGradient(dataA, FLAGS_config_file_a);
+  calcGradient(dataA, config_file_a);
   LOG(INFO) << "\n\nforwardBackward of Network A is finished\n\n";
 
   ComData dataB;
-  calcGradient(dataB, FLAGS_config_file_b);
+  calcGradient(dataB, config_file_b);
   LOG(INFO) << "\n\nforwardBackward of the Network B is finished\n\n";
 
   compareGradient(dataA, dataB);
diff --git a/paddle/trainer/tests/CMakeLists.txt b/paddle/trainer/tests/CMakeLists.txt
index 441df2b57b7729446aeb1fd9ccbd4dad5075f277..3168f3c0ff016bb515d8bb50370d91bf25cbc5da 100644
--- a/paddle/trainer/tests/CMakeLists.txt
+++ b/paddle/trainer/tests/CMakeLists.txt
@@ -28,14 +28,6 @@ if(WITH_PYTHON)
     ${PADDLE_SOURCE_DIR}/paddle/.set_port.sh -p port
     ${CMAKE_CURRENT_BINARY_DIR}/test_TrainerOnePass
     WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 endif()
-################ test_CompareTwoNets ######################
-add_unittest_without_exec(test_CompareTwoNets
-    test_CompareTwoNets.cpp)
-add_test(NAME test_CompareTwoNets
-  COMMAND ${PADDLE_SOURCE_DIR}/paddle/.set_python_path.sh -d ${PADDLE_SOURCE_DIR}/python/
-      ${CMAKE_CURRENT_BINARY_DIR}/test_CompareTwoNets
-      --config_file_a=trainer/tests/sample_trainer_config_qb_rnn.conf --config_file_b=trainer/tests/sample_trainer_config_rnn.conf
-    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}/paddle/)
 
 ############### test_CompareTwoOpts ###################
 add_unittest_without_exec(test_CompareTwoOpts
diff --git a/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf b/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
deleted file mode 100644
index d19222360c2f424ddb306b155dfef07921098a6b..0000000000000000000000000000000000000000
--- a/paddle/trainer/tests/sample_trainer_config_qb_rnn.conf
+++ /dev/null
@@ -1,154 +0,0 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
-
-# Note: when making change to this file, please make sure
-# sample_trainer_config_rnn.conf is changed accordingly so that the uniitest
-# for comparing these two nets can pass (test_CompareTwoNets)
-
-default_initial_std(0.1)
-default_device(0)
-
-word_dim = 1451594
-l1 = 0
-l2 = 0
-
-model_type("nn")
-
-sparse_update = get_config_arg("sparse_update", bool, False)
-
-TrainData(ProtoData(
-    type = "proto_sequence",
-    files = ('trainer/tests/train.list'),
-))
-
-Settings(
-    algorithm='sgd',
-    batch_size=100,
-    learning_rate=0.0001,
-    learning_rate_decay_a=4e-08,
-    learning_rate_decay_b=0.0,
-    learning_rate_schedule='poly',
-)
-
-
-wordvec_dim = 128
-layer2_dim = 96
-layer3_dim = 96
-hidden_dim = 128
-
-slot_names = ["qb", "qw", "tb", "tw"]
-
-def ltr_network(network_name,
-                word_dim=word_dim,
-                wordvec_dim=wordvec_dim,
-                layer2_dim=layer2_dim,
-                layer3_dim=layer3_dim,
-                hidden_dim=hidden_dim,
-                slot_names=slot_names,
-                l1=l1,
-                l2=l2):
-
-    slotnum = len(slot_names)
-    for i in xrange(slotnum):
-        Inputs(slot_names[i] + network_name)
-    for i in xrange(slotnum):
-        Layer(
-            name = slot_names[i] + network_name,
-            type = "data",
-            size = word_dim,
-            device = -1,
-        )
-        Layer(
-            name = slot_names[i] + "_embedding_" + network_name,
-            type = "mixed",
-            size = wordvec_dim,
-            bias = False,
-            device = -1,
-            inputs = TableProjection(slot_names[i] + network_name,
-                                     parameter_name = "embedding.w0",
-                                     decay_rate_l1=l1,
-                                     sparse_remote_update = True,
-                                     sparse_update = sparse_update,
-                                     ),
-        )
-        Layer(
-            name = slot_names[i] + "_rnn1_" + network_name,
-            type = "recurrent",
-            active_type = "tanh",
-            bias = Bias(initial_std = 0,
-                        parameter_name = "rnn1.bias"),
-            inputs = Input(slot_names[i] + "_embedding_" + network_name,
-                           parameter_name = "rnn1.w0")
-        )
-        Layer(
-            name = slot_names[i] + "_rnnlast_" + network_name,
-            type = "seqlastins",
-            inputs = [
-                slot_names[i] + "_rnn1_" + network_name,
-            ],
-        )
-
-    Layer(
-        name = "layer2_" + network_name,
-        type = "fc",
-        active_type = "tanh",
-        size = layer2_dim,
-        bias = Bias(parameter_name = "layer2.bias"),
-        inputs = [Input(slot_name + "_rnnlast_" + network_name,
-                        parameter_name = "_layer2_" + slot_name + ".w",
-                        decay_rate = l2,
-                        initial_smart = True) for slot_name in slot_names]
-    )
-    Layer(
-        name = "layer3_" + network_name,
-        type = "fc",
-        active_type = "tanh",
-        size = layer3_dim,
-        bias = Bias(parameter_name = "layer3.bias"),
-        inputs = [
-            Input("layer2_" + network_name,
-                  parameter_name = "_layer3.w",
-                  decay_rate = l2,
-                  initial_smart = True),
-        ]
-    )
-    Layer(
-        name = "output_" + network_name,
-        type = "fc",
-        size = 1,
-        bias = False,
-        inputs = [
-            Input("layer3_" + network_name,
-                  parameter_name = "_layerO.w"),
-        ],
-    )
-
-
-ltr_network("left")
-ltr_network("right")
-Inputs("label")
-Layer(
-    name = "label",
-    type = "data",
-    size = 1,
-    )
-Outputs("cost", "qb_rnnlast_left")
-Layer(
-    name = "cost",
-    type = "rank-cost",
-    inputs = ["output_left", "output_right", "label"],
-    )
diff --git a/paddle/trainer/tests/sample_trainer_config_rnn.conf b/paddle/trainer/tests/sample_trainer_config_rnn.conf
deleted file mode 100644
index b720d4d5a6ca59e207832a8c5410c2cb6074c439..0000000000000000000000000000000000000000
--- a/paddle/trainer/tests/sample_trainer_config_rnn.conf
+++ /dev/null
@@ -1,180 +0,0 @@
-#edit-mode: -*- python -*-
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
-
-# Note: when making change to this file, please make sure
-# sample_trainer_config_qb_rnn.conf is changed accordingly so that the uniitest
-# for comparing these two nets can pass (test_CompareTwoNets)
-
-default_initial_std(0.1)
-default_device(0)
-
-word_dim = 1451594
-l1 = 0
-l2 = 0
-
-model_type("recurrent_nn")
-
-sparse_update = get_config_arg("sparse_update", bool, False)
-
-TrainData(ProtoData(
-    type = "proto_sequence",
-    files = ('trainer/tests/train.list'),
-))
-
-Settings(
-    algorithm='sgd',
-    batch_size=100,
-    learning_rate=0.0001,
-    learning_rate_decay_a=4e-08,
-    learning_rate_decay_b=0.0,
-    learning_rate_schedule='poly',
-)
-
-
-wordvec_dim = 128
-layer2_dim = 96
-layer3_dim = 96
-hidden_dim = 128
-
-slot_names = ["qb", "qw", "tb", "tw"]
-
-def SimpleRecurrentLayer(name,
-                         size,
-                         active_type,
-                         bias,
-                         input_layer_name,
-                         parameter_name,
-                         seq_reversed = False):
-    RecurrentLayerGroupBegin(name + "_layer_group",
-                             in_links=[input_layer_name],
-                             out_links=[name],
-                             seq_reversed=seq_reversed)
-    memory_name = Memory(name=name, size=size)
-    Layer(
-        name = name,
-        type = "mixed",
-        size = size,
-        active_type = active_type,
-        bias = bias,
-        inputs = [IdentityProjection(input_layer_name),
-                  FullMatrixProjection(memory_name,
-                                       parameter_name = parameter_name,
-                                       ),
-                 ]
-    )
-    RecurrentLayerGroupEnd(name + "_layer_group")
-
-
-def ltr_network(network_name,
-                word_dim=word_dim,
-                wordvec_dim=wordvec_dim,
-                layer2_dim=layer2_dim,
-                layer3_dim=layer3_dim,
-                hidden_dim=hidden_dim,
-                slot_names=slot_names,
-                l1=l1,
-                l2=l2):
-
-    slotnum = len(slot_names)
-    for i in xrange(slotnum):
-        Inputs(slot_names[i] + network_name)
-    for i in xrange(slotnum):
-        Layer(
-            name = slot_names[i] + network_name,
-            type = "data",
-            size = word_dim,
-            device = -1,
-        )
-        Layer(
-            name = slot_names[i] + "_embedding_" + network_name,
-            type = "mixed",
-            size = wordvec_dim,
-            bias = False,
-            device = -1,
-            inputs = TableProjection(slot_names[i] + network_name,
-                                     parameter_name = "embedding.w0",
-                                     decay_rate_l1=l1,
-                                     sparse_remote_update = True,
-                                     sparse_update = sparse_update,
-                                     ),
-        )
-        SimpleRecurrentLayer(
-            name = slot_names[i] + "_rnn1_" + network_name,
-            size = hidden_dim,
-            active_type = "tanh",
-            bias = Bias(initial_std = 0,
-                        parameter_name = "rnn1.bias"),
-            input_layer_name = slot_names[i] + "_embedding_" + network_name,
-            parameter_name = "rnn1.w0",
-        )
-        Layer(
-            name = slot_names[i] + "_rnnlast_" + network_name,
-            type = "seqlastins",
-            inputs = [
-                slot_names[i] + "_rnn1_" + network_name,
-            ],
-        )
-    Layer(
-        name = "layer2_" + network_name,
-        type = "fc",
-        active_type = "tanh",
-        size = layer2_dim,
-        bias = Bias(parameter_name = "layer2.bias"),
-        inputs = [Input(slot_name + "_rnnlast_" + network_name,
-                        parameter_name = "_layer2_" + slot_name + ".w",
-                        decay_rate = l2,
-                        initial_smart = True) for slot_name in slot_names]
-    )
-    Layer(
-        name = "layer3_" + network_name,
-        type = "fc",
-        active_type = "tanh",
-        size = layer3_dim,
-        bias = Bias(parameter_name = "layer3.bias"),
-        inputs = [
-            Input("layer2_" + network_name,
-                  parameter_name = "_layer3.w",
-                  decay_rate = l2,
-                  initial_smart = True),
-        ]
-    )
-    Layer(
-        name = "output_" + network_name,
-        type = "fc",
-        size = 1,
-        bias = False,
-        inputs = [
-            Input("layer3_" + network_name,
-                  parameter_name = "_layerO.w"),
-        ],
-    )
-
-
-ltr_network("left")
-ltr_network("right")
-Inputs("label")
-Layer(
-    name = "label",
-    type = "data",
-    size = 1,
-    )
-Outputs("cost", "qb_rnnlast_left")
-Layer(
-    name = "cost",
-    type = "rank-cost",
-    inputs = ["output_left", "output_right", "label"],
-    )